diff --git a/devel/404.html b/devel/404.html index ddd244a9f..db1437a65 100644 --- a/devel/404.html +++ b/devel/404.html @@ -12,7 +12,7 @@ - + @@ -20,7 +20,7 @@ - + diff --git a/devel/CHANGELOG/index.html b/devel/CHANGELOG/index.html index cf751b9a4..15e6e3f72 100644 --- a/devel/CHANGELOG/index.html +++ b/devel/CHANGELOG/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2726,7 +2726,8 @@

ChangelogUnreleased

@@ -2974,11 +2975,11 @@

0.1.0 - 🎉 first release Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/index.html b/devel/api/pydvl/index.html index 29cbf14b3..30535badd 100644 --- a/devel/api/pydvl/index.html +++ b/devel/api/pydvl/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2502,11 +2502,11 @@

The Python Data Valuation Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20
diff --git a/devel/api/pydvl/influence/general/index.html b/devel/api/pydvl/influence/general/index.html index 920967e8e..3e325117b 100644 --- a/devel/api/pydvl/influence/general/index.html +++ b/devel/api/pydvl/influence/general/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3887,11 +3887,11 @@

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20
diff --git a/devel/api/pydvl/influence/index.html b/devel/api/pydvl/influence/index.html index 5ffb8764b..5d012e009 100644 --- a/devel/api/pydvl/influence/index.html +++ b/devel/api/pydvl/influence/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2497,11 +2497,11 @@

Influence

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20
diff --git a/devel/api/pydvl/influence/inversion/index.html b/devel/api/pydvl/influence/inversion/index.html index 24538e7ea..88483a65f 100644 --- a/devel/api/pydvl/influence/inversion/index.html +++ b/devel/api/pydvl/influence/inversion/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3449,11 +3449,11 @@

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20
diff --git a/devel/api/pydvl/influence/torch/functional/index.html b/devel/api/pydvl/influence/torch/functional/index.html index 9b5706a35..126065448 100644 --- a/devel/api/pydvl/influence/torch/functional/index.html +++ b/devel/api/pydvl/influence/torch/functional/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3626,11 +3626,11 @@

2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/influence/torch/index.html b/devel/api/pydvl/influence/torch/index.html index 8ee196b4b..061f8be2a 100644 --- a/devel/api/pydvl/influence/torch/index.html +++ b/devel/api/pydvl/influence/torch/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2493,11 +2493,11 @@

Torch

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20
diff --git a/devel/api/pydvl/influence/torch/torch_differentiable/index.html b/devel/api/pydvl/influence/torch/torch_differentiable/index.html index d6160e05f..0b1cf4ab0 100644 --- a/devel/api/pydvl/influence/torch/torch_differentiable/index.html +++ b/devel/api/pydvl/influence/torch/torch_differentiable/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -6524,11 +6524,11 @@

2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/influence/torch/util/index.html b/devel/api/pydvl/influence/torch/util/index.html index 6ae72ecbc..c8ed64872 100644 --- a/devel/api/pydvl/influence/torch/util/index.html +++ b/devel/api/pydvl/influence/torch/util/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3430,11 +3430,11 @@

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20
diff --git a/devel/api/pydvl/influence/twice_differentiable/index.html b/devel/api/pydvl/influence/twice_differentiable/index.html index 24b0ddef9..22e9b388c 100644 --- a/devel/api/pydvl/influence/twice_differentiable/index.html +++ b/devel/api/pydvl/influence/twice_differentiable/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -4230,11 +4230,11 @@

2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/parallel/backend/index.html b/devel/api/pydvl/parallel/backend/index.html index 1a0446c6b..25aa23bec 100644 --- a/devel/api/pydvl/parallel/backend/index.html +++ b/devel/api/pydvl/parallel/backend/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3195,11 +3195,11 @@

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20
diff --git a/devel/api/pydvl/parallel/backends/index.html b/devel/api/pydvl/parallel/backends/index.html index d9fc1b9ca..c47febb67 100644 --- a/devel/api/pydvl/parallel/backends/index.html +++ b/devel/api/pydvl/parallel/backends/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2493,11 +2493,11 @@

Backends

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20
diff --git a/devel/api/pydvl/parallel/backends/joblib/index.html b/devel/api/pydvl/parallel/backends/joblib/index.html index 2cfca978d..35f70a351 100644 --- a/devel/api/pydvl/parallel/backends/joblib/index.html +++ b/devel/api/pydvl/parallel/backends/joblib/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2753,11 +2753,11 @@

2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/parallel/backends/ray/index.html b/devel/api/pydvl/parallel/backends/ray/index.html index e02235483..0e42b399b 100644 --- a/devel/api/pydvl/parallel/backends/ray/index.html +++ b/devel/api/pydvl/parallel/backends/ray/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2779,11 +2779,11 @@

2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/parallel/config/index.html b/devel/api/pydvl/parallel/config/index.html index c6963ab71..b7d9d5c06 100644 --- a/devel/api/pydvl/parallel/config/index.html +++ b/devel/api/pydvl/parallel/config/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2694,11 +2694,11 @@

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20
diff --git a/devel/api/pydvl/parallel/futures/index.html b/devel/api/pydvl/parallel/futures/index.html index 017314059..71e58bf12 100644 --- a/devel/api/pydvl/parallel/futures/index.html +++ b/devel/api/pydvl/parallel/futures/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2679,11 +2679,11 @@

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20
diff --git a/devel/api/pydvl/parallel/futures/ray/index.html b/devel/api/pydvl/parallel/futures/ray/index.html index f0caa2ff7..f79ec6914 100644 --- a/devel/api/pydvl/parallel/futures/ray/index.html +++ b/devel/api/pydvl/parallel/futures/ray/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3213,11 +3213,11 @@

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/parallel/index.html b/devel/api/pydvl/parallel/index.html index ca7d90b9e..775f02031 100644 --- a/devel/api/pydvl/parallel/index.html +++ b/devel/api/pydvl/parallel/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2518,11 +2518,11 @@

Parallel

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20
diff --git a/devel/api/pydvl/parallel/map_reduce/index.html b/devel/api/pydvl/parallel/map_reduce/index.html index dd224549a..aa187b5e3 100644 --- a/devel/api/pydvl/parallel/map_reduce/index.html +++ b/devel/api/pydvl/parallel/map_reduce/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3032,11 +3032,11 @@

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/reporting/index.html b/devel/api/pydvl/reporting/index.html index 9311a8bf2..f6ca13783 100644 --- a/devel/api/pydvl/reporting/index.html +++ b/devel/api/pydvl/reporting/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2491,11 +2491,11 @@

Reporting

Last update: - 2023-09-18 + 2023-09-20
Created: - 2023-09-18 + 2023-09-20
diff --git a/devel/api/pydvl/reporting/plots/index.html b/devel/api/pydvl/reporting/plots/index.html index a7712c8d8..6464f4a2d 100644 --- a/devel/api/pydvl/reporting/plots/index.html +++ b/devel/api/pydvl/reporting/plots/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -1524,6 +1524,20 @@ shaded_mean_std()
+ + +
  • + + plot_ci_array() + + +
  • + +
  • + + plot_ci_values() + +
  • @@ -2515,6 +2529,20 @@ shaded_mean_std() +
  • + +
  • + + plot_ci_array() + + +
  • + +
  • + + plot_ci_values() + +
  • @@ -2591,7 +2619,13 @@

    -

    The usual mean \(\pm\) std deviation plot to aggregate runs of experiments.

    +

    The usual mean \(\pm\) std deviation plot to aggregate runs of +experiments.

    +
    +

    Deprecation notice

    +

    This function is bogus and will be removed in the future in favour of +properly computed confidence intervals.

    +
    @@ -2807,11 +2841,7 @@

    Source code in src/pydvl/reporting/plots.py -
    12
    -13
    -14
    -15
    -16
    +            
    16
     17
     18
     19
    @@ -2854,54 +2884,702 @@ 

    56 57 58 -59

    def shaded_mean_std(
    -    data: np.ndarray,
    -    abscissa: Optional[Sequence[Any]] = None,
    -    num_std: float = 1.0,
    -    mean_color: Optional[str] = "dodgerblue",
    -    shade_color: Optional[str] = "lightblue",
    -    title: Optional[str] = None,
    -    xlabel: Optional[str] = None,
    -    ylabel: Optional[str] = None,
    -    ax: Optional[Axes] = None,
    -    **kwargs,
    -) -> Axes:
    -    """The usual mean \(\pm\) std deviation plot to aggregate runs of experiments.
    -
    -    Args:
    -        data: axis 0 is to be aggregated on (e.g. runs) and axis 1 is the
    -            data for each run.
    -        abscissa: values for the x-axis. Leave empty to use increasing integers.
    -        num_std: number of standard deviations to shade around the mean.
    -        mean_color: color for the mean
    -        shade_color: color for the shaded region
    -        title: Title text. To use mathematics, use LaTeX notation.
    -        xlabel: Text for the horizontal axis.
    -        ylabel: Text for the vertical axis
    -        ax: If passed, axes object into which to insert the figure. Otherwise,
    -            a new figure is created and returned
    -        kwargs: these are forwarded to the ax.plot() call for the mean.
    -
    -    Returns:
    -        The axes used (or created)
    -    """
    -    assert len(data.shape) == 2
    -    mean = data.mean(axis=0)
    -    std = num_std * data.std(axis=0)
    -
    -    if ax is None:
    -        fig, ax = plt.subplots()
    -    if abscissa is None:
    -        abscissa = list(range(data.shape[1]))
    -
    -    ax.fill_between(abscissa, mean - std, mean + std, alpha=0.3, color=shade_color)
    -    ax.plot(abscissa, mean, color=mean_color, **kwargs)
    -
    -    ax.set_title(title)
    -    ax.set_xlabel(xlabel)
    -    ax.set_ylabel(ylabel)
    -
    -    return ax
    +59
    +60
    +61
    +62
    +63
    +64
    +65
    +66
    +67
    +68
    +69
    @deprecated(target=None, deprecated_in="0.7.1", remove_in="0.9.0")
    +def shaded_mean_std(
    +    data: np.ndarray,
    +    abscissa: Optional[Sequence[Any]] = None,
    +    num_std: float = 1.0,
    +    mean_color: Optional[str] = "dodgerblue",
    +    shade_color: Optional[str] = "lightblue",
    +    title: Optional[str] = None,
    +    xlabel: Optional[str] = None,
    +    ylabel: Optional[str] = None,
    +    ax: Optional[Axes] = None,
    +    **kwargs,
    +) -> Axes:
    +    r"""The usual mean \(\pm\) std deviation plot to aggregate runs of
    +    experiments.
    +
    +    !!! warning "Deprecation notice"
    +        This function is bogus and will be removed in the future in favour of
    +        properly computed confidence intervals.
    +
    +    Args:
    +        data: axis 0 is to be aggregated on (e.g. runs) and axis 1 is the
    +            data for each run.
    +        abscissa: values for the x-axis. Leave empty to use increasing integers.
    +        num_std: number of standard deviations to shade around the mean.
    +        mean_color: color for the mean
    +        shade_color: color for the shaded region
    +        title: Title text. To use mathematics, use LaTeX notation.
    +        xlabel: Text for the horizontal axis.
    +        ylabel: Text for the vertical axis
    +        ax: If passed, axes object into which to insert the figure. Otherwise,
    +            a new figure is created and returned
    +        kwargs: these are forwarded to the ax.plot() call for the mean.
    +
    +    Returns:
    +        The axes used (or created)
    +    """
    +    assert len(data.shape) == 2
    +    mean = data.mean(axis=0)
    +    std = num_std * data.std(axis=0)
    +
    +    if ax is None:
    +        fig, ax = plt.subplots()
    +    if abscissa is None:
    +        abscissa = list(range(data.shape[1]))
    +
    +    ax.fill_between(abscissa, mean - std, mean + std, alpha=0.3, color=shade_color)
    +    ax.plot(abscissa, mean, color=mean_color, **kwargs)
    +
    +    ax.set_title(title)
    +    ax.set_xlabel(xlabel)
    +    ax.set_ylabel(ylabel)
    +
    +    return ax
    +
    + +
    + + + + +
    + + + + +

    + plot_ci_array(data, level, type='normal', abscissa=None, mean_color='dodgerblue', shade_color='lightblue', ax=None, **kwargs) + +

    + + +
    + +

    Plot values and a confidence interval from a 2D array.

    +

    Supported intervals are based on the normal and the t distributions.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    PARAMETER DESCRIPTION
    data +
    +

    A 2D array with M different values for each of the N indices.

    +
    +

    + + TYPE: + NDArray + +

    +
    level +
    +

    The confidence level.

    +
    +

    + + TYPE: + float + +

    +
    type +
    +

    The type of confidence interval to use.

    +
    +

    + + TYPE: + Literal['normal', 't', 'auto'] + + + DEFAULT: + 'normal' + +

    +
    abscissa +
    +

    The values for the x-axis. Leave empty to use increasing +integers.

    +
    +

    + + TYPE: + Optional[Sequence[str]] + + + DEFAULT: + None + +

    +
    mean_color +
    +

    The color of the mean line.

    +
    +

    + + TYPE: + Optional[str] + + + DEFAULT: + 'dodgerblue' + +

    +
    shade_color +
    +

    The color of the confidence interval.

    +
    +

    + + TYPE: + Optional[str] + + + DEFAULT: + 'lightblue' + +

    +
    ax +
    +

    If passed, axes object into which to insert the figure. Otherwise, +a new figure is created and the axes returned.

    +
    +

    + + TYPE: + Optional[Axes] + + + DEFAULT: + None + +

    +
    **kwargs +
    +

    Additional arguments to pass to the plot function.

    +
    +

    + + DEFAULT: + {} + +

    +
    + + + + + + + + + + + + + + + + +
    RETURNSDESCRIPTION
    + + Axes + + +
    +

    The matplotlib axes.

    +
    +
    + +
    + Source code in src/pydvl/reporting/plots.py +
    def plot_ci_array(
    +    data: NDArray,
    +    level: float,
    +    type: Literal["normal", "t", "auto"] = "normal",
    +    abscissa: Optional[Sequence[str]] = None,
    +    mean_color: Optional[str] = "dodgerblue",
    +    shade_color: Optional[str] = "lightblue",
    +    ax: Optional[plt.Axes] = None,
    +    **kwargs,
    +) -> plt.Axes:
    +    """Plot values and a confidence interval from a 2D array.
    +
    +    Supported intervals are based on the normal and the t distributions.
    +
    +    Args:
    +        data: A 2D array with M different values for each of the N indices.
    +        level: The confidence level.
    +        type: The type of confidence interval to use.
    +        abscissa: The values for the x-axis. Leave empty to use increasing
    +            integers.
    +        mean_color: The color of the mean line.
    +        shade_color: The color of the confidence interval.
    +        ax: If passed, axes object into which to insert the figure. Otherwise,
    +            a new figure is created and the axes returned.
    +        **kwargs: Additional arguments to pass to the plot function.
    +
    +    Returns:
    +        The matplotlib axes.
    +    """
    +
    +    m, n = data.shape
    +
    +    means = np.mean(data, axis=0)
    +    variances = np.var(data, axis=0, ddof=1)
    +
    +    dummy: ValuationResult[np.int_, str] = ValuationResult(
    +        algorithm="dummy",
    +        values=means,
    +        variances=variances,
    +        counts=np.ones_like(means, dtype=np.int_) * m,
    +        indices=np.arange(n),
    +        data_names=np.array(abscissa, dtype=str)
    +        if abscissa is not None
    +        else np.arange(n, dtype=str),
    +    )
    +
    +    return plot_ci_values(
    +        dummy,
    +        level=level,
    +        type=type,
    +        mean_color=mean_color,
    +        shade_color=shade_color,
    +        ax=ax,
    +        **kwargs,
    +    )
    +
    +
    +
    + +
    + + +
    + + + + +

    + plot_ci_values(values, level, type='auto', abscissa=None, mean_color='dodgerblue', shade_color='lightblue', ax=None, **kwargs) + +

    + + +
    + +

    Plot values and a confidence interval.

    +

    Uses abscissa for the x-axis, or increasing integers (as strings) when it is not given.

    +

    Supported intervals are based on the normal and the t distributions.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    PARAMETER DESCRIPTION
    values +
    +

    The valuation result.

    +
    +

    + + TYPE: + ValuationResult + +

    +
    level +
    +

    The confidence level.

    +
    +

    + + TYPE: + float + +

    +
    type +
    +

    The type of confidence interval to use. If "auto", uses "normal" if +the minimum number of updates for all indices is greater than 30, +otherwise uses "t".

    +
    +

    + + TYPE: + Literal['normal', 't', 'auto'] + + + DEFAULT: + 'auto' + +

    +
    abscissa +
    +

    The values for the x-axis. Leave empty to use increasing +integers.

    +
    +

    + + TYPE: + Optional[Sequence[str]] + + + DEFAULT: + None + +

    +
    mean_color +
    +

    The color of the mean line.

    +
    +

    + + TYPE: + Optional[str] + + + DEFAULT: + 'dodgerblue' + +

    +
    shade_color +
    +

    The color of the confidence interval.

    +
    +

    + + TYPE: + Optional[str] + + + DEFAULT: + 'lightblue' + +

    +
    ax +
    +

    If passed, axes object into which to insert the figure. Otherwise, +a new figure is created and the axes returned.

    +
    +

    + + TYPE: + Optional[Axes] + + + DEFAULT: + None + +

    +
    **kwargs +
    +

    Additional arguments to pass to the plot function.

    +
    +

    + + DEFAULT: + {} + +

    +
    + + + + + + + + + + + + + + + + +
    RETURNSDESCRIPTION
    + +
    +

    The matplotlib axes.

    +
    +
    + +
    + Source code in src/pydvl/reporting/plots.py +
    def plot_ci_values(
    +    values: ValuationResult,
    +    level: float,
    +    type: Literal["normal", "t", "auto"] = "auto",
    +    abscissa: Optional[Sequence[str]] = None,
    +    mean_color: Optional[str] = "dodgerblue",
    +    shade_color: Optional[str] = "lightblue",
    +    ax: Optional[plt.Axes] = None,
    +    **kwargs,
    +):
    +    """Plot values and a confidence interval.
    +
    +    Uses `values.data_names` for the x-axis.
    +
    +    Supported intervals are based on the normal and the t distributions.
    +
    +    Args:
    +        values: The valuation result.
    +        level: The confidence level.
    +        type: The type of confidence interval to use. If "auto", uses "norm" if
    +            the minimum number of updates for all indices is greater than 30,
    +            otherwise uses "t".
    +        abscissa: The values for the x-axis. Leave empty to use increasing
    +            integers.
    +        mean_color: The color of the mean line.
    +        shade_color: The color of the confidence interval.
    +        ax: If passed, axes object into which to insert the figure. Otherwise,
    +            a new figure is created and the axes returned.
    +        **kwargs: Additional arguments to pass to the plot function.
    +
    +    Returns:
    +        The matplotlib axes.
    +    """
    +
    +    ppfs = {
    +        "normal": norm.ppf,
    +        "t": partial(t.ppf, df=values.counts - 1),
    +        "auto": norm.ppf
    +        if np.min(values.counts) > 30
    +        else partial(t.ppf, df=values.counts - 1),
    +    }
    +
    +    try:
    +        score = ppfs[type](1 - level / 2)
    +    except KeyError:
    +        raise ValueError(
    +            f"Unknown confidence interval type requested: {type}."
    +        ) from None
    +
    +    if abscissa is None:
    +        abscissa = [str(i) for i, _ in enumerate(values)]
    +    bound = score * values.stderr
    +
    +    if ax is None:
    +        fig, ax = plt.subplots()
    +
    +    ax.fill_between(
    +        abscissa,
    +        values.values - bound,
    +        values.values + bound,
    +        alpha=0.3,
    +        color=shade_color,
    +    )
    +    ax.plot(abscissa, values.values, color=mean_color, **kwargs)
    +    return ax
     
    @@ -2984,79 +3662,79 @@

    Source code in src/pydvl/reporting/plots.py -
    62
    -63
    -64
    -65
    -66
    -67
    -68
    -69
    -70
    -71
    -72
    -73
    -74
    -75
    -76
    -77
    -78
    -79
    -80
    -81
    -82
    -83
    -84
    -85
    -86
    -87
    -88
    -89
    -90
    -91
    -92
    -93
    -94
    -95
    -96
    -97
    -98
    def spearman_correlation(vv: List[OrderedDict], num_values: int, pvalue: float):
    -    """Simple matrix plots with spearman correlation for each pair in vv.
    -
    -    Args:
    -        vv: list of OrderedDicts with index: value. Spearman correlation
    -            is computed for the keys.
    -        num_values: Use only these many values from the data (from the start
    -            of the OrderedDicts)
    -        pvalue: correlation coefficients for which the p-value is below the
    -            threshold `pvalue/len(vv)` will be discarded.
    -    """
    -    r: np.ndarray = np.ndarray((len(vv), len(vv)))
    -    p: np.ndarray = np.ndarray((len(vv), len(vv)))
    -    for i, a in enumerate(vv):
    -        for j, b in enumerate(vv):
    -            from scipy.stats._stats_py import SpearmanrResult
    -
    -            spearman: SpearmanrResult = sp.stats.spearmanr(
    -                list(a.keys())[:num_values], list(b.keys())[:num_values]
    -            )
    -            r[i][j] = (
    -                spearman.correlation if spearman.pvalue < pvalue / len(vv) else np.nan
    -            )  # Bonferroni correction
    -            p[i][j] = spearman.pvalue
    -    fig, axs = plt.subplots(1, 2, figsize=(16, 7))
    -    plot1 = axs[0].matshow(r, vmin=-1, vmax=1)
    -    axs[0].set_title(f"Spearman correlation (top {num_values} values)")
    -    axs[0].set_xlabel("Runs")
    -    axs[0].set_ylabel("Runs")
    -    fig.colorbar(plot1, ax=axs[0])
    -    plot2 = axs[1].matshow(p, vmin=0, vmax=1)
    -    axs[1].set_title("p-value")
    -    axs[1].set_xlabel("Runs")
    -    axs[1].set_ylabel("Runs")
    -    fig.colorbar(plot2, ax=axs[1])
    -
    -    return fig
    +            
    def spearman_correlation(vv: List[OrderedDict], num_values: int, pvalue: float):
    +    """Simple matrix plots with spearman correlation for each pair in vv.
    +
    +    Args:
    +        vv: list of OrderedDicts with index: value. Spearman correlation
    +            is computed for the keys.
    +        num_values: Use only these many values from the data (from the start
    +            of the OrderedDicts)
    +        pvalue: correlation coefficients for which the p-value is below the
    +            threshold `pvalue/len(vv)` will be discarded.
    +    """
    +    r: np.ndarray = np.ndarray((len(vv), len(vv)))
    +    p: np.ndarray = np.ndarray((len(vv), len(vv)))
    +    for i, a in enumerate(vv):
    +        for j, b in enumerate(vv):
    +            from scipy.stats._stats_py import SpearmanrResult
    +
    +            spearman: SpearmanrResult = sp.stats.spearmanr(
    +                list(a.keys())[:num_values], list(b.keys())[:num_values]
    +            )
    +            r[i][j] = (
    +                spearman.correlation if spearman.pvalue < pvalue / len(vv) else np.nan
    +            )  # Bonferroni correction
    +            p[i][j] = spearman.pvalue
    +    fig, axs = plt.subplots(1, 2, figsize=(16, 7))
    +    plot1 = axs[0].matshow(r, vmin=-1, vmax=1)
    +    axs[0].set_title(f"Spearman correlation (top {num_values} values)")
    +    axs[0].set_xlabel("Runs")
    +    axs[0].set_ylabel("Runs")
    +    fig.colorbar(plot1, ax=axs[0])
    +    plot2 = axs[1].matshow(p, vmin=0, vmax=1)
    +    axs[1].set_title("p-value")
    +    axs[1].set_xlabel("Runs")
    +    axs[1].set_ylabel("Runs")
    +    fig.colorbar(plot2, ax=axs[1])
    +
    +    return fig
     
    @@ -3078,8 +3756,9 @@

    Plots the shapley values, as returned from -compute_shapley_values, with error bars -corresponding to an \(\alpha\)-level confidence interval.

    +compute_shapley_values, +with error bars corresponding to an \(\alpha\)-level Normal confidence +interval.

    @@ -3225,75 +3904,77 @@

    Source code in src/pydvl/reporting/plots.py -
    - + - + @@ -3017,12 +3089,12 @@

    @@ -3031,27 +3103,27 @@

    Source code in src/pydvl/value/oob/oob.py -

    def plot_shapley(
    -    df: pd.DataFrame,
    -    *,
    -    level: float = 0.05,
    -    ax: Optional[plt.Axes] = None,
    -    title: Optional[str] = None,
    -    xlabel: Optional[str] = None,
    -    ylabel: Optional[str] = None,
    -) -> plt.Axes:
    -    r"""Plots the shapley values, as returned from
    -    [compute_shapley_values][pydvl.value.shapley.common.compute_shapley_values], with error bars
    -    corresponding to an $\alpha$-level confidence interval.
    -
    -    Args:
    -        df: dataframe with the shapley values
    -        level: confidence level for the error bars
    -        ax: axes to plot on or None if a new subplots should be created
    -        title: string, title of the plot
    -        xlabel: string, x label of the plot
    -        ylabel: string, y label of the plot
    -
    -    Returns:
    -        The axes created or used
    -    """
    -    if ax is None:
    -        _, ax = plt.subplots()
    -
    -    yerr = norm.ppf(1 - level / 2) * df["data_value_stderr"]
    -
    -    ax.errorbar(x=df.index, y=df["data_value"], yerr=yerr, fmt="o", capsize=6)
    -    ax.set_xlabel(xlabel)
    -    ax.set_ylabel(ylabel)
    -    ax.set_title(title)
    -    plt.xticks(rotation=60)
    -    return ax
    +            
    def plot_shapley(
    +    df: pd.DataFrame,
    +    *,
    +    level: float = 0.05,
    +    ax: Optional[plt.Axes] = None,
    +    title: Optional[str] = None,
    +    xlabel: Optional[str] = None,
    +    ylabel: Optional[str] = None,
    +) -> plt.Axes:
    +    r"""Plots the shapley values, as returned from
    +    [compute_shapley_values][pydvl.value.shapley.common.compute_shapley_values],
    +    with error bars corresponding to an $\alpha$-level Normal confidence
    +    interval.
    +
    +    Args:
    +        df: dataframe with the shapley values
    +        level: confidence level for the error bars
    +        ax: axes to plot on or None if a new subplots should be created
    +        title: string, title of the plot
    +        xlabel: string, x label of the plot
    +        ylabel: string, y label of the plot
    +
    +    Returns:
    +        The axes created or used
    +    """
    +    if ax is None:
    +        _, ax = plt.subplots()
    +
    +    yerr = norm.ppf(1 - level / 2) * df["data_value_stderr"]
    +
    +    ax.errorbar(x=df.index, y=df["data_value"], yerr=yerr, fmt="o", capsize=6)
    +    ax.set_xlabel(xlabel)
    +    ax.set_ylabel(ylabel)
    +    ax.set_title(title)
    +    plt.xticks(rotation=60)
    +    return ax
     
    @@ -3378,45 +4059,45 @@

    Source code in src/pydvl/reporting/plots.py -
    + + + + - +
    def plot_influence_distribution_by_label(
    -    influences: NDArray[np.float_], labels: NDArray[np.float_], title_extra: str = ""
    -):
    -    """Plots the histogram of the influence that all samples in the training set
    -    have over a single sample index, separated by labels.
    -
    -    Args:
    -       influences: array of influences (training samples x test samples)
    -       labels: labels for the training set.
    -       title_extra:
    -    """
    -    _, ax = plt.subplots()
    -    unique_labels = np.unique(labels)
    -    for label in unique_labels:
    -        ax.hist(influences[labels == label], label=label, alpha=0.7)
    -    ax.set_xlabel("Influence values")
    -    ax.set_ylabel("Number of samples")
    -    ax.set_title(f"Distribution of influences " + title_extra)
    -    ax.legend()
    -    plt.show()
    +            
    def plot_influence_distribution_by_label(
    +    influences: NDArray[np.float_], labels: NDArray[np.float_], title_extra: str = ""
    +):
    +    """Plots the histogram of the influence that all samples in the training set
    +    have over a single sample index, separated by labels.
    +
    +    Args:
    +       influences: array of influences (training samples x test samples)
    +       labels: labels for the training set.
    +       title_extra:
    +    """
    +    _, ax = plt.subplots()
    +    unique_labels = np.unique(labels)
    +    for label in unique_labels:
    +        ax.hist(influences[labels == label], label=label, alpha=0.7)
    +    ax.set_xlabel("Influence values")
    +    ax.set_ylabel("Number of samples")
    +    ax.set_title(f"Distribution of influences " + title_extra)
    +    ax.legend()
    +    plt.show()
     
    @@ -3436,11 +4117,11 @@

    2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/reporting/scores/index.html b/devel/api/pydvl/reporting/scores/index.html index 1dbed6494..5aed61488 100644 --- a/devel/api/pydvl/reporting/scores/index.html +++ b/devel/api/pydvl/reporting/scores/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2770,11 +2770,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/utils/caching/index.html b/devel/api/pydvl/utils/caching/index.html index 113de7b86..d98711e0f 100644 --- a/devel/api/pydvl/utils/caching/index.html +++ b/devel/api/pydvl/utils/caching/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3530,11 +3530,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/utils/config/index.html b/devel/api/pydvl/utils/config/index.html index 66c685d36..e71706e71 100644 --- a/devel/api/pydvl/utils/config/index.html +++ b/devel/api/pydvl/utils/config/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3050,11 +3050,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/utils/dataset/index.html b/devel/api/pydvl/utils/dataset/index.html index d1fa0cd36..fec35b6b2 100644 --- a/devel/api/pydvl/utils/dataset/index.html +++ b/devel/api/pydvl/utils/dataset/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -5528,11 +5528,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/utils/functional/index.html b/devel/api/pydvl/utils/functional/index.html index 96cd31706..22624b52a 100644 --- a/devel/api/pydvl/utils/functional/index.html +++ b/devel/api/pydvl/utils/functional/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2888,11 +2888,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/utils/index.html b/devel/api/pydvl/utils/index.html index fd306cd21..3bb4ac525 100644 --- a/devel/api/pydvl/utils/index.html +++ b/devel/api/pydvl/utils/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2491,11 +2491,11 @@

    Utils

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/utils/numeric/index.html b/devel/api/pydvl/utils/numeric/index.html index cd56fe9da..c48c10619 100644 --- a/devel/api/pydvl/utils/numeric/index.html +++ b/devel/api/pydvl/utils/numeric/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -4001,11 +4001,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/utils/parallel/index.html b/devel/api/pydvl/utils/parallel/index.html index 770f81a2e..f3de0eef0 100644 --- a/devel/api/pydvl/utils/parallel/index.html +++ b/devel/api/pydvl/utils/parallel/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2556,11 +2556,11 @@

    This module is deprecat Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/utils/progress/index.html b/devel/api/pydvl/utils/progress/index.html index 686dcfb4f..b68543dee 100644 --- a/devel/api/pydvl/utils/progress/index.html +++ b/devel/api/pydvl/utils/progress/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2737,11 +2737,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/utils/score/index.html b/devel/api/pydvl/utils/score/index.html index 9053e993f..abb11c527 100644 --- a/devel/api/pydvl/utils/score/index.html +++ b/devel/api/pydvl/utils/score/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3058,11 +3058,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/utils/status/index.html b/devel/api/pydvl/utils/status/index.html index 503f47d85..d3a4b901b 100644 --- a/devel/api/pydvl/utils/status/index.html +++ b/devel/api/pydvl/utils/status/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2678,11 +2678,11 @@

    Boolean casting2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/utils/types/index.html b/devel/api/pydvl/utils/types/index.html index f958365ee..13cb47b6d 100644 --- a/devel/api/pydvl/utils/types/index.html +++ b/devel/api/pydvl/utils/types/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3194,11 +3194,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/utils/utility/index.html b/devel/api/pydvl/utils/utility/index.html index 7e9d76080..0db8cf7e6 100644 --- a/devel/api/pydvl/utils/utility/index.html +++ b/devel/api/pydvl/utils/utility/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3589,11 +3589,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/index.html b/devel/api/pydvl/value/index.html index 778983427..2ec8bffe5 100644 --- a/devel/api/pydvl/value/index.html +++ b/devel/api/pydvl/value/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2496,11 +2496,11 @@

    Value

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/least_core/common/index.html b/devel/api/pydvl/value/least_core/common/index.html index f44a5ce8b..dd3af9e59 100644 --- a/devel/api/pydvl/value/least_core/common/index.html +++ b/devel/api/pydvl/value/least_core/common/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3135,11 +3135,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/value/least_core/index.html b/devel/api/pydvl/value/least_core/index.html index 0ed80bc2e..6ab57e6c5 100644 --- a/devel/api/pydvl/value/least_core/index.html +++ b/devel/api/pydvl/value/least_core/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2906,11 +2906,11 @@

    2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/value/least_core/montecarlo/index.html b/devel/api/pydvl/value/least_core/montecarlo/index.html index 10a60def0..30be551e1 100644 --- a/devel/api/pydvl/value/least_core/montecarlo/index.html +++ b/devel/api/pydvl/value/least_core/montecarlo/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3044,11 +3044,11 @@

    2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/value/least_core/naive/index.html b/devel/api/pydvl/value/least_core/naive/index.html index 8463f715d..900a0ce02 100644 --- a/devel/api/pydvl/value/least_core/naive/index.html +++ b/devel/api/pydvl/value/least_core/naive/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2940,11 +2940,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/value/loo/index.html b/devel/api/pydvl/value/loo/index.html index 440cc9cd4..fdd277101 100644 --- a/devel/api/pydvl/value/loo/index.html +++ b/devel/api/pydvl/value/loo/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2493,11 +2493,11 @@

    Loo

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/loo/loo/index.html b/devel/api/pydvl/value/loo/loo/index.html index 941a93571..32ff7d565 100644 --- a/devel/api/pydvl/value/loo/loo/index.html +++ b/devel/api/pydvl/value/loo/loo/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2841,11 +2841,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/loo/naive/index.html b/devel/api/pydvl/value/loo/naive/index.html index c030e6eef..8b1eb1f6b 100644 --- a/devel/api/pydvl/value/loo/naive/index.html +++ b/devel/api/pydvl/value/loo/naive/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2591,11 +2591,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/oob/index.html b/devel/api/pydvl/value/oob/index.html index d15aaaed7..09bfb00d0 100644 --- a/devel/api/pydvl/value/oob/index.html +++ b/devel/api/pydvl/value/oob/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2493,11 +2493,11 @@

    Oob

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/oob/oob/index.html b/devel/api/pydvl/value/oob/oob/index.html index 19b91cb72..053d756ad 100644 --- a/devel/api/pydvl/value/oob/oob/index.html +++ b/devel/api/pydvl/value/oob/oob/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2610,7 +2610,7 @@

    References - compute_data_oob(u, n_est=10, max_samples=0.8, n_jobs=None, loss=None, *, progress=False) + compute_data_oob(u, *, n_est=10, max_samples=0.8, loss=None, n_jobs=None, seed=None, progress=False)

    @@ -2618,10 +2618,15 @@

    Computes Data out of bag values

    -

    This implements the method described in (Kwon and Zou, 2023) 1. -It fits several base estimators provided through u.model through a bagging process. The point value corresponds to the average loss of estimators which were not fit on it.

    -

    \(w_{bj}\in Z\) is the number of times the j-th datum \((x_j, y_j)\) is selected in the b-th bootstrap dataset.

    -
    \[\psi((x_i,y_i),\Theta_B):=\frac{\sum_{b=1}^{B}\mathbb{1}(w_{bi}=0)T(y_i, \hat{f}_b(x_i))}{\sum_{b=1}^{B} +

    This implements the method described in +(Kwon and Zou, 2023)1. +It fits several base estimators provided through u.model through a bagging +process. The point value corresponds to the average loss of estimators which +were not fit on it.

    +

    \(w_{bj}\in Z\) is the number of times the j-th datum \((x_j, y_j)\) is selected +in the b-th bootstrap dataset.

    +
    \[\psi((x_i,y_i),\Theta_B):=\frac{\sum_{b=1}^{B}\mathbb{1}(w_{bi}=0)T(y_i, +\hat{f}_b(x_i))}{\sum_{b=1}^{B} \mathbb{1} (w_{bi}=0)}\]

    With:

    @@ -2629,8 +2634,11 @@

    T: Y \times Y \rightarrow \mathbb{R} \]

    -

    T is a score function that represents the goodness of a weak learner \(\hat{f}_b\) at the i-th datum \((x_i, y_i)\).

    -

    There is a need to tune n_est and max_samples jointly to ensure all samples are at least 1 time oob, otherwise the result could include a nan value for that datum.

    +

    T is a score function that represents the goodness of a weak learner +\(\hat{f}_b\) at the i-th datum \((x_i, y_i)\).

    +

    n_est and max_samples must be tuned jointly to ensure that all samples +are at least 1 time out-of-bag, otherwise the result could include a NaN +value for that datum.

    @@ -2678,7 +2686,8 @@

    max_samples
    -

    The fraction of samples to draw to train each base estimator.

    +

    The fraction of samples to draw to train each base +estimator.

    @@ -2692,6 +2701,25 @@

    loss +
    +

    A function taking as parameters model prediction and corresponding +data labels(preds, y) and returning an array of point-wise errors.

    +
    +

    + + TYPE: + Callable + + + DEFAULT: + None + +

    +
    n_jobs @@ -2712,16 +2740,16 @@

    lossseed
    -

    A function taking as parameters model prediction and corresponding -data labels(preds, y) and returning an array of point-wise errors.

    +

    Either an instance of a numpy random number generator or a seed +for it.

    TYPE: - Callable + Optional[Seed] DEFAULT: @@ -2859,88 +2887,132 @@

    106 107 108 -109

    def compute_data_oob(
    +109
    +110
    +111
    +112
    +113
    +114
    +115
    +116
    +117
    +118
    +119
    +120
    +121
    +122
    +123
    +124
    +125
    +126
    +127
    +128
    +129
    +130
    +131
    def compute_data_oob(
         u: Utility,
    -    n_est: int = 10,
    -    max_samples: float = 0.8,
    -    n_jobs: int = None,
    +    *,
    +    n_est: int = 10,
    +    max_samples: float = 0.8,
         loss: Callable = None,
    -    *,
    -    progress: bool = False,
    -) -> ValuationResult:
    -    r"""Computes Data out of bag values
    -
    -    This implements the method described in (Kwon and Zou, 2023) <sup><a href="kwon_data_2023">1</a></sup>.
    -    It fits several base estimators provided through u.model through a bagging process. The point value corresponds to the average loss of estimators which were not fit on it.
    -
    -    $w_{bj}\in Z$ is the number of times the j-th datum $(x_j, y_j)$ is selected in the b-th bootstrap dataset.
    -
    -    $$\psi((x_i,y_i),\Theta_B):=\frac{\sum_{b=1}^{B}\mathbb{1}(w_{bi}=0)T(y_i, \hat{f}_b(x_i))}{\sum_{b=1}^{B}
    -    \mathbb{1}
    -    (w_{bi}=0)}$$
    -
    -    With:
    -
    -    $$
    -    T: Y \times Y
    -    \rightarrow \mathbb{R}
    -    $$
    -
    -    T is a score function that represents the goodness of a weak learner $\hat{f}_b$ at the i-th datum $(x_i, y_i)$.
    -
    -    There is a need to tune n_est and max_samples jointly to ensure all samples are at least 1 time oob, otherwise the result could include a nan value for that datum.
    -
    -    Args:
    -        u: Utility object with model, data, and scoring function.
    -        n_est: Number of estimator used in the bagging procedure.
    -        max_samples: The fraction of samples to draw to train each base estimator.
    -        n_jobs: The number of jobs to run in parallel used in the bagging
    -            procedure for both fit and predict.
    -        loss: A function taking as parameters model prediction and corresponding
    -            data labels(preds, y) and returning an array of point-wise errors.
    -        progress: If True, display a progress bar.
    -
    -    Returns:
    -        Object with the data values.
    -    """
    -
    -    result: ValuationResult[np.int_, np.object_] = ValuationResult.empty(
    -        algorithm="data_oob", indices=u.data.indices, data_names=u.data.data_names
    -    )
    -
    -    if is_classifier(u.model):
    -        bag = BaggingClassifier(
    -            u.model, n_estimators=n_est, max_samples=max_samples, n_jobs=n_jobs
    -        )
    -        if loss is None:
    -            loss = point_wise_accuracy
    -    elif is_regressor(u.model):
    -        bag = BaggingRegressor(
    -            u.model, n_estimators=n_est, max_samples=max_samples, n_jobs=n_jobs
    -        )
    -        if loss is None:
    -            loss = neg_l2_distance
    -    else:
    -        raise Exception(
    -            "Model has to be a classifier or a regressor in sklearn format."
    -        )
    -
    -    bag.fit(u.data.x_train, u.data.y_train)
    -
    -    for est, samples in maybe_progress(
    -        zip(bag.estimators_, bag.estimators_samples_), progress, total=n_est
    -    ):  # The bottleneck is the bag fitting not this part so TQDM is not very useful here
    -        oob_idx = np.setxor1d(u.data.indices, np.unique(samples))
    -        array_loss = loss(
    -            preds=est.predict(u.data.x_train[oob_idx]), y=u.data.y_train[oob_idx]
    -        )
    -        result += ValuationResult(
    -            algorithm="data_oob",
    -            indices=oob_idx,
    -            values=array_loss,
    -            counts=np.ones_like(array_loss, dtype=u.data.indices.dtype),
    +    n_jobs: int = None,
    +    seed: Optional[Seed] = None,
    +    progress: bool = False,
    +) -> ValuationResult:
    +    r"""Computes Data out of bag values
    +
    +    This implements the method described in
    +    (Kwon and Zou, 2023)<sup><a href="kwon_data_2023">1</a></sup>.
    +    It fits several base estimators provided through u.model through a bagging
    +    process. The point value corresponds to the average loss of estimators which
    +    were not fit on it.
    +
    +    $w_{bj}\in Z$ is the number of times the j-th datum $(x_j, y_j)$ is selected
    +    in the b-th bootstrap dataset.
    +
    +    $$\psi((x_i,y_i),\Theta_B):=\frac{\sum_{b=1}^{B}\mathbb{1}(w_{bi}=0)T(y_i,
    +    \hat{f}_b(x_i))}{\sum_{b=1}^{B}
    +    \mathbb{1}
    +    (w_{bi}=0)}$$
    +
    +    With:
    +
    +    $$
    +    T: Y \times Y
    +    \rightarrow \mathbb{R}
    +    $$
    +
    +    T is a score function that represents the goodness of a weak learner
    +    $\hat{f}_b$ at the i-th datum $(x_i, y_i)$.
    +
    +    `n_est` and `max_samples` must be tuned jointly to ensure that all samples
    +    are at least 1 time out-of-bag, otherwise the result could include a NaN
    +    value for that datum.
    +
    +    Args:
    +        u: Utility object with model, data, and scoring function.
    +        n_est: Number of estimator used in the bagging procedure.
    +        max_samples: The fraction of samples to draw to train each base
    +            estimator.
    +        loss: A function taking as parameters model prediction and corresponding
    +            data labels(preds, y) and returning an array of point-wise errors.
    +        n_jobs: The number of jobs to run in parallel used in the bagging
    +            procedure for both fit and predict.
    +        seed: Either an instance of a numpy random number generator or a seed
    +            for it.
    +        progress: If True, display a progress bar.
    +
    +    Returns:
    +        Object with the data values.
    +    """
    +    rng = np.random.default_rng(seed)
    +    random_state = np.random.RandomState(rng.bit_generator)
    +
    +    result: ValuationResult[np.int_, np.object_] = ValuationResult.empty(
    +        algorithm="data_oob", indices=u.data.indices, data_names=u.data.data_names
    +    )
    +
    +    if is_classifier(u.model):
    +        bag = BaggingClassifier(
    +            u.model,
    +            n_estimators=n_est,
    +            max_samples=max_samples,
    +            n_jobs=n_jobs,
    +            random_state=random_state,
    +        )
    +        if loss is None:
    +            loss = point_wise_accuracy
    +    elif is_regressor(u.model):
    +        bag = BaggingRegressor(
    +            u.model,
    +            n_estimators=n_est,
    +            max_samples=max_samples,
    +            n_jobs=n_jobs,
    +            random_state=random_state,
             )
    -    return result
    +        if loss is None:
    +            loss = neg_l2_distance
    +    else:
    +        raise Exception(
    +            "Model has to be a classifier or a regressor in sklearn format."
    +        )
    +
    +    bag.fit(u.data.x_train, u.data.y_train)
    +
    +    for est, samples in maybe_progress(
    +        zip(bag.estimators_, bag.estimators_samples_), progress, total=n_est
    +    ):  # The bottleneck is the bag fitting not this part so TQDM is not very useful here
    +        oob_idx = np.setxor1d(u.data.indices, np.unique(samples))
    +        array_loss = loss(
    +            y_true=u.data.y_train[oob_idx], y_pred=est.predict(u.data.x_train[oob_idx])
    +        )
    +        result += ValuationResult(
    +            algorithm="data_oob",
    +            indices=oob_idx,
    +            values=array_loss,
    +            counts=np.ones_like(array_loss, dtype=u.data.indices.dtype),
    +        )
    +    return result
     
    @@ -2954,14 +3026,14 @@

    - point_wise_accuracy(preds, y) + point_wise_accuracy(y_true, y_pred)

    -

    Computes point wise accuracy

    +

    Point-wise 0-1 loss between two arrays

    @@ -2974,29 +3046,29 @@

    predsy_true
    -

    Model prediction on

    +

    Array of true values (e.g. labels)

    TYPE: - NDArray + NDArray[T]

    yy_pred
    -

    data labels corresponding to the model predictions

    +

    Array of estimated values (e.g. model predictions)

    TYPE: - NDArray + NDArray[T]

    - NDArray + NDArray[T]
    -

    Array of point wise accuracy

    +

    Array with point-wise 0-1 losses between labels and model predictions

    - + - + + + + + @@ -3142,37 +3226,29 @@

    Source code in src/pydvl/value/oob/oob.py -

    def point_wise_accuracy(preds: NDArray, y: NDArray) -> NDArray:
    -    r"""Computes point wise accuracy
    -
    -    Args:
    -        preds: Model prediction on
    -        y:  data labels corresponding to the model predictions
    -
    -    Returns:
    -        Array of point wise accuracy
    -    """
    -    return np.array(preds == y, dtype=np.int_)
    +            
    def point_wise_accuracy(y_true: NDArray[T], y_pred: NDArray[T]) -> NDArray[T]:
    +    r"""Point-wise 0-1 loss between two arrays
    +
    +    Args:
    +        y_true: Array of true values (e.g. labels)
    +        y_pred: Array of estimated values (e.g. model predictions)
    +
    +    Returns:
    +        Array with point-wise 0-1 losses between labels and model predictions
    +    """
    +    return np.array(y_pred == y_true, dtype=y_pred.dtype)
     
    @@ -3065,14 +3137,14 @@

    - neg_l2_distance(preds, y) + neg_l2_distance(y_true, y_pred)

    -

    Computes negative l2 distance between label and model prediction

    +

    Point-wise negative \(l_2\) distance between two arrays

    @@ -3085,10 +3157,10 @@

    predsy_true
    -

    Model prediction on

    +

    Array of true values (e.g. labels)

    @@ -3099,10 +3171,10 @@

    yy_pred
    -

    data labels corresponding to the model predictions

    +

    Array of estimated values (e.g. model predictions)

    @@ -3133,7 +3205,19 @@

    -

    Array with point wise negative l2 distance between label and model prediction

    +

    Array with point-wise negative \(l_2\) distances between labels and model

    +
    +
    + + NDArray[T] + + +
    +

    predictions

    def neg_l2_distance(preds: NDArray[T], y: NDArray[T]) -> NDArray[T]:
    -    r"""Computes negative l2 distance between label and model prediction
    -
    -    Args:
    -        preds: Model prediction on
    -        y:  data labels corresponding to the model predictions
    -
    -    Returns:
    -        Array with point wise negative l2 distance between label and model prediction
    -    """
    -    return -np.square(
    -        np.array(
    -            preds - y,
    -            dtype=np.float64,
    -        )
    -    )
    +            
    def neg_l2_distance(y_true: NDArray[T], y_pred: NDArray[T]) -> NDArray[T]:
    +    r"""Point-wise negative $l_2$ distance between two arrays
    +
    +    Args:
    +        y_true: Array of true values (e.g. labels)
    +        y_pred: Array of estimated values (e.g. model predictions)
    +
    +    Returns:
    +        Array with point-wise negative $l_2$ distances between labels and model
    +        predictions
    +    """
    +    return -np.square(np.array(y_pred - y_true), dtype=y_pred.dtype)
     
    @@ -3192,11 +3268,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/result/index.html b/devel/api/pydvl/value/result/index.html index 68f94256a..61323bd81 100644 --- a/devel/api/pydvl/value/result/index.html +++ b/devel/api/pydvl/value/result/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -5099,11 +5099,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/sampler/index.html b/devel/api/pydvl/value/sampler/index.html index 79b936c36..69e7ef875 100644 --- a/devel/api/pydvl/value/sampler/index.html +++ b/devel/api/pydvl/value/sampler/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3591,11 +3591,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/semivalues/index.html b/devel/api/pydvl/value/semivalues/index.html index b4f8f4434..b32eb99c9 100644 --- a/devel/api/pydvl/value/semivalues/index.html +++ b/devel/api/pydvl/value/semivalues/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3283,7 +3283,7 @@

    """ from concurrent.futures import FIRST_COMPLETED, Future, wait - from pydvl.utils import effective_n_jobs, init_executor, init_parallel_backend + from pydvl.parallel import effective_n_jobs, init_executor, init_parallel_backend if isinstance(sampler, PermutationSampler) and not u.enable_cache: log.warning( @@ -4791,11 +4791,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/shapley/common/index.html b/devel/api/pydvl/value/shapley/common/index.html index 05389805b..5357fbdaf 100644 --- a/devel/api/pydvl/value/shapley/common/index.html +++ b/devel/api/pydvl/value/shapley/common/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3031,11 +3031,11 @@

    2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/value/shapley/gt/index.html b/devel/api/pydvl/value/shapley/gt/index.html index 67513714d..8d2f98148 100644 --- a/devel/api/pydvl/value/shapley/gt/index.html +++ b/devel/api/pydvl/value/shapley/gt/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3262,11 +3262,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/shapley/index.html b/devel/api/pydvl/value/shapley/index.html index 284d0e2c6..14db5c65f 100644 --- a/devel/api/pydvl/value/shapley/index.html +++ b/devel/api/pydvl/value/shapley/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2501,11 +2501,11 @@

    Shapley

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/shapley/knn/index.html b/devel/api/pydvl/value/shapley/knn/index.html index d16a152f5..299b61aa7 100644 --- a/devel/api/pydvl/value/shapley/knn/index.html +++ b/devel/api/pydvl/value/shapley/knn/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2861,11 +2861,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/shapley/montecarlo/index.html b/devel/api/pydvl/value/shapley/montecarlo/index.html index d32b4a68d..40732e2da 100644 --- a/devel/api/pydvl/value/shapley/montecarlo/index.html +++ b/devel/api/pydvl/value/shapley/montecarlo/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3347,11 +3347,11 @@

    2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/value/shapley/naive/index.html b/devel/api/pydvl/value/shapley/naive/index.html index c093aa14b..0aa094f54 100644 --- a/devel/api/pydvl/value/shapley/naive/index.html +++ b/devel/api/pydvl/value/shapley/naive/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3003,11 +3003,11 @@

    2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/value/shapley/owen/index.html b/devel/api/pydvl/value/shapley/owen/index.html index 982b8b4b5..893fb806a 100644 --- a/devel/api/pydvl/value/shapley/owen/index.html +++ b/devel/api/pydvl/value/shapley/owen/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2988,11 +2988,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/shapley/truncated/index.html b/devel/api/pydvl/value/shapley/truncated/index.html index b58758a6c..8c226610f 100644 --- a/devel/api/pydvl/value/shapley/truncated/index.html +++ b/devel/api/pydvl/value/shapley/truncated/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3524,11 +3524,11 @@

    2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/api/pydvl/value/shapley/types/index.html b/devel/api/pydvl/value/shapley/types/index.html index aec9c2bda..832374107 100644 --- a/devel/api/pydvl/value/shapley/types/index.html +++ b/devel/api/pydvl/value/shapley/types/index.html @@ -16,7 +16,7 @@ - + @@ -24,7 +24,7 @@ - + @@ -2592,11 +2592,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/api/pydvl/value/stopping/index.html b/devel/api/pydvl/value/stopping/index.html index 3225d6e2b..f79a5ee63 100644 --- a/devel/api/pydvl/value/stopping/index.html +++ b/devel/api/pydvl/value/stopping/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3996,11 +3996,11 @@

    Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/assets/stylesheets/main.046329b4.min.css b/devel/assets/stylesheets/main.046329b4.min.css deleted file mode 100644 index 77ac5aae8..000000000 --- a/devel/assets/stylesheets/main.046329b4.min.css +++ /dev/null @@ -1 +0,0 @@ -@charset "UTF-8";html{-webkit-text-size-adjust:none;-moz-text-size-adjust:none;text-size-adjust:none;box-sizing:border-box}*,:after,:before{box-sizing:inherit}@media (prefers-reduced-motion){*,:after,:before{transition:none!important}}body{margin:0}a,button,input,label{-webkit-tap-highlight-color:transparent}a{color:inherit;text-decoration:none}hr{border:0;box-sizing:initial;display:block;height:.05rem;overflow:visible;padding:0}small{font-size:80%}sub,sup{line-height:1em}img{border-style:none}table{border-collapse:initial;border-spacing:0}td,th{font-weight:400;vertical-align:top}button{background:#0000;border:0;font-family:inherit;font-size:inherit;margin:0;padding:0}input{border:0;outline:none}:root{--md-primary-fg-color:#4051b5;--md-primary-fg-color--light:#5d6cc0;--md-primary-fg-color--dark:#303fa1;--md-primary-bg-color:#fff;--md-primary-bg-color--light:#ffffffb3;--md-accent-fg-color:#526cfe;--md-accent-fg-color--transparent:#526cfe1a;--md-accent-bg-color:#fff;--md-accent-bg-color--light:#ffffffb3}[data-md-color-scheme=default]{color-scheme:light}[data-md-color-scheme=default] img[src$="#gh-dark-mode-only"],[data-md-color-scheme=default] 
img[src$="#only-dark"]{display:none}:root,[data-md-color-scheme=default]{--md-default-fg-color:#000000de;--md-default-fg-color--light:#0000008a;--md-default-fg-color--lighter:#00000052;--md-default-fg-color--lightest:#00000012;--md-default-bg-color:#fff;--md-default-bg-color--light:#ffffffb3;--md-default-bg-color--lighter:#ffffff4d;--md-default-bg-color--lightest:#ffffff1f;--md-code-fg-color:#36464e;--md-code-bg-color:#f5f5f5;--md-code-hl-color:#ffff0080;--md-code-hl-number-color:#d52a2a;--md-code-hl-special-color:#db1457;--md-code-hl-function-color:#a846b9;--md-code-hl-constant-color:#6e59d9;--md-code-hl-keyword-color:#3f6ec6;--md-code-hl-string-color:#1c7d4d;--md-code-hl-name-color:var(--md-code-fg-color);--md-code-hl-operator-color:var(--md-default-fg-color--light);--md-code-hl-punctuation-color:var(--md-default-fg-color--light);--md-code-hl-comment-color:var(--md-default-fg-color--light);--md-code-hl-generic-color:var(--md-default-fg-color--light);--md-code-hl-variable-color:var(--md-default-fg-color--light);--md-typeset-color:var(--md-default-fg-color);--md-typeset-a-color:var(--md-primary-fg-color);--md-typeset-mark-color:#ffff0080;--md-typeset-del-color:#f5503d26;--md-typeset-ins-color:#0bd57026;--md-typeset-kbd-color:#fafafa;--md-typeset-kbd-accent-color:#fff;--md-typeset-kbd-border-color:#b8b8b8;--md-typeset-table-color:#0000001f;--md-typeset-table-color--light:rgba(0,0,0,.035);--md-admonition-fg-color:var(--md-default-fg-color);--md-admonition-bg-color:var(--md-default-bg-color);--md-warning-fg-color:#000000de;--md-warning-bg-color:#ff9;--md-footer-fg-color:#fff;--md-footer-fg-color--light:#ffffffb3;--md-footer-fg-color--lighter:#ffffff73;--md-footer-bg-color:#000000de;--md-footer-bg-color--dark:#00000052;--md-shadow-z1:0 0.2rem 0.5rem #0000000d,0 0 0.05rem #0000001a;--md-shadow-z2:0 0.2rem 0.5rem #0000001a,0 0 0.05rem #00000040;--md-shadow-z3:0 0.2rem 0.5rem #0003,0 0 0.05rem #00000059}.md-icon 
svg{fill:currentcolor;display:block;height:1.2rem;width:1.2rem}body{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;--md-text-font-family:var(--md-text-font,_),-apple-system,BlinkMacSystemFont,Helvetica,Arial,sans-serif;--md-code-font-family:var(--md-code-font,_),SFMono-Regular,Consolas,Menlo,monospace}aside,body,input{font-feature-settings:"kern","liga";color:var(--md-typeset-color);font-family:var(--md-text-font-family)}code,kbd,pre{font-feature-settings:"kern";font-family:var(--md-code-font-family)}:root{--md-typeset-table-sort-icon:url('data:image/svg+xml;charset=utf-8,');--md-typeset-table-sort-icon--asc:url('data:image/svg+xml;charset=utf-8,');--md-typeset-table-sort-icon--desc:url('data:image/svg+xml;charset=utf-8,')}.md-typeset{-webkit-print-color-adjust:exact;color-adjust:exact;font-size:.8rem;line-height:1.6}@media print{.md-typeset{font-size:.68rem}}.md-typeset blockquote,.md-typeset dl,.md-typeset figure,.md-typeset ol,.md-typeset pre,.md-typeset ul{margin-bottom:1em;margin-top:1em}.md-typeset h1{color:var(--md-default-fg-color--light);font-size:2em;line-height:1.3;margin:0 0 1.25em}.md-typeset h1,.md-typeset h2{font-weight:300;letter-spacing:-.01em}.md-typeset h2{font-size:1.5625em;line-height:1.4;margin:1.6em 0 .64em}.md-typeset h3{font-size:1.25em;font-weight:400;letter-spacing:-.01em;line-height:1.5;margin:1.6em 0 .8em}.md-typeset h2+h3{margin-top:.8em}.md-typeset h4{font-weight:700;letter-spacing:-.01em;margin:1em 0}.md-typeset h5,.md-typeset h6{color:var(--md-default-fg-color--light);font-size:.8em;font-weight:700;letter-spacing:-.01em;margin:1.25em 0}.md-typeset h5{text-transform:uppercase}.md-typeset hr{border-bottom:.05rem solid var(--md-default-fg-color--lightest);display:flow-root;margin:1.5em 0}.md-typeset a{color:var(--md-typeset-a-color);word-break:break-word}.md-typeset a,.md-typeset a:before{transition:color 125ms}.md-typeset a:focus,.md-typeset a:hover{color:var(--md-accent-fg-color)}.md-typeset a:focus 
code,.md-typeset a:hover code{background-color:var(--md-accent-fg-color--transparent)}.md-typeset a code{color:currentcolor;transition:background-color 125ms}.md-typeset a.focus-visible{outline-color:var(--md-accent-fg-color);outline-offset:.2rem}.md-typeset code,.md-typeset kbd,.md-typeset pre{color:var(--md-code-fg-color);direction:ltr;font-variant-ligatures:none}@media print{.md-typeset code,.md-typeset kbd,.md-typeset pre{white-space:pre-wrap}}.md-typeset code{background-color:var(--md-code-bg-color);border-radius:.1rem;-webkit-box-decoration-break:clone;box-decoration-break:clone;font-size:.85em;padding:0 .2941176471em;word-break:break-word}.md-typeset code:not(.focus-visible){-webkit-tap-highlight-color:transparent;outline:none}.md-typeset pre{display:flow-root;line-height:1.4;position:relative}.md-typeset pre>code{-webkit-box-decoration-break:slice;box-decoration-break:slice;box-shadow:none;display:block;margin:0;outline-color:var(--md-accent-fg-color);overflow:auto;padding:.7720588235em 1.1764705882em;scrollbar-color:var(--md-default-fg-color--lighter) #0000;scrollbar-width:thin;touch-action:auto;word-break:normal}.md-typeset pre>code:hover{scrollbar-color:var(--md-accent-fg-color) #0000}.md-typeset pre>code::-webkit-scrollbar{height:.2rem;width:.2rem}.md-typeset pre>code::-webkit-scrollbar-thumb{background-color:var(--md-default-fg-color--lighter)}.md-typeset pre>code::-webkit-scrollbar-thumb:hover{background-color:var(--md-accent-fg-color)}.md-typeset kbd{background-color:var(--md-typeset-kbd-color);border-radius:.1rem;box-shadow:0 .1rem 0 .05rem var(--md-typeset-kbd-border-color),0 .1rem 0 var(--md-typeset-kbd-border-color),0 -.1rem .2rem var(--md-typeset-kbd-accent-color) inset;color:var(--md-default-fg-color);display:inline-block;font-size:.75em;padding:0 .6666666667em;vertical-align:text-top;word-break:break-word}.md-typeset 
mark{background-color:var(--md-typeset-mark-color);-webkit-box-decoration-break:clone;box-decoration-break:clone;color:inherit;word-break:break-word}.md-typeset abbr{border-bottom:.05rem dotted var(--md-default-fg-color--light);cursor:help;text-decoration:none}@media (hover:none){.md-typeset abbr[title]:focus:after,.md-typeset abbr[title]:hover:after{background-color:var(--md-default-fg-color);border-radius:.1rem;box-shadow:var(--md-shadow-z3);color:var(--md-default-bg-color);content:attr(title);font-size:.7rem;left:.8rem;margin-top:2em;padding:.2rem .3rem;position:absolute;right:.8rem}}.md-typeset small{opacity:.75}[dir=ltr] .md-typeset sub,[dir=ltr] .md-typeset sup{margin-left:.078125em}[dir=rtl] .md-typeset sub,[dir=rtl] .md-typeset sup{margin-right:.078125em}[dir=ltr] .md-typeset blockquote{padding-left:.6rem}[dir=rtl] .md-typeset blockquote{padding-right:.6rem}[dir=ltr] .md-typeset blockquote{border-left:.2rem solid var(--md-default-fg-color--lighter)}[dir=rtl] .md-typeset blockquote{border-right:.2rem solid var(--md-default-fg-color--lighter)}.md-typeset blockquote{color:var(--md-default-fg-color--light);margin-left:0;margin-right:0}.md-typeset ul{list-style-type:disc}[dir=ltr] .md-typeset ol,[dir=ltr] .md-typeset ul{margin-left:.625em}[dir=rtl] .md-typeset ol,[dir=rtl] .md-typeset ul{margin-right:.625em}.md-typeset ol,.md-typeset ul{padding:0}.md-typeset ol:not([hidden]),.md-typeset ul:not([hidden]){display:flow-root}.md-typeset ol ol,.md-typeset ul ol{list-style-type:lower-alpha}.md-typeset ol ol ol,.md-typeset ul ol ol{list-style-type:lower-roman}[dir=ltr] .md-typeset ol li,[dir=ltr] .md-typeset ul li{margin-left:1.25em}[dir=rtl] .md-typeset ol li,[dir=rtl] .md-typeset ul li{margin-right:1.25em}.md-typeset ol li,.md-typeset ul li{margin-bottom:.5em}.md-typeset ol li blockquote,.md-typeset ol li p,.md-typeset ul li blockquote,.md-typeset ul li p{margin:.5em 0}.md-typeset ol li:last-child,.md-typeset ul li:last-child{margin-bottom:0}[dir=ltr] .md-typeset ol 
li ol,[dir=ltr] .md-typeset ol li ul,[dir=ltr] .md-typeset ul li ol,[dir=ltr] .md-typeset ul li ul{margin-left:.625em}[dir=rtl] .md-typeset ol li ol,[dir=rtl] .md-typeset ol li ul,[dir=rtl] .md-typeset ul li ol,[dir=rtl] .md-typeset ul li ul{margin-right:.625em}.md-typeset ol li ol,.md-typeset ol li ul,.md-typeset ul li ol,.md-typeset ul li ul{margin-bottom:.5em;margin-top:.5em}[dir=ltr] .md-typeset dd{margin-left:1.875em}[dir=rtl] .md-typeset dd{margin-right:1.875em}.md-typeset dd{margin-bottom:1.5em;margin-top:1em}.md-typeset img,.md-typeset svg,.md-typeset video{height:auto;max-width:100%}.md-typeset img[align=left]{margin:1em 1em 1em 0}.md-typeset img[align=right]{margin:1em 0 1em 1em}.md-typeset img[align]:only-child{margin-top:0}.md-typeset figure{display:flow-root;margin:1em auto;max-width:100%;text-align:center;width:-webkit-fit-content;width:-moz-fit-content;width:fit-content}.md-typeset figure img{display:block}.md-typeset figcaption{font-style:italic;margin:1em auto;max-width:24rem}.md-typeset iframe{max-width:100%}.md-typeset table:not([class]){background-color:var(--md-default-bg-color);border:.05rem solid var(--md-typeset-table-color);border-radius:.1rem;display:inline-block;font-size:.64rem;max-width:100%;overflow:auto;touch-action:auto}@media print{.md-typeset table:not([class]){display:table}}.md-typeset table:not([class])+*{margin-top:1.5em}.md-typeset table:not([class]) td>:first-child,.md-typeset table:not([class]) th>:first-child{margin-top:0}.md-typeset table:not([class]) td>:last-child,.md-typeset table:not([class]) th>:last-child{margin-bottom:0}.md-typeset table:not([class]) td:not([align]),.md-typeset table:not([class]) th:not([align]){text-align:left}[dir=rtl] .md-typeset table:not([class]) td:not([align]),[dir=rtl] .md-typeset table:not([class]) th:not([align]){text-align:right}.md-typeset table:not([class]) th{font-weight:700;min-width:5rem;padding:.9375em 1.25em;vertical-align:top}.md-typeset table:not([class]) td{border-top:.05rem 
solid var(--md-typeset-table-color);padding:.9375em 1.25em;vertical-align:top}.md-typeset table:not([class]) tbody tr{transition:background-color 125ms}.md-typeset table:not([class]) tbody tr:hover{background-color:var(--md-typeset-table-color--light);box-shadow:0 .05rem 0 var(--md-default-bg-color) inset}.md-typeset table:not([class]) a{word-break:normal}.md-typeset table th[role=columnheader]{cursor:pointer}[dir=ltr] .md-typeset table th[role=columnheader]:after{margin-left:.5em}[dir=rtl] .md-typeset table th[role=columnheader]:after{margin-right:.5em}.md-typeset table th[role=columnheader]:after{content:"";display:inline-block;height:1.2em;-webkit-mask-image:var(--md-typeset-table-sort-icon);mask-image:var(--md-typeset-table-sort-icon);-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background-color 125ms;vertical-align:text-bottom;width:1.2em}.md-typeset table th[role=columnheader]:hover:after{background-color:var(--md-default-fg-color--lighter)}.md-typeset table th[role=columnheader][aria-sort=ascending]:after{background-color:var(--md-default-fg-color--light);-webkit-mask-image:var(--md-typeset-table-sort-icon--asc);mask-image:var(--md-typeset-table-sort-icon--asc)}.md-typeset table th[role=columnheader][aria-sort=descending]:after{background-color:var(--md-default-fg-color--light);-webkit-mask-image:var(--md-typeset-table-sort-icon--desc);mask-image:var(--md-typeset-table-sort-icon--desc)}.md-typeset__scrollwrap{margin:1em -.8rem;overflow-x:auto;touch-action:auto}.md-typeset__table{display:inline-block;margin-bottom:.5em;padding:0 .8rem}@media print{.md-typeset__table{display:block}}html .md-typeset__table table{display:table;margin:0;overflow:hidden;width:100%}@media screen and (max-width:44.9375em){.md-content__inner>pre{margin:1em -.8rem}.md-content__inner>pre code{border-radius:0}}.md-typeset .md-author{display:block;flex-shrink:0;height:1.6rem;overflow:hidden;position:relative;transition:color 
125ms,transform 125ms;width:1.6rem}.md-typeset .md-author img{border-radius:100%;display:block}.md-typeset .md-author--more{background:var(--md-default-fg-color--lightest);color:var(--md-default-fg-color--lighter);font-size:.6rem;font-weight:700;line-height:1.6rem;text-align:center}.md-typeset .md-author--long{height:2.4rem;width:2.4rem}.md-typeset a.md-author{transform:scale(1)}.md-typeset a.md-author img{filter:grayscale(100%) opacity(75%);transition:filter 125ms}.md-typeset a.md-author:focus,.md-typeset a.md-author:hover{transform:scale(1.1);z-index:1}.md-typeset a.md-author:focus img,.md-typeset a.md-author:hover img{filter:grayscale(0)}.md-banner{background-color:var(--md-footer-bg-color);color:var(--md-footer-fg-color);overflow:auto}@media print{.md-banner{display:none}}.md-banner--warning{background-color:var(--md-warning-bg-color);color:var(--md-warning-fg-color)}.md-banner__inner{font-size:.7rem;margin:.6rem auto;padding:0 .8rem}[dir=ltr] .md-banner__button{float:right}[dir=rtl] .md-banner__button{float:left}.md-banner__button{color:inherit;cursor:pointer;transition:opacity .25s}.no-js .md-banner__button{display:none}.md-banner__button:hover{opacity:.7}html{font-size:125%;height:100%;overflow-x:hidden}@media screen and (min-width:100em){html{font-size:137.5%}}@media screen and (min-width:125em){html{font-size:150%}}body{background-color:var(--md-default-bg-color);display:flex;flex-direction:column;font-size:.5rem;min-height:100%;position:relative;width:100%}@media print{body{display:block}}@media screen and (max-width:59.9375em){body[data-md-scrolllock]{position:fixed}}.md-grid{margin-left:auto;margin-right:auto;max-width:61rem}.md-container{display:flex;flex-direction:column;flex-grow:1}@media 
print{.md-container{display:block}}.md-main{flex-grow:1}.md-main__inner{display:flex;height:100%;margin-top:1.5rem}.md-ellipsis{overflow:hidden;text-overflow:ellipsis}.md-toggle{display:none}.md-option{height:0;opacity:0;position:absolute;width:0}.md-option:checked+label:not([hidden]){display:block}.md-option.focus-visible+label{outline-color:var(--md-accent-fg-color);outline-style:auto}.md-skip{background-color:var(--md-default-fg-color);border-radius:.1rem;color:var(--md-default-bg-color);font-size:.64rem;margin:.5rem;opacity:0;outline-color:var(--md-accent-fg-color);padding:.3rem .5rem;position:fixed;transform:translateY(.4rem);z-index:-1}.md-skip:focus{opacity:1;transform:translateY(0);transition:transform .25s cubic-bezier(.4,0,.2,1),opacity 175ms 75ms;z-index:10}@page{margin:25mm}:root{--md-clipboard-icon:url('data:image/svg+xml;charset=utf-8,')}.md-clipboard{border-radius:.1rem;color:var(--md-default-fg-color--lightest);cursor:pointer;height:1.5em;outline-color:var(--md-accent-fg-color);outline-offset:.1rem;position:absolute;right:.5em;top:.5em;transition:color .25s;width:1.5em;z-index:1}@media print{.md-clipboard{display:none}}.md-clipboard:not(.focus-visible){-webkit-tap-highlight-color:transparent;outline:none}:hover>.md-clipboard{color:var(--md-default-fg-color--light)}.md-clipboard:focus,.md-clipboard:hover{color:var(--md-accent-fg-color)}.md-clipboard:after{background-color:currentcolor;content:"";display:block;height:1.125em;margin:0 auto;-webkit-mask-image:var(--md-clipboard-icon);mask-image:var(--md-clipboard-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;width:1.125em}.md-clipboard--inline{cursor:pointer}.md-clipboard--inline code{transition:color .25s,background-color .25s}.md-clipboard--inline:focus code,.md-clipboard--inline:hover code{background-color:var(--md-accent-fg-color--transparent);color:var(--md-accent-fg-color)}@keyframes 
consent{0%{opacity:0;transform:translateY(100%)}to{opacity:1;transform:translateY(0)}}@keyframes overlay{0%{opacity:0}to{opacity:1}}.md-consent__overlay{animation:overlay .25s both;-webkit-backdrop-filter:blur(.1rem);backdrop-filter:blur(.1rem);background-color:#0000008a;height:100%;opacity:1;position:fixed;top:0;width:100%;z-index:5}.md-consent__inner{animation:consent .5s cubic-bezier(.1,.7,.1,1) both;background-color:var(--md-default-bg-color);border:0;border-radius:.1rem;bottom:0;box-shadow:0 0 .2rem #0000001a,0 .2rem .4rem #0003;max-height:100%;overflow:auto;padding:0;position:fixed;width:100%;z-index:5}.md-consent__form{padding:.8rem}.md-consent__settings{display:none;margin:1em 0}input:checked+.md-consent__settings{display:block}.md-consent__controls{margin-bottom:.8rem}.md-typeset .md-consent__controls .md-button{display:inline}@media screen and (max-width:44.9375em){.md-typeset .md-consent__controls .md-button{display:block;margin-top:.4rem;text-align:center;width:100%}}.md-consent label{cursor:pointer}.md-content{flex-grow:1;min-width:0}.md-content__inner{margin:0 .8rem 1.2rem;padding-top:.6rem}@media screen and (min-width:76.25em){[dir=ltr] .md-sidebar--primary:not([hidden])~.md-content>.md-content__inner{margin-left:1.2rem}[dir=ltr] .md-sidebar--secondary:not([hidden])~.md-content>.md-content__inner,[dir=rtl] .md-sidebar--primary:not([hidden])~.md-content>.md-content__inner{margin-right:1.2rem}[dir=rtl] .md-sidebar--secondary:not([hidden])~.md-content>.md-content__inner{margin-left:1.2rem}}.md-content__inner:before{content:"";display:block;height:.4rem}.md-content__inner>:last-child{margin-bottom:0}[dir=ltr] .md-content__button{float:right}[dir=rtl] .md-content__button{float:left}[dir=ltr] .md-content__button{margin-left:.4rem}[dir=rtl] .md-content__button{margin-right:.4rem}.md-content__button{margin:.4rem 0;padding:0}@media print{.md-content__button{display:none}}.md-typeset 
.md-content__button{color:var(--md-default-fg-color--lighter)}.md-content__button svg{display:inline;vertical-align:top}[dir=rtl] .md-content__button svg{transform:scaleX(-1)}[dir=ltr] .md-dialog{right:.8rem}[dir=rtl] .md-dialog{left:.8rem}.md-dialog{background-color:var(--md-default-fg-color);border-radius:.1rem;bottom:.8rem;box-shadow:var(--md-shadow-z3);min-width:11.1rem;opacity:0;padding:.4rem .6rem;pointer-events:none;position:fixed;transform:translateY(100%);transition:transform 0ms .4s,opacity .4s;z-index:4}@media print{.md-dialog{display:none}}.md-dialog--active{opacity:1;pointer-events:auto;transform:translateY(0);transition:transform .4s cubic-bezier(.075,.85,.175,1),opacity .4s}.md-dialog__inner{color:var(--md-default-bg-color);font-size:.7rem}.md-feedback{margin:2em 0 1em;text-align:center}.md-feedback fieldset{border:none;margin:0;padding:0}.md-feedback__title{font-weight:700;margin:1em auto}.md-feedback__inner{position:relative}.md-feedback__list{align-content:baseline;display:flex;flex-wrap:wrap;justify-content:center;position:relative}.md-feedback__list:hover .md-icon:not(:disabled){color:var(--md-default-fg-color--lighter)}:disabled .md-feedback__list{min-height:1.8rem}.md-feedback__icon{color:var(--md-default-fg-color--light);cursor:pointer;flex-shrink:0;margin:0 .1rem;transition:color 125ms}.md-feedback__icon:not(:disabled).md-icon:hover{color:var(--md-accent-fg-color)}.md-feedback__icon:disabled{color:var(--md-default-fg-color--lightest);pointer-events:none}.md-feedback__note{opacity:0;position:relative;transform:translateY(.4rem);transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .15s}.md-feedback__note>*{margin:0 auto;max-width:16rem}:disabled .md-feedback__note{opacity:1;transform:translateY(0)}.md-footer{background-color:var(--md-footer-bg-color);color:var(--md-footer-fg-color)}@media 
print{.md-footer{display:none}}.md-footer__inner{justify-content:space-between;overflow:auto;padding:.2rem}.md-footer__inner:not([hidden]){display:flex}.md-footer__link{align-items:end;display:flex;flex-grow:0.01;margin-bottom:.4rem;margin-top:1rem;max-width:100%;outline-color:var(--md-accent-fg-color);overflow:hidden;transition:opacity .25s}.md-footer__link:focus,.md-footer__link:hover{opacity:.7}[dir=rtl] .md-footer__link svg{transform:scaleX(-1)}@media screen and (max-width:44.9375em){.md-footer__link--prev{flex-shrink:0}.md-footer__link--prev .md-footer__title{display:none}}[dir=ltr] .md-footer__link--next{margin-left:auto}[dir=rtl] .md-footer__link--next{margin-right:auto}.md-footer__link--next{text-align:right}[dir=rtl] .md-footer__link--next{text-align:left}.md-footer__title{flex-grow:1;font-size:.9rem;margin-bottom:.7rem;max-width:calc(100% - 2.4rem);padding:0 1rem;white-space:nowrap}.md-footer__button{margin:.2rem;padding:.4rem}.md-footer__direction{font-size:.64rem;opacity:.7}.md-footer-meta{background-color:var(--md-footer-bg-color--dark)}.md-footer-meta__inner{display:flex;flex-wrap:wrap;justify-content:space-between;padding:.2rem}html .md-footer-meta.md-typeset a{color:var(--md-footer-fg-color--light)}html .md-footer-meta.md-typeset a:focus,html .md-footer-meta.md-typeset a:hover{color:var(--md-footer-fg-color)}.md-copyright{color:var(--md-footer-fg-color--lighter);font-size:.64rem;margin:auto .6rem;padding:.4rem 0;width:100%}@media screen and (min-width:45em){.md-copyright{width:auto}}.md-copyright__highlight{color:var(--md-footer-fg-color--light)}.md-social{display:inline-flex;gap:.2rem;margin:0 .4rem;padding:.2rem 0 .6rem}@media screen and (min-width:45em){.md-social{padding:.6rem 0}}.md-social__link{display:inline-block;height:1.6rem;text-align:center;width:1.6rem}.md-social__link:before{line-height:1.9}.md-social__link svg{fill:currentcolor;max-height:.8rem;vertical-align:-25%}.md-typeset .md-button{border:.1rem 
solid;border-radius:.1rem;color:var(--md-primary-fg-color);cursor:pointer;display:inline-block;font-weight:700;padding:.625em 2em;transition:color 125ms,background-color 125ms,border-color 125ms}.md-typeset .md-button--primary{background-color:var(--md-primary-fg-color);border-color:var(--md-primary-fg-color);color:var(--md-primary-bg-color)}.md-typeset .md-button:focus,.md-typeset .md-button:hover{background-color:var(--md-accent-fg-color);border-color:var(--md-accent-fg-color);color:var(--md-accent-bg-color)}[dir=ltr] .md-typeset .md-input{border-top-left-radius:.1rem}[dir=ltr] .md-typeset .md-input,[dir=rtl] .md-typeset .md-input{border-top-right-radius:.1rem}[dir=rtl] .md-typeset .md-input{border-top-left-radius:.1rem}.md-typeset .md-input{border-bottom:.1rem solid var(--md-default-fg-color--lighter);box-shadow:var(--md-shadow-z1);font-size:.8rem;height:1.8rem;padding:0 .6rem;transition:border .25s,box-shadow .25s}.md-typeset .md-input:focus,.md-typeset .md-input:hover{border-bottom-color:var(--md-accent-fg-color);box-shadow:var(--md-shadow-z2)}.md-typeset .md-input--stretch{width:100%}.md-header{background-color:var(--md-primary-fg-color);box-shadow:0 0 .2rem #0000,0 .2rem .4rem #0000;color:var(--md-primary-bg-color);display:block;left:0;position:sticky;right:0;top:0;z-index:4}@media print{.md-header{display:none}}.md-header[hidden]{transform:translateY(-100%);transition:transform .25s cubic-bezier(.8,0,.6,1),box-shadow .25s}.md-header--shadow{box-shadow:0 0 .2rem #0000001a,0 .2rem .4rem #0003;transition:transform .25s cubic-bezier(.1,.7,.1,1),box-shadow .25s}.md-header__inner{align-items:center;display:flex;padding:0 .2rem}.md-header__button{color:currentcolor;cursor:pointer;margin:.2rem;outline-color:var(--md-accent-fg-color);padding:.4rem;position:relative;transition:opacity 
.25s;vertical-align:middle;z-index:1}.md-header__button:hover{opacity:.7}.md-header__button:not([hidden]){display:inline-block}.md-header__button:not(.focus-visible){-webkit-tap-highlight-color:transparent;outline:none}.md-header__button.md-logo{margin:.2rem;padding:.4rem}@media screen and (max-width:76.1875em){.md-header__button.md-logo{display:none}}.md-header__button.md-logo img,.md-header__button.md-logo svg{fill:currentcolor;display:block;height:1.2rem;width:auto}@media screen and (min-width:60em){.md-header__button[for=__search]{display:none}}.no-js .md-header__button[for=__search]{display:none}[dir=rtl] .md-header__button[for=__search] svg{transform:scaleX(-1)}@media screen and (min-width:76.25em){.md-header__button[for=__drawer]{display:none}}.md-header__topic{display:flex;max-width:100%;position:absolute;transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .15s;white-space:nowrap}.md-header__topic+.md-header__topic{opacity:0;pointer-events:none;transform:translateX(1.25rem);transition:transform .4s cubic-bezier(1,.7,.1,.1),opacity .15s;z-index:-1}[dir=rtl] .md-header__topic+.md-header__topic{transform:translateX(-1.25rem)}.md-header__topic:first-child{font-weight:700}[dir=ltr] .md-header__title{margin-left:1rem}[dir=rtl] .md-header__title{margin-right:1rem}[dir=ltr] .md-header__title{margin-right:.4rem}[dir=rtl] .md-header__title{margin-left:.4rem}.md-header__title{flex-grow:1;font-size:.9rem;height:2.4rem;line-height:2.4rem}.md-header__title--active .md-header__topic{opacity:0;pointer-events:none;transform:translateX(-1.25rem);transition:transform .4s cubic-bezier(1,.7,.1,.1),opacity .15s;z-index:-1}[dir=rtl] .md-header__title--active .md-header__topic{transform:translateX(1.25rem)}.md-header__title--active .md-header__topic+.md-header__topic{opacity:1;pointer-events:auto;transform:translateX(0);transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity 
.15s;z-index:0}.md-header__title>.md-header__ellipsis{height:100%;position:relative;width:100%}.md-header__option{display:flex;flex-shrink:0;max-width:100%;transition:max-width 0ms .25s,opacity .25s .25s;white-space:nowrap}[data-md-toggle=search]:checked~.md-header .md-header__option{max-width:0;opacity:0;transition:max-width 0ms,opacity 0ms}.md-header__option>input{bottom:0}.md-header__source{display:none}@media screen and (min-width:60em){[dir=ltr] .md-header__source{margin-left:1rem}[dir=rtl] .md-header__source{margin-right:1rem}.md-header__source{display:block;max-width:11.7rem;width:11.7rem}}@media screen and (min-width:76.25em){[dir=ltr] .md-header__source{margin-left:1.4rem}[dir=rtl] .md-header__source{margin-right:1.4rem}}.md-meta{color:var(--md-default-fg-color--light);font-size:.7rem;line-height:1.3}.md-meta__list{display:inline-flex;flex-wrap:wrap;list-style:none;margin:0;padding:0}.md-meta__item:not(:last-child):after{content:"·";margin-left:.2rem;margin-right:.2rem}.md-meta__link{color:var(--md-typeset-a-color)}.md-meta__link:focus,.md-meta__link:hover{color:var(--md-accent-fg-color)}.md-draft{background-color:#ff1744;border-radius:.125em;color:#fff;display:inline-block;font-weight:700;padding-left:.5714285714em;padding-right:.5714285714em}:root{--md-nav-icon--prev:url('data:image/svg+xml;charset=utf-8,');--md-nav-icon--next:url('data:image/svg+xml;charset=utf-8,');--md-toc-icon:url('data:image/svg+xml;charset=utf-8,')}.md-nav{font-size:.7rem;line-height:1.3}.md-nav__title{color:var(--md-default-fg-color--light);display:block;font-weight:700;overflow:hidden;padding:0 .6rem;text-overflow:ellipsis}.md-nav__title .md-nav__button{display:none}.md-nav__title .md-nav__button img{height:100%;width:auto}.md-nav__title .md-nav__button.md-logo img,.md-nav__title .md-nav__button.md-logo svg{fill:currentcolor;display:block;height:2.4rem;max-width:100%;object-fit:contain;width:auto}.md-nav__list{list-style:none;margin:0;padding:0}.md-nav__item{padding:0 
.6rem}[dir=ltr] .md-nav__item .md-nav__item{padding-right:0}[dir=rtl] .md-nav__item .md-nav__item{padding-left:0}.md-nav__link{align-items:flex-start;display:flex;margin-top:.625em;scroll-snap-align:start;transition:color 125ms}.md-nav__link--passed{color:var(--md-default-fg-color--light)}.md-nav__item .md-nav__link--active,.md-nav__item .md-nav__link--active code{color:var(--md-typeset-a-color)}.md-nav__link .md-ellipsis{position:relative}.md-nav__link .md-icon:last-child{margin-left:auto}.md-nav__link svg{fill:currentcolor;flex-shrink:0;height:1.3em}[dir=ltr] .md-nav__link svg+*{margin-left:.4rem}[dir=rtl] .md-nav__link svg+*{margin-right:.4rem}.md-nav__link:not(.md-nav__container):focus,.md-nav__link:not(.md-nav__container):hover{color:var(--md-accent-fg-color);cursor:pointer}.md-nav__link.focus-visible{outline-color:var(--md-accent-fg-color);outline-offset:.2rem}.md-nav--primary .md-nav__link[for=__toc]{display:none}.md-nav--primary .md-nav__link[for=__toc] .md-icon:after{background-color:currentcolor;display:block;height:100%;-webkit-mask-image:var(--md-toc-icon);mask-image:var(--md-toc-icon);width:100%}.md-nav--primary .md-nav__link[for=__toc]~.md-nav{display:none}.md-nav__container>.md-nav__link{margin-top:0}.md-nav__container>.md-nav__link:first-child{flex-grow:1}.md-nav__icon{flex-shrink:0}.md-nav__source{display:none}@media screen and (max-width:76.1875em){.md-nav--primary,.md-nav--primary .md-nav{background-color:var(--md-default-bg-color);display:flex;flex-direction:column;height:100%;left:0;position:absolute;right:0;top:0;z-index:1}.md-nav--primary .md-nav__item,.md-nav--primary .md-nav__title{font-size:.8rem;line-height:1.5}.md-nav--primary .md-nav__title{background-color:var(--md-default-fg-color--lightest);color:var(--md-default-fg-color--light);cursor:pointer;height:5.6rem;line-height:2.4rem;padding:3rem .8rem .2rem;position:relative;white-space:nowrap}[dir=ltr] .md-nav--primary .md-nav__title .md-nav__icon{left:.4rem}[dir=rtl] .md-nav--primary 
.md-nav__title .md-nav__icon{right:.4rem}.md-nav--primary .md-nav__title .md-nav__icon{display:block;height:1.2rem;margin:.2rem;position:absolute;top:.4rem;width:1.2rem}.md-nav--primary .md-nav__title .md-nav__icon:after{background-color:currentcolor;content:"";display:block;height:100%;-webkit-mask-image:var(--md-nav-icon--prev);mask-image:var(--md-nav-icon--prev);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;width:100%}.md-nav--primary .md-nav__title~.md-nav__list{background-color:var(--md-default-bg-color);box-shadow:0 .05rem 0 var(--md-default-fg-color--lightest) inset;overflow-y:auto;scroll-snap-type:y mandatory;touch-action:pan-y}.md-nav--primary .md-nav__title~.md-nav__list>:first-child{border-top:0}.md-nav--primary .md-nav__title[for=__drawer]{background-color:var(--md-primary-fg-color);color:var(--md-primary-bg-color);font-weight:700}.md-nav--primary .md-nav__title .md-logo{display:block;left:.2rem;margin:.2rem;padding:.4rem;position:absolute;right:.2rem;top:.2rem}.md-nav--primary .md-nav__list{flex:1}.md-nav--primary .md-nav__item{border-top:.05rem solid var(--md-default-fg-color--lightest);padding:0}.md-nav--primary .md-nav__item--active>.md-nav__link{color:var(--md-typeset-a-color)}.md-nav--primary .md-nav__item--active>.md-nav__link:focus,.md-nav--primary .md-nav__item--active>.md-nav__link:hover{color:var(--md-accent-fg-color)}.md-nav--primary .md-nav__link{margin-top:0;padding:.6rem .8rem}.md-nav--primary .md-nav__link svg{margin-top:.1em}.md-nav--primary .md-nav__link>.md-nav__link{padding:0}[dir=ltr] .md-nav--primary .md-nav__link .md-nav__icon{margin-right:-.2rem}[dir=rtl] .md-nav--primary .md-nav__link .md-nav__icon{margin-left:-.2rem}.md-nav--primary .md-nav__link .md-nav__icon{font-size:1.2rem;height:1.2rem;width:1.2rem}.md-nav--primary .md-nav__link 
.md-nav__icon:after{background-color:currentcolor;content:"";display:block;height:100%;-webkit-mask-image:var(--md-nav-icon--next);mask-image:var(--md-nav-icon--next);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;width:100%}[dir=rtl] .md-nav--primary .md-nav__icon:after{transform:scale(-1)}.md-nav--primary .md-nav--secondary .md-nav{background-color:initial;position:static}[dir=ltr] .md-nav--primary .md-nav--secondary .md-nav .md-nav__link{padding-left:1.4rem}[dir=rtl] .md-nav--primary .md-nav--secondary .md-nav .md-nav__link{padding-right:1.4rem}[dir=ltr] .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav__link{padding-left:2rem}[dir=rtl] .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav__link{padding-right:2rem}[dir=ltr] .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav__link{padding-left:2.6rem}[dir=rtl] .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav__link{padding-right:2.6rem}[dir=ltr] .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav .md-nav__link{padding-left:3.2rem}[dir=rtl] .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav .md-nav__link{padding-right:3.2rem}.md-nav--secondary{background-color:initial}.md-nav__toggle~.md-nav{display:flex;opacity:0;transform:translateX(100%);transition:transform .25s cubic-bezier(.8,0,.6,1),opacity 125ms 50ms}[dir=rtl] .md-nav__toggle~.md-nav{transform:translateX(-100%)}.md-nav__toggle:checked~.md-nav{opacity:1;transform:translateX(0);transition:transform .25s cubic-bezier(.4,0,.2,1),opacity 125ms 125ms}.md-nav__toggle:checked~.md-nav>.md-nav__list{-webkit-backface-visibility:hidden;backface-visibility:hidden}}@media screen and (max-width:59.9375em){.md-nav--primary .md-nav__link[for=__toc]{display:flex}.md-nav--primary .md-nav__link[for=__toc] .md-icon:after{content:""}.md-nav--primary 
.md-nav__link[for=__toc]+.md-nav__link{display:none}.md-nav--primary .md-nav__link[for=__toc]~.md-nav{display:flex}.md-nav__source{background-color:var(--md-primary-fg-color--dark);color:var(--md-primary-bg-color);display:block;padding:0 .2rem}}@media screen and (min-width:60em) and (max-width:76.1875em){.md-nav--integrated .md-nav__link[for=__toc]{display:flex}.md-nav--integrated .md-nav__link[for=__toc] .md-icon:after{content:""}.md-nav--integrated .md-nav__link[for=__toc]+.md-nav__link{display:none}.md-nav--integrated .md-nav__link[for=__toc]~.md-nav{display:flex}}@media screen and (min-width:60em){.md-nav--secondary .md-nav__title{background:var(--md-default-bg-color);box-shadow:0 0 .4rem .4rem var(--md-default-bg-color);position:sticky;top:0;z-index:1}.md-nav--secondary .md-nav__title[for=__toc]{scroll-snap-align:start}.md-nav--secondary .md-nav__title .md-nav__icon{display:none}}@media screen and (min-width:76.25em){.md-nav{transition:max-height .25s cubic-bezier(.86,0,.07,1)}.md-nav--primary .md-nav__title{background:var(--md-default-bg-color);box-shadow:0 0 .4rem .4rem var(--md-default-bg-color);position:sticky;top:0;z-index:1}.md-nav--primary .md-nav__title[for=__drawer]{scroll-snap-align:start}.md-nav--primary .md-nav__title .md-nav__icon,.md-nav__toggle~.md-nav{display:none}.md-nav__toggle:checked~.md-nav,.md-nav__toggle:indeterminate~.md-nav{display:block}.md-nav__item--nested>.md-nav>.md-nav__title{display:none}.md-nav__item--section{display:block;margin:1.25em 0}.md-nav__item--section:last-child{margin-bottom:0}.md-nav__item--section>.md-nav__link{font-weight:700}.md-nav__item--section>.md-nav__link[for]{color:var(--md-default-fg-color--light)}.md-nav__item--section>.md-nav__link:not(.md-nav__container){pointer-events:none}.md-nav__item--section>.md-nav__link 
.md-nav__icon{display:none}.md-nav__item--section>.md-nav{display:block}.md-nav__item--section>.md-nav>.md-nav__list>.md-nav__item{padding:0}.md-nav__icon{border-radius:100%;height:.9rem;transition:background-color .25s;width:.9rem}.md-nav__icon:hover{background-color:var(--md-accent-fg-color--transparent)}.md-nav__icon:after{background-color:currentcolor;border-radius:100%;content:"";display:inline-block;height:100%;-webkit-mask-image:var(--md-nav-icon--next);mask-image:var(--md-nav-icon--next);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:transform .25s;vertical-align:-.1rem;width:100%}[dir=rtl] .md-nav__icon:after{transform:rotate(180deg)}.md-nav__item--nested .md-nav__toggle:checked~.md-nav__link .md-nav__icon:after,.md-nav__item--nested .md-nav__toggle:indeterminate~.md-nav__link .md-nav__icon:after{transform:rotate(90deg)}.md-nav--lifted>.md-nav__list>.md-nav__item,.md-nav--lifted>.md-nav__list>.md-nav__item--nested,.md-nav--lifted>.md-nav__title{display:none}.md-nav--lifted>.md-nav__list>.md-nav__item--active{display:block;padding:0}.md-nav--lifted>.md-nav__list>.md-nav__item--active>.md-nav__link{background:var(--md-default-bg-color);box-shadow:0 0 .4rem .4rem var(--md-default-bg-color);font-weight:700;margin-top:0;padding:0 .6rem;position:sticky;top:0;z-index:1}.md-nav--lifted>.md-nav__list>.md-nav__item--active>.md-nav__link:not(.md-nav__container){pointer-events:none}.md-nav--lifted>.md-nav__list>.md-nav__item--active>.md-nav__link .md-nav__icon{display:none}.md-nav--lifted>.md-nav__list>.md-nav__item>[for]{color:var(--md-default-fg-color--light)}.md-nav--lifted .md-nav[data-md-level="1"]{display:block}[dir=ltr] .md-nav--lifted .md-nav[data-md-level="1"]>.md-nav__list>.md-nav__item{padding-right:.6rem}[dir=rtl] .md-nav--lifted 
.md-nav[data-md-level="1"]>.md-nav__list>.md-nav__item{padding-left:.6rem}.md-nav--integrated>.md-nav__list>.md-nav__item--active:not(.md-nav__item--nested){padding:0 .6rem}.md-nav--integrated>.md-nav__list>.md-nav__item--active:not(.md-nav__item--nested)>.md-nav__link{padding:0}[dir=ltr] .md-nav--integrated>.md-nav__list>.md-nav__item--active .md-nav--secondary{border-left:.05rem solid var(--md-primary-fg-color)}[dir=rtl] .md-nav--integrated>.md-nav__list>.md-nav__item--active .md-nav--secondary{border-right:.05rem solid var(--md-primary-fg-color)}.md-nav--integrated>.md-nav__list>.md-nav__item--active .md-nav--secondary{display:block;margin-bottom:1.25em}.md-nav--integrated>.md-nav__list>.md-nav__item--active .md-nav--secondary>.md-nav__title{display:none}}.md-pagination{font-size:.8rem;font-weight:700;gap:.4rem}.md-pagination,.md-pagination>*{align-items:center;display:flex;justify-content:center}.md-pagination>*{border-radius:.2rem;height:1.8rem;min-width:1.8rem;text-align:center}.md-pagination__current{background-color:var(--md-default-fg-color--lightest);color:var(--md-default-fg-color--light)}.md-pagination__link{transition:color 125ms,background-color 125ms}.md-pagination__link:focus,.md-pagination__link:hover{background-color:var(--md-accent-fg-color--transparent);color:var(--md-accent-fg-color)}.md-pagination__link:focus svg,.md-pagination__link:hover svg{color:var(--md-accent-fg-color)}.md-pagination__link.focus-visible{outline-color:var(--md-accent-fg-color);outline-offset:.2rem}.md-pagination__link svg{fill:currentcolor;color:var(--md-default-fg-color--lighter);display:block;max-height:100%;width:1.2rem}.md-post__back{border-bottom:.05rem solid var(--md-default-fg-color--lightest);margin-bottom:1.2rem;padding-bottom:1.2rem}@media screen and (max-width:76.1875em){.md-post__back{display:none}}[dir=rtl] .md-post__back svg{transform:scaleX(-1)}.md-post__authors{display:flex;flex-direction:column;gap:.6rem;margin:0 .6rem}.md-post .md-post__meta 
a {
  /* This selector starts on the previous line with
     ".md-post .md-post__meta". */
  transition: color 125ms;
}

.md-post .md-post__meta a:focus,
.md-post .md-post__meta a:hover {
  color: var(--md-accent-fg-color);
}

/* Post excerpt: header and authors become horizontal, vertically centred
   flex rows. */
.md-post--excerpt {
  margin-bottom: 3.2rem;
}

.md-post--excerpt .md-post__header {
  align-items: center;
  display: flex;
  gap: .6rem;
  min-height: 1.6rem;
}

.md-post--excerpt .md-post__authors {
  align-items: center;
  display: inline-flex;
  flex-direction: row;
  gap: .2rem;
  margin: 0;
  min-height: 2.4rem;
}

[dir=ltr] .md-post--excerpt .md-post__meta .md-meta__list {
  margin-right: .4rem;
}

[dir=rtl] .md-post--excerpt .md-post__meta .md-meta__list {
  margin-left: .4rem;
}

/* First child of the excerpt content: no top margin, and the
   --md-scroll-margin variable is set to 6rem. */
.md-post--excerpt .md-post__content > :first-child {
  --md-scroll-margin: 6rem;
  margin-top: 0;
}

.md-post > .md-nav--secondary,
.md-post > .md-nav:first-child > .md-nav__list {
  margin: 1em 0;
}

/* Profile: a flex row in which the description takes the remaining width. */
.md-profile {
  align-items: center;
  display: flex;
  font-size: .7rem;
  gap: .6rem;
  line-height: 1.4;
  width: 100%;
}

.md-profile__description {
  flex-grow: 1;
}

/* Post layout: a flex container that becomes a reversed column at or below
   76.1875em. */
.md-content--post {
  display: flex;
}

@media screen and (max-width: 76.1875em) {
  .md-content--post {
    flex-flow: column-reverse;
  }
}

/* min-width: 0 lets the inner content shrink below its intrinsic width. */
.md-content--post > .md-content__inner {
  min-width: 0;
}

@media screen and (min-width: 76.25em) {
  [dir=ltr] .md-content--post > .md-content__inner {
    margin-left: 1.2rem;
  }

  [dir=rtl] .md-content--post > .md-content__inner {
    margin-right: 1.2rem;
  }
}

@media screen and (max-width: 76.1875em) {
  .md-sidebar.md-sidebar--post {
    padding: 0;
  }
}

/* Icon for search results; .md-search-result__icon:after uses this variable
   as its mask image. The data URI previously had an empty payload, which
   gives the mask nothing to draw and leaves the icon blank. The payload is
   restored here with the Material Design "file-search-outline" glyph.
   NOTE(review): confirm the glyph against the theme's upstream build. */
:root {
  --md-search-result-icon: url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h7c-.41-.25-.8-.56-1.14-.9-.33-.33-.61-.7-.86-1.1H6V4h7v5h5v1.18c.71.16 1.39.43 2 .82V8l-6-6m6.31 16.9c1.33-2.11.69-4.9-1.4-6.22-2.11-1.33-4.91-.68-6.22 1.4-1.34 2.11-.69 4.89 1.4 6.22 1.46.93 3.32.93 4.79.02L22 23.39 23.39 22l-3.08-3.1m-3.81.1a2.5 2.5 0 0 1-2.5-2.5 2.5 2.5 0 0 1 2.5-2.5 2.5 2.5 0 0 1 2.5 2.5 2.5 2.5 0 0 1-2.5 2.5Z"/></svg>');
}

/* Search container; hidden when the .no-js class is present on an ancestor. */
.md-search {
  position: relative;
}

@media screen and (min-width: 60em) {
  .md-search {
    padding: .2rem 0;
  }
}

.no-js .md-search {
  display: none;
}

/* Overlay: fully transparent by default. */
.md-search__overlay {
  opacity: 0;
  z-index: 1;
}

/* Below 60em the overlay is a small rounded box that becomes opaque when
   the search toggle is checked. */
@media screen and (max-width: 59.9375em) {
  [dir=ltr] .md-search__overlay {
    left: -2.2rem;
  }

  [dir=rtl] .md-search__overlay {
    right: -2.2rem;
  }

  .md-search__overlay {
    background-color: var(--md-default-bg-color);
    border-radius: 1rem;
    height: 2rem;
    overflow: hidden;
    pointer-events: none;
    position: absolute;
    top: -1rem;
    transform-origin: center;
    transition: transform .3s .1s, opacity .2s .2s;
    width: 2rem;
  }

  [data-md-toggle=search]:checked ~ .md-header .md-search__overlay {
    opacity: 1;
    transition: transform .4s, opacity .1s;
  }
}

/* Media prelude; its condition and body follow on the next line. */
@media screen and
(min-width: 60em) {
  /* The "@media screen and" prelude for this block is on the previous line.
     From 60em up the overlay is a zero-sized fixed layer that grows to
     100% x 200vh when the search toggle is checked. */
  [dir=ltr] .md-search__overlay {
    left: 0;
  }

  [dir=rtl] .md-search__overlay {
    right: 0;
  }

  .md-search__overlay {
    background-color: #0000008a;
    cursor: pointer;
    height: 0;
    position: fixed;
    top: 0;
    transition: width 0ms .25s, height 0ms .25s, opacity .25s;
    width: 0;
  }

  [data-md-toggle=search]:checked ~ .md-header .md-search__overlay {
    height: 200vh;
    opacity: 1;
    transition: width 0ms, height 0ms, opacity .25s;
    width: 100%;
  }
}

/* Below 60em the open overlay is scaled up; the factor depends on the
   viewport width bracket. */
@media screen and (max-width: 29.9375em) {
  [data-md-toggle=search]:checked ~ .md-header .md-search__overlay {
    transform: scale(45);
  }
}

@media screen and (min-width: 30em) and (max-width: 44.9375em) {
  [data-md-toggle=search]:checked ~ .md-header .md-search__overlay {
    transform: scale(60);
  }
}

@media screen and (min-width: 45em) and (max-width: 59.9375em) {
  [data-md-toggle=search]:checked ~ .md-header .md-search__overlay {
    transform: scale(75);
  }
}

/* Inner search container. */
.md-search__inner {
  -webkit-backface-visibility: hidden;
  backface-visibility: hidden;
}

/* Below 60em: collapsed, transparent and slightly offset; it becomes a
   full-size fixed panel when the search toggle is checked. */
@media screen and (max-width: 59.9375em) {
  [dir=ltr] .md-search__inner {
    left: 0;
  }

  [dir=rtl] .md-search__inner {
    right: 0;
  }

  .md-search__inner {
    height: 0;
    opacity: 0;
    overflow: hidden;
    position: fixed;
    top: 0;
    transform: translateX(5%);
    transition: width 0ms .3s, height 0ms .3s, transform .15s cubic-bezier(.4,0,.2,1) .15s, opacity .15s .15s;
    width: 0;
    z-index: 2;
  }

  /* Under dir=rtl the offset is in the opposite direction. */
  [dir=rtl] .md-search__inner {
    transform: translateX(-5%);
  }

  [data-md-toggle=search]:checked ~ .md-header .md-search__inner {
    height: 100%;
    opacity: 1;
    transform: translateX(0);
    transition: width 0ms 0ms, height 0ms 0ms, transform .15s cubic-bezier(.1,.7,.1,1) .15s, opacity .15s .15s;
    width: 100%;
  }
}

/* From 60em up: floated to the inline-end side with an animated width. */
@media screen and (min-width: 60em) {
  [dir=ltr] .md-search__inner {
    float: right;
  }

  [dir=rtl] .md-search__inner {
    float: left;
  }

  .md-search__inner {
    padding: .1rem 0;
    position: relative;
    transition: width .25s cubic-bezier(.1,.7,.1,1);
    width: 11.7rem;
  }
}

/* Width of the open search field for the two wide-screen brackets. */
@media screen and (min-width: 60em) and (max-width: 76.1875em) {
  [data-md-toggle=search]:checked ~ .md-header .md-search__inner {
    width: 23.4rem;
  }
}

/* This block is left open: the selector below is finished, and the block
   closed, on the next line. */
@media screen and (min-width: 76.25em) {
  [data-md-toggle=search]:checked ~ .md-header
.md-search__inner {
  /* The start of this selector and the enclosing
     @media (min-width: 76.25em) block are on the previous line. */
  width: 34.4rem;
}
}

/* Search form. */
.md-search__form {
  background-color: var(--md-default-bg-color);
  box-shadow: 0 0 .6rem #0000;
  height: 2.4rem;
  position: relative;
  transition: color .25s, background-color .25s;
  z-index: 2;
}

/* From 60em up: translucent dark background, translucent white on hover. */
@media screen and (min-width: 60em) {
  .md-search__form {
    background-color: #00000042;
    border-radius: .1rem;
    height: 1.8rem;
  }

  .md-search__form:hover {
    background-color: #ffffff1f;
  }
}

/* While the search toggle is checked the form uses the default colours and
   keeps rounded corners only at the top. */
[data-md-toggle=search]:checked ~ .md-header .md-search__form {
  background-color: var(--md-default-bg-color);
  border-radius: .1rem .1rem 0 0;
  box-shadow: 0 0 .6rem #00000012;
  color: var(--md-default-fg-color);
}

/* Search input: the larger padding is on the inline-start side. */
[dir=ltr] .md-search__input {
  padding-left: 3.6rem;
  padding-right: 2.2rem;
}

[dir=rtl] .md-search__input {
  padding-left: 2.2rem;
  padding-right: 3.6rem;
}

.md-search__input {
  background: #0000;
  font-size: .9rem;
  height: 100%;
  position: relative;
  text-overflow: ellipsis;
  width: 100%;
  z-index: 2;
}

.md-search__input::placeholder {
  transition: color .25s;
}

.md-search__input::placeholder,
.md-search__input ~ .md-search__icon {
  color: var(--md-default-fg-color--light);
}

.md-search__input::-ms-clear {
  display: none;
}

@media screen and (max-width: 59.9375em) {
  .md-search__input {
    font-size: .9rem;
    height: 2.4rem;
    width: 100%;
  }
}

/* From 60em up: smaller inline-start padding and font size, colours taken
   from the primary palette until the search toggle is checked. */
@media screen and (min-width: 60em) {
  [dir=ltr] .md-search__input {
    padding-left: 2.2rem;
  }

  [dir=rtl] .md-search__input {
    padding-right: 2.2rem;
  }

  .md-search__input {
    color: inherit;
    font-size: .8rem;
  }

  .md-search__input::placeholder {
    color: var(--md-primary-bg-color--light);
  }

  .md-search__input + .md-search__icon {
    color: var(--md-primary-bg-color);
  }

  [data-md-toggle=search]:checked ~ .md-header .md-search__input {
    text-overflow: clip;
  }

  [data-md-toggle=search]:checked ~ .md-header .md-search__input + .md-search__icon {
    color: var(--md-default-fg-color--light);
  }

  /* The placeholder becomes fully transparent while the toggle is checked. */
  [data-md-toggle=search]:checked ~ .md-header .md-search__input::placeholder {
    color: #0000;
  }
}

/* Search icons. */
.md-search__icon {
  cursor: pointer;
  display: inline-block;
  height: 1.2rem;
  transition: color .25s, opacity .25s;
  width: 1.2rem;
}

.md-search__icon:hover {
  opacity: .7;
}

[dir=ltr] .md-search__icon[for=__search] {
  left: .5rem;
}

/* dir=rtl counterpart; the rest of this selector is on the next line. */
[dir=rtl]
.md-search__icon[for=__search] {
  /* This selector starts on the previous line with "[dir=rtl]". */
  right: .5rem;
}

/* Icon attached to the search field via for=__search. */
.md-search__icon[for=__search] {
  position: absolute;
  top: .3rem;
  z-index: 2;
}

[dir=rtl] .md-search__icon[for=__search] svg {
  transform: scaleX(-1);
}

/* The label holds two svgs: the first is hidden below 60em, the last from
   60em up, where the label also stops receiving pointer events. */
@media screen and (max-width: 59.9375em) {
  [dir=ltr] .md-search__icon[for=__search] {
    left: .8rem;
  }

  [dir=rtl] .md-search__icon[for=__search] {
    right: .8rem;
  }

  .md-search__icon[for=__search] {
    top: .6rem;
  }

  .md-search__icon[for=__search] svg:first-child {
    display: none;
  }
}

@media screen and (min-width: 60em) {
  .md-search__icon[for=__search] {
    pointer-events: none;
  }

  .md-search__icon[for=__search] svg:last-child {
    display: none;
  }
}

/* Options container, positioned at the inline-end side of the field. */
[dir=ltr] .md-search__options {
  right: .5rem;
}

[dir=rtl] .md-search__options {
  left: .5rem;
}

.md-search__options {
  pointer-events: none;
  position: absolute;
  top: .3rem;
  z-index: 2;
}

@media screen and (max-width: 59.9375em) {
  [dir=ltr] .md-search__options {
    right: .8rem;
  }

  [dir=rtl] .md-search__options {
    left: .8rem;
  }

  .md-search__options {
    top: .6rem;
  }
}

/* Option icons start transparent and scaled down. */
[dir=ltr] .md-search__options > .md-icon {
  margin-left: .2rem;
}

[dir=rtl] .md-search__options > .md-icon {
  margin-right: .2rem;
}

.md-search__options > .md-icon {
  color: var(--md-default-fg-color--light);
  opacity: 0;
  transform: scale(.75);
  transition: transform .15s cubic-bezier(.1,.7,.1,1), opacity .15s;
}

.md-search__options > .md-icon:not(.focus-visible) {
  -webkit-tap-highlight-color: transparent;
  outline: none;
}

/* They become visible and clickable once the toggle is checked and the
   input matches :valid. */
[data-md-toggle=search]:checked ~ .md-header .md-search__input:valid ~ .md-search__options > .md-icon {
  opacity: 1;
  pointer-events: auto;
  transform: scale(1);
}

[data-md-toggle=search]:checked ~ .md-header .md-search__input:valid ~ .md-search__options > .md-icon:hover {
  opacity: .7;
}

/* Suggestion layer: absolutely positioned with the same paddings as the
   input, transparent by default. */
[dir=ltr] .md-search__suggest {
  padding-left: 3.6rem;
  padding-right: 2.2rem;
}

[dir=rtl] .md-search__suggest {
  padding-left: 2.2rem;
  padding-right: 3.6rem;
}

.md-search__suggest {
  align-items: center;
  color: var(--md-default-fg-color--lighter);
  display: flex;
  font-size: .9rem;
  height: 100%;
  opacity: 0;
  position: absolute;
  top: 0;
  transition: opacity 50ms;
  white-space: nowrap;
  width: 100%;
}

/* This block is left open: the dir=rtl selector below is finished, and the
   block closed, on the next line. */
@media screen and (min-width: 60em) {
  [dir=ltr] .md-search__suggest {
    padding-left: 2.2rem;
  }

  [dir=rtl]
.md-search__suggest {
  /* This completes "[dir=rtl]" from the previous line, inside the
     @media (min-width: 60em) block opened there. */
  padding-right: 2.2rem;
}

.md-search__suggest {
  font-size: .8rem;
}

/* End of the @media block opened on the previous line. */
}

/* The suggestion fades in while the search toggle is checked. */
[data-md-toggle=search]:checked ~ .md-header .md-search__suggest {
  opacity: 1;
  transition: opacity .3s .1s;
}

/* Output panel: both bottom corners are rounded whenever a dir attribute
   matches. */
[dir=ltr] .md-search__output {
  border-bottom-left-radius: .1rem;
}

[dir=ltr] .md-search__output,
[dir=rtl] .md-search__output {
  border-bottom-right-radius: .1rem;
}

[dir=rtl] .md-search__output {
  border-bottom-left-radius: .1rem;
}

.md-search__output {
  overflow: hidden;
  position: absolute;
  width: 100%;
  z-index: 1;
}

@media screen and (max-width: 59.9375em) {
  .md-search__output {
    bottom: 0;
    top: 2.4rem;
  }
}

/* From 60em up the panel is transparent until the toggle is checked. */
@media screen and (min-width: 60em) {
  .md-search__output {
    opacity: 0;
    top: 1.9rem;
    transition: opacity .4s;
  }

  [data-md-toggle=search]:checked ~ .md-header .md-search__output {
    box-shadow: var(--md-shadow-z3);
    opacity: 1;
  }
}

/* Scroll wrapper: vertical scrolling only. */
.md-search__scrollwrap {
  -webkit-backface-visibility: hidden;
  backface-visibility: hidden;
  background-color: var(--md-default-bg-color);
  height: 100%;
  overflow-y: auto;
  touch-action: pan-y;
}

/* translateZ(0) is applied only on displays at or below 1x pixel density. */
@media (-webkit-max-device-pixel-ratio: 1), (max-resolution: 1dppx) {
  .md-search__scrollwrap {
    transform: translateZ(0);
  }
}

/* Widths match the open .md-search__inner widths for the same brackets. */
@media screen and (min-width: 60em) and (max-width: 76.1875em) {
  .md-search__scrollwrap {
    width: 23.4rem;
  }
}

@media screen and (min-width: 76.25em) {
  .md-search__scrollwrap {
    width: 34.4rem;
  }
}

/* From 60em up: zero max-height until the toggle is checked, plus thin
   scrollbar styling. This block and its last rule are left open; the
   scrollbar-color value is finished on the next line. */
@media screen and (min-width: 60em) {
  .md-search__scrollwrap {
    max-height: 0;
    scrollbar-color: var(--md-default-fg-color--lighter) #0000;
    scrollbar-width: thin;
  }

  [data-md-toggle=search]:checked ~ .md-header .md-search__scrollwrap {
    max-height: 75vh;
  }

  .md-search__scrollwrap:hover {
    scrollbar-color: var(--md-accent-fg-color)
#0000}.md-search__scrollwrap::-webkit-scrollbar{height:.2rem;width:.2rem}.md-search__scrollwrap::-webkit-scrollbar-thumb{background-color:var(--md-default-fg-color--lighter)}.md-search__scrollwrap::-webkit-scrollbar-thumb:hover{background-color:var(--md-accent-fg-color)}}.md-search-result{color:var(--md-default-fg-color);word-break:break-word}.md-search-result__meta{background-color:var(--md-default-fg-color--lightest);color:var(--md-default-fg-color--light);font-size:.64rem;line-height:1.8rem;padding:0 .8rem;scroll-snap-align:start}@media screen and (min-width:60em){[dir=ltr] .md-search-result__meta{padding-left:2.2rem}[dir=rtl] .md-search-result__meta{padding-right:2.2rem}}.md-search-result__list{list-style:none;margin:0;padding:0;-webkit-user-select:none;user-select:none}.md-search-result__item{box-shadow:0 -.05rem var(--md-default-fg-color--lightest)}.md-search-result__item:first-child{box-shadow:none}.md-search-result__link{display:block;outline:none;scroll-snap-align:start;transition:background-color .25s}.md-search-result__link:focus,.md-search-result__link:hover{background-color:var(--md-accent-fg-color--transparent)}.md-search-result__link:last-child p:last-child{margin-bottom:.6rem}.md-search-result__more>summary{cursor:pointer;display:block;outline:none;position:sticky;scroll-snap-align:start;top:0;z-index:1}.md-search-result__more>summary::marker{display:none}.md-search-result__more>summary::-webkit-details-marker{display:none}.md-search-result__more>summary>div{color:var(--md-typeset-a-color);font-size:.64rem;padding:.75em .8rem;transition:color .25s,background-color .25s}@media screen and (min-width:60em){[dir=ltr] .md-search-result__more>summary>div{padding-left:2.2rem}[dir=rtl] 
.md-search-result__more>summary>div{padding-right:2.2rem}}.md-search-result__more>summary:focus>div,.md-search-result__more>summary:hover>div{background-color:var(--md-accent-fg-color--transparent);color:var(--md-accent-fg-color)}.md-search-result__more[open]>summary{background-color:var(--md-default-bg-color)}.md-search-result__article{overflow:hidden;padding:0 .8rem;position:relative}@media screen and (min-width:60em){[dir=ltr] .md-search-result__article{padding-left:2.2rem}[dir=rtl] .md-search-result__article{padding-right:2.2rem}}[dir=ltr] .md-search-result__icon{left:0}[dir=rtl] .md-search-result__icon{right:0}.md-search-result__icon{color:var(--md-default-fg-color--light);height:1.2rem;margin:.5rem;position:absolute;width:1.2rem}@media screen and (max-width:59.9375em){.md-search-result__icon{display:none}}.md-search-result__icon:after{background-color:currentcolor;content:"";display:inline-block;height:100%;-webkit-mask-image:var(--md-search-result-icon);mask-image:var(--md-search-result-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;width:100%}[dir=rtl] .md-search-result__icon:after{transform:scaleX(-1)}.md-search-result .md-typeset{color:var(--md-default-fg-color--light);font-size:.64rem;line-height:1.6}.md-search-result .md-typeset h1{color:var(--md-default-fg-color);font-size:.8rem;font-weight:400;line-height:1.4;margin:.55rem 0}.md-search-result .md-typeset h1 mark{text-decoration:none}.md-search-result .md-typeset h2{color:var(--md-default-fg-color);font-size:.64rem;font-weight:700;line-height:1.6;margin:.5em 0}.md-search-result .md-typeset h2 mark{text-decoration:none}.md-search-result__terms{color:var(--md-default-fg-color);display:block;font-size:.64rem;font-style:italic;margin:.5em 0}.md-search-result 
mark{background-color:initial;color:var(--md-accent-fg-color);text-decoration:underline}.md-select{position:relative;z-index:1}.md-select__inner{background-color:var(--md-default-bg-color);border-radius:.1rem;box-shadow:var(--md-shadow-z2);color:var(--md-default-fg-color);left:50%;margin-top:.2rem;max-height:0;opacity:0;position:absolute;top:calc(100% - .2rem);transform:translate3d(-50%,.3rem,0);transition:transform .25s 375ms,opacity .25s .25s,max-height 0ms .5s}.md-select:focus-within .md-select__inner,.md-select:hover .md-select__inner{max-height:10rem;opacity:1;transform:translate3d(-50%,0,0);transition:transform .25s cubic-bezier(.1,.7,.1,1),opacity .25s,max-height 0ms}.md-select__inner:after{border-bottom:.2rem solid #0000;border-bottom-color:var(--md-default-bg-color);border-left:.2rem solid #0000;border-right:.2rem solid #0000;border-top:0;content:"";height:0;left:50%;margin-left:-.2rem;margin-top:-.2rem;position:absolute;top:0;width:0}.md-select__list{border-radius:.1rem;font-size:.8rem;list-style-type:none;margin:0;max-height:inherit;overflow:auto;padding:0}.md-select__item{line-height:1.8rem}[dir=ltr] .md-select__link{padding-left:.6rem;padding-right:1.2rem}[dir=rtl] .md-select__link{padding-left:1.2rem;padding-right:.6rem}.md-select__link{cursor:pointer;display:block;outline:none;scroll-snap-align:start;transition:background-color .25s,color .25s;width:100%}.md-select__link:focus,.md-select__link:hover{color:var(--md-accent-fg-color)}.md-select__link:focus{background-color:var(--md-default-fg-color--lightest)}.md-sidebar{align-self:flex-start;flex-shrink:0;padding:1.2rem 0;position:sticky;top:2.4rem;width:12.1rem}@media print{.md-sidebar{display:none}}@media screen and (max-width:76.1875em){[dir=ltr] .md-sidebar--primary{left:-12.1rem}[dir=rtl] .md-sidebar--primary{right:-12.1rem}.md-sidebar--primary{background-color:var(--md-default-bg-color);display:block;height:100%;position:fixed;top:0;transform:translateX(0);transition:transform .25s 
cubic-bezier(.4,0,.2,1),box-shadow .25s;width:12.1rem;z-index:5}[data-md-toggle=drawer]:checked~.md-container .md-sidebar--primary{box-shadow:var(--md-shadow-z3);transform:translateX(12.1rem)}[dir=rtl] [data-md-toggle=drawer]:checked~.md-container .md-sidebar--primary{transform:translateX(-12.1rem)}.md-sidebar--primary .md-sidebar__scrollwrap{bottom:0;left:0;margin:0;overflow:hidden;position:absolute;right:0;scroll-snap-type:none;top:0}}@media screen and (min-width:76.25em){.md-sidebar{height:0}.no-js .md-sidebar{height:auto}.md-header--lifted~.md-container .md-sidebar{top:4.8rem}}.md-sidebar--secondary{display:none;order:2}@media screen and (min-width:60em){.md-sidebar--secondary{height:0}.no-js .md-sidebar--secondary{height:auto}.md-sidebar--secondary:not([hidden]){display:block}.md-sidebar--secondary .md-sidebar__scrollwrap{touch-action:pan-y}}.md-sidebar__scrollwrap{scrollbar-gutter:stable;-webkit-backface-visibility:hidden;backface-visibility:hidden;margin:0 .2rem;overflow-y:auto;scrollbar-color:var(--md-default-fg-color--lighter) #0000;scrollbar-width:thin}.md-sidebar__scrollwrap::-webkit-scrollbar{height:.2rem;width:.2rem}.md-sidebar__scrollwrap:focus-within,.md-sidebar__scrollwrap:hover{scrollbar-color:var(--md-accent-fg-color) #0000}.md-sidebar__scrollwrap:focus-within::-webkit-scrollbar-thumb,.md-sidebar__scrollwrap:hover::-webkit-scrollbar-thumb{background-color:var(--md-default-fg-color--lighter)}.md-sidebar__scrollwrap:focus-within::-webkit-scrollbar-thumb:hover,.md-sidebar__scrollwrap:hover::-webkit-scrollbar-thumb:hover{background-color:var(--md-accent-fg-color)}@supports selector(::-webkit-scrollbar){.md-sidebar__scrollwrap{scrollbar-gutter:auto}[dir=ltr] .md-sidebar__inner{padding-right:calc(100% - 11.5rem)}[dir=rtl] .md-sidebar__inner{padding-left:calc(100% - 11.5rem)}}@media screen and (max-width:76.1875em){.md-overlay{background-color:#0000008a;height:0;opacity:0;position:fixed;top:0;transition:width 0ms .25s,height 0ms .25s,opacity 
.25s;width:0;z-index:5}[data-md-toggle=drawer]:checked~.md-overlay{height:100%;opacity:1;transition:width 0ms,height 0ms,opacity .25s;width:100%}}@keyframes facts{0%{height:0}to{height:.65rem}}@keyframes fact{0%{opacity:0;transform:translateY(100%)}50%{opacity:0}to{opacity:1;transform:translateY(0)}}:root{--md-source-forks-icon:url('data:image/svg+xml;charset=utf-8,');--md-source-repositories-icon:url('data:image/svg+xml;charset=utf-8,');--md-source-stars-icon:url('data:image/svg+xml;charset=utf-8,');--md-source-version-icon:url('data:image/svg+xml;charset=utf-8,')}.md-source{-webkit-backface-visibility:hidden;backface-visibility:hidden;display:block;font-size:.65rem;line-height:1.2;outline-color:var(--md-accent-fg-color);transition:opacity .25s;white-space:nowrap}.md-source:hover{opacity:.7}.md-source__icon{display:inline-block;height:2.4rem;vertical-align:middle;width:2rem}[dir=ltr] .md-source__icon svg{margin-left:.6rem}[dir=rtl] .md-source__icon svg{margin-right:.6rem}.md-source__icon svg{margin-top:.6rem}[dir=ltr] .md-source__icon+.md-source__repository{padding-left:2rem}[dir=rtl] .md-source__icon+.md-source__repository{padding-right:2rem}[dir=ltr] .md-source__icon+.md-source__repository{margin-left:-2rem}[dir=rtl] .md-source__icon+.md-source__repository{margin-right:-2rem}[dir=ltr] .md-source__repository{margin-left:.6rem}[dir=rtl] .md-source__repository{margin-right:.6rem}.md-source__repository{display:inline-block;max-width:calc(100% - 1.2rem);overflow:hidden;text-overflow:ellipsis;vertical-align:middle}.md-source__facts{display:flex;font-size:.55rem;gap:.4rem;list-style-type:none;margin:.1rem 0 0;opacity:.75;overflow:hidden;padding:0;width:100%}.md-source__repository--active .md-source__facts{animation:facts .25s ease-in}.md-source__fact{overflow:hidden;text-overflow:ellipsis}.md-source__repository--active .md-source__fact{animation:fact .4s ease-out}[dir=ltr] .md-source__fact:before{margin-right:.1rem}[dir=rtl] 
.md-source__fact:before{margin-left:.1rem}.md-source__fact:before{background-color:currentcolor;content:"";display:inline-block;height:.6rem;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;vertical-align:text-top;width:.6rem}.md-source__fact:nth-child(1n+2){flex-shrink:0}.md-source__fact--version:before{-webkit-mask-image:var(--md-source-version-icon);mask-image:var(--md-source-version-icon)}.md-source__fact--stars:before{-webkit-mask-image:var(--md-source-stars-icon);mask-image:var(--md-source-stars-icon)}.md-source__fact--forks:before{-webkit-mask-image:var(--md-source-forks-icon);mask-image:var(--md-source-forks-icon)}.md-source__fact--repositories:before{-webkit-mask-image:var(--md-source-repositories-icon);mask-image:var(--md-source-repositories-icon)}:root{--md-status:url('data:image/svg+xml;charset=utf-8,');--md-status--new:url('data:image/svg+xml;charset=utf-8,');--md-status--deprecated:url('data:image/svg+xml;charset=utf-8,');--md-status--encrypted:url('data:image/svg+xml;charset=utf-8,')}.md-status{margin-left:.2rem}.md-status:after{background-color:var(--md-default-fg-color--light);content:"";display:inline-block;height:1.125em;-webkit-mask-image:var(--md-status);mask-image:var(--md-status);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;vertical-align:text-bottom;width:1.125em}.md-status:hover:after{background-color:currentcolor}.md-status--new:after{-webkit-mask-image:var(--md-status--new);mask-image:var(--md-status--new)}.md-status--deprecated:after{-webkit-mask-image:var(--md-status--deprecated);mask-image:var(--md-status--deprecated)}.md-status--encrypted:after{-webkit-mask-image:var(--md-status--encrypted);mask-image:var(--md-status--encrypted)}
/* .md-tabs base rule. Keep the line-height number (1.3) unbroken: a line break inside the number makes the declaration invalid and it is dropped. */
.md-tabs{background-color:var(--md-primary-fg-color);color:var(--md-primary-bg-color);display:block;line-height:1.3;overflow:auto;width:100%;z-index:3}@media print{.md-tabs{display:none}}@media screen and (max-width:76.1875em){.md-tabs{display:none}}.md-tabs[hidden]{pointer-events:none}[dir=ltr] .md-tabs__list{margin-left:.2rem}[dir=rtl] .md-tabs__list{margin-right:.2rem}.md-tabs__list{contain:content;display:flex;list-style:none;margin:0;overflow:auto;padding:0;scrollbar-width:none;white-space:nowrap}.md-tabs__list::-webkit-scrollbar{display:none}.md-tabs__item{height:2.4rem;padding-left:.6rem;padding-right:.6rem}.md-tabs__item--active .md-tabs__link{color:inherit;opacity:1}.md-tabs__link{-webkit-backface-visibility:hidden;backface-visibility:hidden;display:flex;font-size:.7rem;margin-top:.8rem;opacity:.7;outline-color:var(--md-accent-fg-color);outline-offset:.2rem;transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .25s}.md-tabs__link:focus,.md-tabs__link:hover{color:inherit;opacity:1}[dir=ltr] .md-tabs__link svg{margin-right:.4rem}[dir=rtl] .md-tabs__link svg{margin-left:.4rem}.md-tabs__link svg{fill:currentcolor;height:1.3em}.md-tabs__item:nth-child(2) .md-tabs__link{transition-delay:20ms}.md-tabs__item:nth-child(3) .md-tabs__link{transition-delay:40ms}.md-tabs__item:nth-child(4) .md-tabs__link{transition-delay:60ms}.md-tabs__item:nth-child(5) .md-tabs__link{transition-delay:80ms}.md-tabs__item:nth-child(6) .md-tabs__link{transition-delay:.1s}.md-tabs__item:nth-child(7) .md-tabs__link{transition-delay:.12s}.md-tabs__item:nth-child(8) .md-tabs__link{transition-delay:.14s}.md-tabs__item:nth-child(9) .md-tabs__link{transition-delay:.16s}.md-tabs__item:nth-child(10) .md-tabs__link{transition-delay:.18s}.md-tabs__item:nth-child(11) .md-tabs__link{transition-delay:.2s}.md-tabs__item:nth-child(12) .md-tabs__link{transition-delay:.22s}.md-tabs__item:nth-child(13) .md-tabs__link{transition-delay:.24s}.md-tabs__item:nth-child(14) .md-tabs__link{transition-delay:.26s}.md-tabs__item:nth-child(15) .md-tabs__link{transition-delay:.28s}.md-tabs__item:nth-child(16) 
.md-tabs__link{transition-delay:.3s}.md-tabs[hidden] .md-tabs__link{opacity:0;transform:translateY(50%);transition:transform 0ms .1s,opacity .1s}:root{--md-tag-icon:url('data:image/svg+xml;charset=utf-8,')}.md-typeset .md-tags{margin-bottom:.75em;margin-top:-.125em}[dir=ltr] .md-typeset .md-tag{margin-right:.5em}[dir=rtl] .md-typeset .md-tag{margin-left:.5em}.md-typeset .md-tag{background:var(--md-default-fg-color--lightest);border-radius:2.4rem;display:inline-block;font-size:.64rem;font-weight:700;letter-spacing:normal;line-height:1.6;margin-bottom:.5em;padding:.3125em .9375em;vertical-align:middle}.md-typeset .md-tag[href]{-webkit-tap-highlight-color:transparent;color:inherit;outline:none;transition:color 125ms,background-color 125ms}.md-typeset .md-tag[href]:focus,.md-typeset .md-tag[href]:hover{background-color:var(--md-accent-fg-color);color:var(--md-accent-bg-color)}[id]>.md-typeset .md-tag{vertical-align:text-top}.md-typeset .md-tag-icon:before{background-color:var(--md-default-fg-color--lighter);content:"";display:inline-block;height:1.2em;margin-right:.4em;-webkit-mask-image:var(--md-tag-icon);mask-image:var(--md-tag-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background-color 125ms;vertical-align:text-bottom;width:1.2em}.md-typeset .md-tag-icon[href]:focus:before,.md-typeset .md-tag-icon[href]:hover:before{background-color:var(--md-accent-bg-color)}@keyframes 
pulse{0%{transform:scale(.95)}75%{transform:scale(1)}to{transform:scale(.95)}}:root{--md-annotation-bg-icon:url('data:image/svg+xml;charset=utf-8,');--md-annotation-icon:url('data:image/svg+xml;charset=utf-8,');--md-tooltip-width:20rem}.md-tooltip{-webkit-backface-visibility:hidden;backface-visibility:hidden;background-color:var(--md-default-bg-color);border-radius:.1rem;box-shadow:var(--md-shadow-z2);color:var(--md-default-fg-color);font-family:var(--md-text-font-family);left:clamp(var(--md-tooltip-0,0rem) + .8rem,var(--md-tooltip-x),100vw + var(--md-tooltip-0,0rem) + .8rem - var(--md-tooltip-width) - 2 * .8rem);max-width:calc(100vw - 1.6rem);opacity:0;position:absolute;top:var(--md-tooltip-y);transform:translateY(-.4rem);transition:transform 0ms .25s,opacity .25s,z-index .25s;width:var(--md-tooltip-width);z-index:0}.md-tooltip--active{opacity:1;transform:translateY(0);transition:transform .25s cubic-bezier(.1,.7,.1,1),opacity .25s,z-index 0ms;z-index:2}.focus-visible>.md-tooltip,.md-tooltip:target{outline:var(--md-accent-fg-color) auto}.md-tooltip__inner{font-size:.64rem;padding:.8rem}.md-tooltip__inner.md-typeset>:first-child{margin-top:0}.md-tooltip__inner.md-typeset>:last-child{margin-bottom:0}.md-annotation{font-weight:400;outline:none;vertical-align:text-bottom;white-space:normal}[dir=rtl] .md-annotation{direction:rtl}code .md-annotation{font-family:var(--md-code-font-family);font-size:inherit}.md-annotation:not([hidden]){display:inline-block;line-height:1.25}.md-annotation__index{border-radius:.01px;cursor:pointer;display:inline-block;margin-left:.4ch;margin-right:.4ch;outline:none;overflow:hidden;position:relative;-webkit-user-select:none;user-select:none;vertical-align:text-top;z-index:0}.md-annotation .md-annotation__index{transition:z-index .25s}@media screen{.md-annotation__index{width:2.2ch}[data-md-visible]>.md-annotation__index{animation:pulse 2s 
infinite}.md-annotation__index:before{background:var(--md-default-bg-color);-webkit-mask-image:var(--md-annotation-bg-icon);mask-image:var(--md-annotation-bg-icon)}.md-annotation__index:after,.md-annotation__index:before{content:"";height:2.2ch;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;position:absolute;top:-.1ch;width:2.2ch;z-index:-1}.md-annotation__index:after{background-color:var(--md-default-fg-color--lighter);-webkit-mask-image:var(--md-annotation-icon);mask-image:var(--md-annotation-icon);transform:scale(1.0001);transition:background-color .25s,transform .25s}.md-tooltip--active+.md-annotation__index:after{transform:rotate(45deg)}.md-tooltip--active+.md-annotation__index:after,:hover>.md-annotation__index:after{background-color:var(--md-accent-fg-color)}}.md-tooltip--active+.md-annotation__index{animation-play-state:paused;transition-duration:0ms;z-index:2}.md-annotation__index [data-md-annotation-id]{display:inline-block}@media print{.md-annotation__index [data-md-annotation-id]{background:var(--md-default-fg-color--lighter);border-radius:2ch;color:var(--md-default-bg-color);font-weight:700;padding:0 .6ch;white-space:nowrap}.md-annotation__index [data-md-annotation-id]:after{content:attr(data-md-annotation-id)}}.md-typeset .md-annotation-list{counter-reset:xxx;list-style:none}.md-typeset .md-annotation-list li{position:relative}[dir=ltr] .md-typeset .md-annotation-list li:before{left:-2.125em}[dir=rtl] .md-typeset .md-annotation-list li:before{right:-2.125em}.md-typeset .md-annotation-list li:before{background:var(--md-default-fg-color--lighter);border-radius:2ch;color:var(--md-default-bg-color);content:counter(xxx);counter-increment:xxx;font-size:.8875em;font-weight:700;height:2ch;line-height:1.25;min-width:2ch;padding:0 .6ch;position:absolute;text-align:center;top:.25em}[dir=ltr] .md-top{margin-left:50%}[dir=rtl] 
.md-top{margin-right:50%}.md-top{background-color:var(--md-default-bg-color);border-radius:1.6rem;box-shadow:var(--md-shadow-z2);color:var(--md-default-fg-color--light);cursor:pointer;display:block;font-size:.7rem;outline:none;padding:.4rem .8rem;position:fixed;top:3.2rem;transform:translate(-50%);transition:color 125ms,background-color 125ms,transform 125ms cubic-bezier(.4,0,.2,1),opacity 125ms;z-index:2}@media print{.md-top{display:none}}[dir=rtl] .md-top{transform:translate(50%)}.md-top[hidden]{opacity:0;pointer-events:none;transform:translate(-50%,.2rem);transition-duration:0ms}[dir=rtl] .md-top[hidden]{transform:translate(50%,.2rem)}.md-top:focus,.md-top:hover{background-color:var(--md-accent-fg-color);color:var(--md-accent-bg-color)}.md-top svg{display:inline-block;vertical-align:-.5em}@keyframes hoverfix{0%{pointer-events:none}}:root{--md-version-icon:url('data:image/svg+xml;charset=utf-8,')}.md-version{flex-shrink:0;font-size:.8rem;height:2.4rem}[dir=ltr] .md-version__current{margin-left:1.4rem;margin-right:.4rem}[dir=rtl] .md-version__current{margin-left:.4rem;margin-right:1.4rem}.md-version__current{color:inherit;cursor:pointer;outline:none;position:relative;top:.05rem}[dir=ltr] .md-version__current:after{margin-left:.4rem}[dir=rtl] .md-version__current:after{margin-right:.4rem}.md-version__current:after{background-color:currentcolor;content:"";display:inline-block;height:.6rem;-webkit-mask-image:var(--md-version-icon);mask-image:var(--md-version-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;width:.4rem}.md-version__list{background-color:var(--md-default-bg-color);border-radius:.1rem;box-shadow:var(--md-shadow-z2);color:var(--md-default-fg-color);list-style-type:none;margin:.2rem .8rem;max-height:0;opacity:0;overflow:auto;padding:0;position:absolute;scroll-snap-type:y mandatory;top:.15rem;transition:max-height 0ms .5s,opacity .25s 
.25s;z-index:3}.md-version:focus-within .md-version__list,.md-version:hover .md-version__list{max-height:10rem;opacity:1;transition:max-height 0ms,opacity .25s}@media (hover:none),(pointer:coarse){.md-version:hover .md-version__list{animation:hoverfix .25s forwards}.md-version:focus-within .md-version__list{animation:none}}.md-version__item{line-height:1.8rem}[dir=ltr] .md-version__link{padding-left:.6rem;padding-right:1.2rem}[dir=rtl] .md-version__link{padding-left:1.2rem;padding-right:.6rem}.md-version__link{cursor:pointer;display:block;outline:none;scroll-snap-align:start;transition:color .25s,background-color .25s;white-space:nowrap;width:100%}.md-version__link:focus,.md-version__link:hover{color:var(--md-accent-fg-color)}.md-version__link:focus{background-color:var(--md-default-fg-color--lightest)}:root{--md-admonition-icon--note:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--abstract:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--info:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--tip:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--success:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--question:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--warning:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--failure:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--danger:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--bug:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--example:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--quote:url('data:image/svg+xml;charset=utf-8,')}.md-typeset .admonition,.md-typeset details{background-color:var(--md-admonition-bg-color);border:.05rem solid #448aff;border-radius:.2rem;box-shadow:var(--md-shadow-z1);color:var(--md-admonition-fg-color);display:flow-root;font-size:.64rem;margin:1.5625em 0;padding:0 .6rem;page-break-inside:avoid;transition:box-shadow 125ms}@media 
print{.md-typeset .admonition,.md-typeset details{box-shadow:none}}.md-typeset .admonition:focus-within,.md-typeset details:focus-within{box-shadow:0 0 0 .2rem #448aff1a}.md-typeset .admonition>*,.md-typeset details>*{box-sizing:border-box}.md-typeset .admonition .admonition,.md-typeset .admonition details,.md-typeset details .admonition,.md-typeset details details{margin-bottom:1em;margin-top:1em}.md-typeset .admonition .md-typeset__scrollwrap,.md-typeset details .md-typeset__scrollwrap{margin:1em -.6rem}.md-typeset .admonition .md-typeset__table,.md-typeset details .md-typeset__table{padding:0 .6rem}.md-typeset .admonition>.tabbed-set:only-child,.md-typeset details>.tabbed-set:only-child{margin-top:0}html .md-typeset .admonition>:last-child,html .md-typeset details>:last-child{margin-bottom:.6rem}[dir=ltr] .md-typeset .admonition-title,[dir=ltr] .md-typeset summary{padding-left:2rem;padding-right:.6rem}[dir=rtl] .md-typeset .admonition-title,[dir=rtl] .md-typeset summary{padding-left:.6rem;padding-right:2rem}[dir=ltr] .md-typeset .admonition-title,[dir=ltr] .md-typeset summary{border-left-width:.2rem}[dir=rtl] .md-typeset .admonition-title,[dir=rtl] .md-typeset summary{border-right-width:.2rem}[dir=ltr] .md-typeset .admonition-title,[dir=ltr] .md-typeset summary{border-top-left-radius:.1rem}[dir=ltr] .md-typeset .admonition-title,[dir=ltr] .md-typeset summary,[dir=rtl] .md-typeset .admonition-title,[dir=rtl] .md-typeset summary{border-top-right-radius:.1rem}[dir=rtl] .md-typeset .admonition-title,[dir=rtl] .md-typeset summary{border-top-left-radius:.1rem}.md-typeset .admonition-title,.md-typeset summary{background-color:#448aff1a;border:none;font-weight:700;margin:0 -.6rem;padding-bottom:.4rem;padding-top:.4rem;position:relative}html .md-typeset .admonition-title:last-child,html .md-typeset summary:last-child{margin-bottom:0}[dir=ltr] .md-typeset .admonition-title:before,[dir=ltr] .md-typeset summary:before{left:.6rem}[dir=rtl] .md-typeset 
.admonition-title:before,[dir=rtl] .md-typeset summary:before{right:.6rem}.md-typeset .admonition-title:before,.md-typeset summary:before{background-color:#448aff;content:"";height:1rem;-webkit-mask-image:var(--md-admonition-icon--note);mask-image:var(--md-admonition-icon--note);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;position:absolute;top:.625em;width:1rem}.md-typeset .admonition-title code,.md-typeset summary code{box-shadow:0 0 0 .05rem var(--md-default-fg-color--lightest)}.md-typeset .admonition.note,.md-typeset details.note{border-color:#448aff}.md-typeset .admonition.note:focus-within,.md-typeset details.note:focus-within{box-shadow:0 0 0 .2rem #448aff1a}.md-typeset .note>.admonition-title,.md-typeset .note>summary{background-color:#448aff1a}.md-typeset .note>.admonition-title:before,.md-typeset .note>summary:before{background-color:#448aff;-webkit-mask-image:var(--md-admonition-icon--note);mask-image:var(--md-admonition-icon--note)}.md-typeset .note>.admonition-title:after,.md-typeset .note>summary:after{color:#448aff}.md-typeset .admonition.abstract,.md-typeset details.abstract{border-color:#00b0ff}.md-typeset .admonition.abstract:focus-within,.md-typeset details.abstract:focus-within{box-shadow:0 0 0 .2rem #00b0ff1a}.md-typeset .abstract>.admonition-title,.md-typeset .abstract>summary{background-color:#00b0ff1a}.md-typeset .abstract>.admonition-title:before,.md-typeset .abstract>summary:before{background-color:#00b0ff;-webkit-mask-image:var(--md-admonition-icon--abstract);mask-image:var(--md-admonition-icon--abstract)}.md-typeset .abstract>.admonition-title:after,.md-typeset .abstract>summary:after{color:#00b0ff}.md-typeset .admonition.info,.md-typeset details.info{border-color:#00b8d4}.md-typeset .admonition.info:focus-within,.md-typeset details.info:focus-within{box-shadow:0 0 0 .2rem #00b8d41a}.md-typeset .info>.admonition-title,.md-typeset 
.info>summary{background-color:#00b8d41a}.md-typeset .info>.admonition-title:before,.md-typeset .info>summary:before{background-color:#00b8d4;-webkit-mask-image:var(--md-admonition-icon--info);mask-image:var(--md-admonition-icon--info)}.md-typeset .info>.admonition-title:after,.md-typeset .info>summary:after{color:#00b8d4}.md-typeset .admonition.tip,.md-typeset details.tip{border-color:#00bfa5}.md-typeset .admonition.tip:focus-within,.md-typeset details.tip:focus-within{box-shadow:0 0 0 .2rem #00bfa51a}.md-typeset .tip>.admonition-title,.md-typeset .tip>summary{background-color:#00bfa51a}.md-typeset .tip>.admonition-title:before,.md-typeset .tip>summary:before{background-color:#00bfa5;-webkit-mask-image:var(--md-admonition-icon--tip);mask-image:var(--md-admonition-icon--tip)}.md-typeset .tip>.admonition-title:after,.md-typeset .tip>summary:after{color:#00bfa5}.md-typeset .admonition.success,.md-typeset details.success{border-color:#00c853}.md-typeset .admonition.success:focus-within,.md-typeset details.success:focus-within{box-shadow:0 0 0 .2rem #00c8531a}.md-typeset .success>.admonition-title,.md-typeset .success>summary{background-color:#00c8531a}.md-typeset .success>.admonition-title:before,.md-typeset .success>summary:before{background-color:#00c853;-webkit-mask-image:var(--md-admonition-icon--success);mask-image:var(--md-admonition-icon--success)}.md-typeset .success>.admonition-title:after,.md-typeset .success>summary:after{color:#00c853}.md-typeset .admonition.question,.md-typeset details.question{border-color:#64dd17}.md-typeset .admonition.question:focus-within,.md-typeset details.question:focus-within{box-shadow:0 0 0 .2rem #64dd171a}.md-typeset .question>.admonition-title,.md-typeset .question>summary{background-color:#64dd171a}.md-typeset .question>.admonition-title:before,.md-typeset .question>summary:before{background-color:#64dd17;-webkit-mask-image:var(--md-admonition-icon--question);mask-image:var(--md-admonition-icon--question)}.md-typeset 
.question>.admonition-title:after,.md-typeset .question>summary:after{color:#64dd17}.md-typeset .admonition.warning,.md-typeset details.warning{border-color:#ff9100}.md-typeset .admonition.warning:focus-within,.md-typeset details.warning:focus-within{box-shadow:0 0 0 .2rem #ff91001a}.md-typeset .warning>.admonition-title,.md-typeset .warning>summary{background-color:#ff91001a}.md-typeset .warning>.admonition-title:before,.md-typeset .warning>summary:before{background-color:#ff9100;-webkit-mask-image:var(--md-admonition-icon--warning);mask-image:var(--md-admonition-icon--warning)}.md-typeset .warning>.admonition-title:after,.md-typeset .warning>summary:after{color:#ff9100}.md-typeset .admonition.failure,.md-typeset details.failure{border-color:#ff5252}.md-typeset .admonition.failure:focus-within,.md-typeset details.failure:focus-within{box-shadow:0 0 0 .2rem #ff52521a}.md-typeset .failure>.admonition-title,.md-typeset .failure>summary{background-color:#ff52521a}.md-typeset .failure>.admonition-title:before,.md-typeset .failure>summary:before{background-color:#ff5252;-webkit-mask-image:var(--md-admonition-icon--failure);mask-image:var(--md-admonition-icon--failure)}.md-typeset .failure>.admonition-title:after,.md-typeset .failure>summary:after{color:#ff5252}.md-typeset .admonition.danger,.md-typeset details.danger{border-color:#ff1744}.md-typeset .admonition.danger:focus-within,.md-typeset details.danger:focus-within{box-shadow:0 0 0 .2rem #ff17441a}.md-typeset .danger>.admonition-title,.md-typeset .danger>summary{background-color:#ff17441a}.md-typeset .danger>.admonition-title:before,.md-typeset .danger>summary:before{background-color:#ff1744;-webkit-mask-image:var(--md-admonition-icon--danger);mask-image:var(--md-admonition-icon--danger)}.md-typeset .danger>.admonition-title:after,.md-typeset .danger>summary:after{color:#ff1744}.md-typeset .admonition.bug,.md-typeset details.bug{border-color:#f50057}.md-typeset .admonition.bug:focus-within,.md-typeset 
details.bug:focus-within{box-shadow:0 0 0 .2rem #f500571a}.md-typeset .bug>.admonition-title,.md-typeset .bug>summary{background-color:#f500571a}.md-typeset .bug>.admonition-title:before,.md-typeset .bug>summary:before{background-color:#f50057;-webkit-mask-image:var(--md-admonition-icon--bug);mask-image:var(--md-admonition-icon--bug)}.md-typeset .bug>.admonition-title:after,.md-typeset .bug>summary:after{color:#f50057}.md-typeset .admonition.example,.md-typeset details.example{border-color:#7c4dff}.md-typeset .admonition.example:focus-within,.md-typeset details.example:focus-within{box-shadow:0 0 0 .2rem #7c4dff1a}.md-typeset .example>.admonition-title,.md-typeset .example>summary{background-color:#7c4dff1a}.md-typeset .example>.admonition-title:before,.md-typeset .example>summary:before{background-color:#7c4dff;-webkit-mask-image:var(--md-admonition-icon--example);mask-image:var(--md-admonition-icon--example)}.md-typeset .example>.admonition-title:after,.md-typeset .example>summary:after{color:#7c4dff}.md-typeset .admonition.quote,.md-typeset details.quote{border-color:#9e9e9e}.md-typeset .admonition.quote:focus-within,.md-typeset details.quote:focus-within{box-shadow:0 0 0 .2rem #9e9e9e1a}.md-typeset .quote>.admonition-title,.md-typeset .quote>summary{background-color:#9e9e9e1a}.md-typeset .quote>.admonition-title:before,.md-typeset .quote>summary:before{background-color:#9e9e9e;-webkit-mask-image:var(--md-admonition-icon--quote);mask-image:var(--md-admonition-icon--quote)}.md-typeset .quote>.admonition-title:after,.md-typeset .quote>summary:after{color:#9e9e9e}:root{--md-footnotes-icon:url('data:image/svg+xml;charset=utf-8,')}.md-typeset .footnote{color:var(--md-default-fg-color--light);font-size:.64rem}[dir=ltr] .md-typeset .footnote>ol{margin-left:0}[dir=rtl] .md-typeset .footnote>ol{margin-right:0}.md-typeset .footnote>ol>li{transition:color 125ms}.md-typeset .footnote>ol>li:target{color:var(--md-default-fg-color)}.md-typeset .footnote>ol>li:focus-within 
.footnote-backref{opacity:1;transform:translateX(0);transition:none}.md-typeset .footnote>ol>li:hover .footnote-backref,.md-typeset .footnote>ol>li:target .footnote-backref{opacity:1;transform:translateX(0)}.md-typeset .footnote>ol>li>:first-child{margin-top:0}.md-typeset .footnote-ref{font-size:.75em;font-weight:700}html .md-typeset .footnote-ref{outline-offset:.1rem}.md-typeset [id^="fnref:"]:target>.footnote-ref{outline:auto}.md-typeset .footnote-backref{color:var(--md-typeset-a-color);display:inline-block;font-size:0;opacity:0;transform:translateX(.25rem);transition:color .25s,transform .25s .25s,opacity 125ms .25s;vertical-align:text-bottom}@media print{.md-typeset .footnote-backref{color:var(--md-typeset-a-color);opacity:1;transform:translateX(0)}}[dir=rtl] .md-typeset .footnote-backref{transform:translateX(-.25rem)}.md-typeset .footnote-backref:hover{color:var(--md-accent-fg-color)}.md-typeset .footnote-backref:before{background-color:currentcolor;content:"";display:inline-block;height:.8rem;-webkit-mask-image:var(--md-footnotes-icon);mask-image:var(--md-footnotes-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;width:.8rem}[dir=rtl] .md-typeset .footnote-backref:before svg{transform:scaleX(-1)}[dir=ltr] .md-typeset .headerlink{margin-left:.5rem}[dir=rtl] .md-typeset .headerlink{margin-right:.5rem}.md-typeset .headerlink{color:var(--md-default-fg-color--lighter);display:inline-block;opacity:0;transition:color .25s,opacity 125ms}@media print{.md-typeset .headerlink{display:none}}.md-typeset .headerlink:focus,.md-typeset :hover>.headerlink,.md-typeset :target>.headerlink{opacity:1;transition:color .25s,opacity 125ms}.md-typeset .headerlink:focus,.md-typeset .headerlink:hover,.md-typeset :target>.headerlink{color:var(--md-accent-fg-color)}.md-typeset :target{--md-scroll-margin:3.6rem;--md-scroll-offset:0rem;scroll-margin-top:calc(var(--md-scroll-margin) - 
var(--md-scroll-offset))}@media screen and (min-width:76.25em){.md-header--lifted~.md-container .md-typeset :target{--md-scroll-margin:6rem}}.md-typeset h1:target,.md-typeset h2:target,.md-typeset h3:target{--md-scroll-offset:0.2rem}.md-typeset h4:target{--md-scroll-offset:0.15rem}.md-typeset div.arithmatex{overflow:auto}@media screen and (max-width:44.9375em){.md-typeset div.arithmatex{margin:0 -.8rem}}.md-typeset div.arithmatex>*{margin-left:auto!important;margin-right:auto!important;padding:0 .8rem;touch-action:auto;width:-webkit-min-content;width:min-content}.md-typeset div.arithmatex>* mjx-container{margin:0!important}.md-typeset del.critic{background-color:var(--md-typeset-del-color)}.md-typeset del.critic,.md-typeset ins.critic{-webkit-box-decoration-break:clone;box-decoration-break:clone}.md-typeset ins.critic{background-color:var(--md-typeset-ins-color)}.md-typeset .critic.comment{-webkit-box-decoration-break:clone;box-decoration-break:clone;color:var(--md-code-hl-comment-color)}.md-typeset .critic.comment:before{content:"/* "}.md-typeset .critic.comment:after{content:" */"}.md-typeset .critic.block{box-shadow:none;display:block;margin:1em 0;overflow:auto;padding-left:.8rem;padding-right:.8rem}.md-typeset .critic.block>:first-child{margin-top:.5em}.md-typeset .critic.block>:last-child{margin-bottom:.5em}:root{--md-details-icon:url('data:image/svg+xml;charset=utf-8,')}.md-typeset details{display:flow-root;overflow:visible;padding-top:0}.md-typeset details[open]>summary:after{transform:rotate(90deg)}.md-typeset details:not([open]){box-shadow:none;padding-bottom:0}.md-typeset details:not([open])>summary{border-radius:.1rem}[dir=ltr] .md-typeset summary{padding-right:1.8rem}[dir=rtl] .md-typeset summary{padding-left:1.8rem}[dir=ltr] .md-typeset summary{border-top-left-radius:.1rem}[dir=ltr] .md-typeset summary,[dir=rtl] .md-typeset summary{border-top-right-radius:.1rem}[dir=rtl] .md-typeset summary{border-top-left-radius:.1rem}.md-typeset 
summary{cursor:pointer;display:block;min-height:1rem}.md-typeset summary.focus-visible{outline-color:var(--md-accent-fg-color);outline-offset:.2rem}.md-typeset summary:not(.focus-visible){-webkit-tap-highlight-color:transparent;outline:none}[dir=ltr] .md-typeset summary:after{right:.4rem}[dir=rtl] .md-typeset summary:after{left:.4rem}.md-typeset summary:after{background-color:currentcolor;content:"";height:1rem;-webkit-mask-image:var(--md-details-icon);mask-image:var(--md-details-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;position:absolute;top:.625em;transform:rotate(0deg);transition:transform .25s;width:1rem}[dir=rtl] .md-typeset summary:after{transform:rotate(180deg)}.md-typeset summary::marker{display:none}.md-typeset summary::-webkit-details-marker{display:none}.md-typeset .emojione,.md-typeset .gemoji,.md-typeset .twemoji{display:inline-flex;height:1.125em;vertical-align:text-top}.md-typeset .emojione svg,.md-typeset .gemoji svg,.md-typeset .twemoji svg{fill:currentcolor;max-height:100%;width:1.125em}.highlight .o,.highlight .ow{color:var(--md-code-hl-operator-color)}.highlight .p{color:var(--md-code-hl-punctuation-color)}.highlight .cpf,.highlight .l,.highlight .s,.highlight .s1,.highlight .s2,.highlight .sb,.highlight .sc,.highlight .si,.highlight .ss{color:var(--md-code-hl-string-color)}.highlight .cp,.highlight .se,.highlight .sh,.highlight .sr,.highlight .sx{color:var(--md-code-hl-special-color)}.highlight .il,.highlight .m,.highlight .mb,.highlight .mf,.highlight .mh,.highlight .mi,.highlight .mo{color:var(--md-code-hl-number-color)}.highlight .k,.highlight .kd,.highlight .kn,.highlight .kp,.highlight .kr,.highlight .kt{color:var(--md-code-hl-keyword-color)}.highlight .kc,.highlight .n{color:var(--md-code-hl-name-color)}.highlight .bp,.highlight .nb,.highlight .no{color:var(--md-code-hl-constant-color)}.highlight .nc,.highlight .ne,.highlight 
.nf,.highlight .nn{color:var(--md-code-hl-function-color)}.highlight .nd,.highlight .ni,.highlight .nl,.highlight .nt{color:var(--md-code-hl-keyword-color)}.highlight .c,.highlight .c1,.highlight .ch,.highlight .cm,.highlight .cs,.highlight .sd{color:var(--md-code-hl-comment-color)}.highlight .na,.highlight .nv,.highlight .vc,.highlight .vg,.highlight .vi{color:var(--md-code-hl-variable-color)}.highlight .ge,.highlight .gh,.highlight .go,.highlight .gp,.highlight .gr,.highlight .gs,.highlight .gt,.highlight .gu{color:var(--md-code-hl-generic-color)}.highlight .gd,.highlight .gi{border-radius:.1rem;margin:0 -.125em;padding:0 .125em}.highlight .gd{background-color:var(--md-typeset-del-color)}.highlight .gi{background-color:var(--md-typeset-ins-color)}.highlight .hll{background-color:var(--md-code-hl-color);display:block;margin:0 -1.1764705882em;padding:0 1.1764705882em}.highlight span.filename{background-color:var(--md-code-bg-color);border-bottom:.05rem solid var(--md-default-fg-color--lightest);border-top-left-radius:.1rem;border-top-right-radius:.1rem;display:flow-root;font-size:.85em;font-weight:700;margin-top:1em;padding:.6617647059em 1.1764705882em;position:relative}.highlight span.filename+pre{margin-top:0}.highlight span.filename+pre>code{border-top-left-radius:0;border-top-right-radius:0}.highlight [data-linenos]:before{background-color:var(--md-code-bg-color);box-shadow:-.05rem 0 var(--md-default-fg-color--lightest) inset;color:var(--md-default-fg-color--light);content:attr(data-linenos);float:left;left:-1.1764705882em;margin-left:-1.1764705882em;margin-right:1.1764705882em;padding-left:1.1764705882em;position:sticky;-webkit-user-select:none;user-select:none;z-index:3}.highlight code a[id]{position:absolute;visibility:hidden}.highlight code[data-md-copying] .hll{display:contents}.highlight code[data-md-copying] .md-annotation{display:none}.highlighttable{display:flow-root}.highlighttable tbody,.highlighttable td{display:block;padding:0}.highlighttable 
tr{display:flex}.highlighttable pre{margin:0}.highlighttable th.filename{flex-grow:1;padding:0;text-align:left}.highlighttable th.filename span.filename{margin-top:0}.highlighttable .linenos{background-color:var(--md-code-bg-color);border-bottom-left-radius:.1rem;border-top-left-radius:.1rem;font-size:.85em;padding:.7720588235em 0 .7720588235em 1.1764705882em;-webkit-user-select:none;user-select:none}.highlighttable .linenodiv{box-shadow:-.05rem 0 var(--md-default-fg-color--lightest) inset;padding-right:.5882352941em}.highlighttable .linenodiv pre{color:var(--md-default-fg-color--light);text-align:right}.highlighttable .code{flex:1;min-width:0}.linenodiv a{color:inherit}.md-typeset .highlighttable{direction:ltr;margin:1em 0}.md-typeset .highlighttable>tbody>tr>.code>div>pre>code{border-bottom-left-radius:0;border-top-left-radius:0}.md-typeset .highlight+.result{border:.05rem solid var(--md-code-bg-color);border-bottom-left-radius:.1rem;border-bottom-right-radius:.1rem;border-top-width:.1rem;margin-top:-1.125em;overflow:visible;padding:0 1em}.md-typeset .highlight+.result:after{clear:both;content:"";display:block}@media screen and (max-width:44.9375em){.md-content__inner>.highlight{margin:1em -.8rem}.md-content__inner>.highlight>.filename,.md-content__inner>.highlight>.highlighttable>tbody>tr>.code>div>pre>code,.md-content__inner>.highlight>.highlighttable>tbody>tr>.filename span.filename,.md-content__inner>.highlight>.highlighttable>tbody>tr>.linenos,.md-content__inner>.highlight>pre>code{border-radius:0}.md-content__inner>.highlight+.result{border-left-width:0;border-radius:0;border-right-width:0;margin-left:-.8rem;margin-right:-.8rem}}.md-typeset .keys kbd:after,.md-typeset .keys kbd:before{-moz-osx-font-smoothing:initial;-webkit-font-smoothing:initial;color:inherit;margin:0;position:relative}.md-typeset .keys span{color:var(--md-default-fg-color--light);padding:0 .2em}.md-typeset .keys .key-alt:before,.md-typeset .keys .key-left-alt:before,.md-typeset .keys 
.key-right-alt:before{content:"⎇";padding-right:.4em}.md-typeset .keys .key-command:before,.md-typeset .keys .key-left-command:before,.md-typeset .keys .key-right-command:before{content:"⌘";padding-right:.4em}.md-typeset .keys .key-control:before,.md-typeset .keys .key-left-control:before,.md-typeset .keys .key-right-control:before{content:"⌃";padding-right:.4em}.md-typeset .keys .key-left-meta:before,.md-typeset .keys .key-meta:before,.md-typeset .keys .key-right-meta:before{content:"◆";padding-right:.4em}.md-typeset .keys .key-left-option:before,.md-typeset .keys .key-option:before,.md-typeset .keys .key-right-option:before{content:"⌥";padding-right:.4em}.md-typeset .keys .key-left-shift:before,.md-typeset .keys .key-right-shift:before,.md-typeset .keys .key-shift:before{content:"⇧";padding-right:.4em}.md-typeset .keys .key-left-super:before,.md-typeset .keys .key-right-super:before,.md-typeset .keys .key-super:before{content:"❖";padding-right:.4em}.md-typeset .keys .key-left-windows:before,.md-typeset .keys .key-right-windows:before,.md-typeset .keys .key-windows:before{content:"⊞";padding-right:.4em}.md-typeset .keys .key-arrow-down:before{content:"↓";padding-right:.4em}.md-typeset .keys .key-arrow-left:before{content:"←";padding-right:.4em}.md-typeset .keys .key-arrow-right:before{content:"→";padding-right:.4em}.md-typeset .keys .key-arrow-up:before{content:"↑";padding-right:.4em}.md-typeset .keys .key-backspace:before{content:"⌫";padding-right:.4em}.md-typeset .keys .key-backtab:before{content:"⇤";padding-right:.4em}.md-typeset .keys .key-caps-lock:before{content:"⇪";padding-right:.4em}.md-typeset .keys .key-clear:before{content:"⌧";padding-right:.4em}.md-typeset .keys .key-context-menu:before{content:"☰";padding-right:.4em}.md-typeset .keys .key-delete:before{content:"⌦";padding-right:.4em}.md-typeset .keys .key-eject:before{content:"⏏";padding-right:.4em}.md-typeset .keys .key-end:before{content:"⤓";padding-right:.4em}.md-typeset .keys 
.key-escape:before{content:"⎋";padding-right:.4em}.md-typeset .keys .key-home:before{content:"⤒";padding-right:.4em}.md-typeset .keys .key-insert:before{content:"⎀";padding-right:.4em}.md-typeset .keys .key-page-down:before{content:"⇟";padding-right:.4em}.md-typeset .keys .key-page-up:before{content:"⇞";padding-right:.4em}.md-typeset .keys .key-print-screen:before{content:"⎙";padding-right:.4em}.md-typeset .keys .key-tab:after{content:"⇥";padding-left:.4em}.md-typeset .keys .key-num-enter:after{content:"⌤";padding-left:.4em}.md-typeset .keys .key-enter:after{content:"⏎";padding-left:.4em}:root{--md-tabbed-icon--prev:url('data:image/svg+xml;charset=utf-8,');--md-tabbed-icon--next:url('data:image/svg+xml;charset=utf-8,')}.md-typeset .tabbed-set{border-radius:.1rem;display:flex;flex-flow:column wrap;margin:1em 0;position:relative}.md-typeset .tabbed-set>input{height:0;opacity:0;position:absolute;width:0}.md-typeset .tabbed-set>input:target{--md-scroll-offset:0.625em}.md-typeset .tabbed-labels{-ms-overflow-style:none;box-shadow:0 -.05rem var(--md-default-fg-color--lightest) inset;display:flex;max-width:100%;overflow:auto;scrollbar-width:none}@media print{.md-typeset .tabbed-labels{display:contents}}@media screen{.js .md-typeset .tabbed-labels{position:relative}.js .md-typeset .tabbed-labels:before{background:var(--md-accent-fg-color);bottom:0;content:"";display:block;height:2px;left:0;position:absolute;transform:translateX(var(--md-indicator-x));transition:width 225ms,transform .25s;transition-timing-function:cubic-bezier(.4,0,.2,1);width:var(--md-indicator-width)}}.md-typeset .tabbed-labels::-webkit-scrollbar{display:none}.md-typeset .tabbed-labels>label{border-bottom:.1rem solid #0000;border-radius:.1rem .1rem 0 0;color:var(--md-default-fg-color--light);cursor:pointer;flex-shrink:0;font-size:.64rem;font-weight:700;padding:.78125em 1.25em .625em;scroll-margin-inline-start:1rem;transition:background-color .25s,color .25s;white-space:nowrap;width:auto}@media 
print{.md-typeset .tabbed-labels>label:first-child{order:1}.md-typeset .tabbed-labels>label:nth-child(2){order:2}.md-typeset .tabbed-labels>label:nth-child(3){order:3}.md-typeset .tabbed-labels>label:nth-child(4){order:4}.md-typeset .tabbed-labels>label:nth-child(5){order:5}.md-typeset .tabbed-labels>label:nth-child(6){order:6}.md-typeset .tabbed-labels>label:nth-child(7){order:7}.md-typeset .tabbed-labels>label:nth-child(8){order:8}.md-typeset .tabbed-labels>label:nth-child(9){order:9}.md-typeset .tabbed-labels>label:nth-child(10){order:10}.md-typeset .tabbed-labels>label:nth-child(11){order:11}.md-typeset .tabbed-labels>label:nth-child(12){order:12}.md-typeset .tabbed-labels>label:nth-child(13){order:13}.md-typeset .tabbed-labels>label:nth-child(14){order:14}.md-typeset .tabbed-labels>label:nth-child(15){order:15}.md-typeset .tabbed-labels>label:nth-child(16){order:16}.md-typeset .tabbed-labels>label:nth-child(17){order:17}.md-typeset .tabbed-labels>label:nth-child(18){order:18}.md-typeset .tabbed-labels>label:nth-child(19){order:19}.md-typeset .tabbed-labels>label:nth-child(20){order:20}}.md-typeset .tabbed-labels>label:hover{color:var(--md-accent-fg-color)}.md-typeset .tabbed-content{width:100%}@media print{.md-typeset .tabbed-content{display:contents}}.md-typeset .tabbed-block{display:none}@media print{.md-typeset .tabbed-block{display:block}.md-typeset .tabbed-block:first-child{order:1}.md-typeset .tabbed-block:nth-child(2){order:2}.md-typeset .tabbed-block:nth-child(3){order:3}.md-typeset .tabbed-block:nth-child(4){order:4}.md-typeset .tabbed-block:nth-child(5){order:5}.md-typeset .tabbed-block:nth-child(6){order:6}.md-typeset .tabbed-block:nth-child(7){order:7}.md-typeset .tabbed-block:nth-child(8){order:8}.md-typeset .tabbed-block:nth-child(9){order:9}.md-typeset .tabbed-block:nth-child(10){order:10}.md-typeset .tabbed-block:nth-child(11){order:11}.md-typeset .tabbed-block:nth-child(12){order:12}.md-typeset .tabbed-block:nth-child(13){order:13}.md-typeset 
.tabbed-block:nth-child(14){order:14}.md-typeset .tabbed-block:nth-child(15){order:15}.md-typeset .tabbed-block:nth-child(16){order:16}.md-typeset .tabbed-block:nth-child(17){order:17}.md-typeset .tabbed-block:nth-child(18){order:18}.md-typeset .tabbed-block:nth-child(19){order:19}.md-typeset .tabbed-block:nth-child(20){order:20}}.md-typeset .tabbed-block>.highlight:first-child>pre,.md-typeset .tabbed-block>pre:first-child{margin:0}.md-typeset .tabbed-block>.highlight:first-child>pre>code,.md-typeset .tabbed-block>pre:first-child>code{border-top-left-radius:0;border-top-right-radius:0}.md-typeset .tabbed-block>.highlight:first-child>.filename{border-top-left-radius:0;border-top-right-radius:0;margin:0}.md-typeset .tabbed-block>.highlight:first-child>.highlighttable{margin:0}.md-typeset .tabbed-block>.highlight:first-child>.highlighttable>tbody>tr>.filename span.filename,.md-typeset .tabbed-block>.highlight:first-child>.highlighttable>tbody>tr>.linenos{border-top-left-radius:0;border-top-right-radius:0;margin:0}.md-typeset .tabbed-block>.highlight:first-child>.highlighttable>tbody>tr>.code>div>pre>code{border-top-left-radius:0;border-top-right-radius:0}.md-typeset .tabbed-block>.highlight:first-child+.result{margin-top:-.125em}.md-typeset .tabbed-block>.tabbed-set{margin:0}.md-typeset .tabbed-button{align-self:center;border-radius:100%;color:var(--md-default-fg-color--light);cursor:pointer;display:block;height:.9rem;margin-top:.1rem;pointer-events:auto;transition:background-color .25s;width:.9rem}.md-typeset .tabbed-button:hover{background-color:var(--md-accent-fg-color--transparent);color:var(--md-accent-fg-color)}.md-typeset 
.tabbed-button:after{background-color:currentcolor;content:"";display:block;height:100%;-webkit-mask-image:var(--md-tabbed-icon--prev);mask-image:var(--md-tabbed-icon--prev);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background-color .25s,transform .25s;width:100%}.md-typeset .tabbed-control{background:linear-gradient(to right,var(--md-default-bg-color) 60%,#0000);display:flex;height:1.9rem;justify-content:start;pointer-events:none;position:absolute;transition:opacity 125ms;width:1.2rem}[dir=rtl] .md-typeset .tabbed-control{transform:rotate(180deg)}.md-typeset .tabbed-control[hidden]{opacity:0}.md-typeset .tabbed-control--next{background:linear-gradient(to left,var(--md-default-bg-color) 60%,#0000);justify-content:end;right:0}.md-typeset .tabbed-control--next .tabbed-button:after{-webkit-mask-image:var(--md-tabbed-icon--next);mask-image:var(--md-tabbed-icon--next)}@media screen and (max-width:44.9375em){[dir=ltr] .md-content__inner>.tabbed-set .tabbed-labels{padding-left:.8rem}[dir=rtl] .md-content__inner>.tabbed-set .tabbed-labels{padding-right:.8rem}.md-content__inner>.tabbed-set .tabbed-labels{margin:0 -.8rem;max-width:100vw;scroll-padding-inline-start:.8rem}[dir=ltr] .md-content__inner>.tabbed-set .tabbed-labels:after{padding-right:.8rem}[dir=rtl] .md-content__inner>.tabbed-set .tabbed-labels:after{padding-left:.8rem}.md-content__inner>.tabbed-set .tabbed-labels:after{content:""}[dir=ltr] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--prev{padding-left:.8rem}[dir=rtl] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--prev{padding-right:.8rem}[dir=ltr] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--prev{margin-left:-.8rem}[dir=rtl] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--prev{margin-right:-.8rem}.md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--prev{width:2rem}[dir=ltr] 
.md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--next{padding-right:.8rem}[dir=rtl] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--next{padding-left:.8rem}[dir=ltr] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--next{margin-right:-.8rem}[dir=rtl] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--next{margin-left:-.8rem}.md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--next{width:2rem}}@media screen{.md-typeset .tabbed-set>input:first-child:checked~.tabbed-labels>:first-child,.md-typeset .tabbed-set>input:nth-child(10):checked~.tabbed-labels>:nth-child(10),.md-typeset .tabbed-set>input:nth-child(11):checked~.tabbed-labels>:nth-child(11),.md-typeset .tabbed-set>input:nth-child(12):checked~.tabbed-labels>:nth-child(12),.md-typeset .tabbed-set>input:nth-child(13):checked~.tabbed-labels>:nth-child(13),.md-typeset .tabbed-set>input:nth-child(14):checked~.tabbed-labels>:nth-child(14),.md-typeset .tabbed-set>input:nth-child(15):checked~.tabbed-labels>:nth-child(15),.md-typeset .tabbed-set>input:nth-child(16):checked~.tabbed-labels>:nth-child(16),.md-typeset .tabbed-set>input:nth-child(17):checked~.tabbed-labels>:nth-child(17),.md-typeset .tabbed-set>input:nth-child(18):checked~.tabbed-labels>:nth-child(18),.md-typeset .tabbed-set>input:nth-child(19):checked~.tabbed-labels>:nth-child(19),.md-typeset .tabbed-set>input:nth-child(2):checked~.tabbed-labels>:nth-child(2),.md-typeset .tabbed-set>input:nth-child(20):checked~.tabbed-labels>:nth-child(20),.md-typeset .tabbed-set>input:nth-child(3):checked~.tabbed-labels>:nth-child(3),.md-typeset .tabbed-set>input:nth-child(4):checked~.tabbed-labels>:nth-child(4),.md-typeset .tabbed-set>input:nth-child(5):checked~.tabbed-labels>:nth-child(5),.md-typeset .tabbed-set>input:nth-child(6):checked~.tabbed-labels>:nth-child(6),.md-typeset .tabbed-set>input:nth-child(7):checked~.tabbed-labels>:nth-child(7),.md-typeset 
.tabbed-set>input:nth-child(8):checked~.tabbed-labels>:nth-child(8),.md-typeset .tabbed-set>input:nth-child(9):checked~.tabbed-labels>:nth-child(9){color:var(--md-accent-fg-color)}.md-typeset .no-js .tabbed-set>input:first-child:checked~.tabbed-labels>:first-child,.md-typeset .no-js .tabbed-set>input:nth-child(10):checked~.tabbed-labels>:nth-child(10),.md-typeset .no-js .tabbed-set>input:nth-child(11):checked~.tabbed-labels>:nth-child(11),.md-typeset .no-js .tabbed-set>input:nth-child(12):checked~.tabbed-labels>:nth-child(12),.md-typeset .no-js .tabbed-set>input:nth-child(13):checked~.tabbed-labels>:nth-child(13),.md-typeset .no-js .tabbed-set>input:nth-child(14):checked~.tabbed-labels>:nth-child(14),.md-typeset .no-js .tabbed-set>input:nth-child(15):checked~.tabbed-labels>:nth-child(15),.md-typeset .no-js .tabbed-set>input:nth-child(16):checked~.tabbed-labels>:nth-child(16),.md-typeset .no-js .tabbed-set>input:nth-child(17):checked~.tabbed-labels>:nth-child(17),.md-typeset .no-js .tabbed-set>input:nth-child(18):checked~.tabbed-labels>:nth-child(18),.md-typeset .no-js .tabbed-set>input:nth-child(19):checked~.tabbed-labels>:nth-child(19),.md-typeset .no-js .tabbed-set>input:nth-child(2):checked~.tabbed-labels>:nth-child(2),.md-typeset .no-js .tabbed-set>input:nth-child(20):checked~.tabbed-labels>:nth-child(20),.md-typeset .no-js .tabbed-set>input:nth-child(3):checked~.tabbed-labels>:nth-child(3),.md-typeset .no-js .tabbed-set>input:nth-child(4):checked~.tabbed-labels>:nth-child(4),.md-typeset .no-js .tabbed-set>input:nth-child(5):checked~.tabbed-labels>:nth-child(5),.md-typeset .no-js .tabbed-set>input:nth-child(6):checked~.tabbed-labels>:nth-child(6),.md-typeset .no-js .tabbed-set>input:nth-child(7):checked~.tabbed-labels>:nth-child(7),.md-typeset .no-js .tabbed-set>input:nth-child(8):checked~.tabbed-labels>:nth-child(8),.md-typeset .no-js .tabbed-set>input:nth-child(9):checked~.tabbed-labels>:nth-child(9),.no-js .md-typeset 
.tabbed-set>input:first-child:checked~.tabbed-labels>:first-child,.no-js .md-typeset .tabbed-set>input:nth-child(10):checked~.tabbed-labels>:nth-child(10),.no-js .md-typeset .tabbed-set>input:nth-child(11):checked~.tabbed-labels>:nth-child(11),.no-js .md-typeset .tabbed-set>input:nth-child(12):checked~.tabbed-labels>:nth-child(12),.no-js .md-typeset .tabbed-set>input:nth-child(13):checked~.tabbed-labels>:nth-child(13),.no-js .md-typeset .tabbed-set>input:nth-child(14):checked~.tabbed-labels>:nth-child(14),.no-js .md-typeset .tabbed-set>input:nth-child(15):checked~.tabbed-labels>:nth-child(15),.no-js .md-typeset .tabbed-set>input:nth-child(16):checked~.tabbed-labels>:nth-child(16),.no-js .md-typeset .tabbed-set>input:nth-child(17):checked~.tabbed-labels>:nth-child(17),.no-js .md-typeset .tabbed-set>input:nth-child(18):checked~.tabbed-labels>:nth-child(18),.no-js .md-typeset .tabbed-set>input:nth-child(19):checked~.tabbed-labels>:nth-child(19),.no-js .md-typeset .tabbed-set>input:nth-child(2):checked~.tabbed-labels>:nth-child(2),.no-js .md-typeset .tabbed-set>input:nth-child(20):checked~.tabbed-labels>:nth-child(20),.no-js .md-typeset .tabbed-set>input:nth-child(3):checked~.tabbed-labels>:nth-child(3),.no-js .md-typeset .tabbed-set>input:nth-child(4):checked~.tabbed-labels>:nth-child(4),.no-js .md-typeset .tabbed-set>input:nth-child(5):checked~.tabbed-labels>:nth-child(5),.no-js .md-typeset .tabbed-set>input:nth-child(6):checked~.tabbed-labels>:nth-child(6),.no-js .md-typeset .tabbed-set>input:nth-child(7):checked~.tabbed-labels>:nth-child(7),.no-js .md-typeset .tabbed-set>input:nth-child(8):checked~.tabbed-labels>:nth-child(8),.no-js .md-typeset .tabbed-set>input:nth-child(9):checked~.tabbed-labels>:nth-child(9){border-color:var(--md-accent-fg-color)}}.md-typeset .tabbed-set>input:first-child.focus-visible~.tabbed-labels>:first-child,.md-typeset .tabbed-set>input:nth-child(10).focus-visible~.tabbed-labels>:nth-child(10),.md-typeset 
.tabbed-set>input:nth-child(11).focus-visible~.tabbed-labels>:nth-child(11),.md-typeset .tabbed-set>input:nth-child(12).focus-visible~.tabbed-labels>:nth-child(12),.md-typeset .tabbed-set>input:nth-child(13).focus-visible~.tabbed-labels>:nth-child(13),.md-typeset .tabbed-set>input:nth-child(14).focus-visible~.tabbed-labels>:nth-child(14),.md-typeset .tabbed-set>input:nth-child(15).focus-visible~.tabbed-labels>:nth-child(15),.md-typeset .tabbed-set>input:nth-child(16).focus-visible~.tabbed-labels>:nth-child(16),.md-typeset .tabbed-set>input:nth-child(17).focus-visible~.tabbed-labels>:nth-child(17),.md-typeset .tabbed-set>input:nth-child(18).focus-visible~.tabbed-labels>:nth-child(18),.md-typeset .tabbed-set>input:nth-child(19).focus-visible~.tabbed-labels>:nth-child(19),.md-typeset .tabbed-set>input:nth-child(2).focus-visible~.tabbed-labels>:nth-child(2),.md-typeset .tabbed-set>input:nth-child(20).focus-visible~.tabbed-labels>:nth-child(20),.md-typeset .tabbed-set>input:nth-child(3).focus-visible~.tabbed-labels>:nth-child(3),.md-typeset .tabbed-set>input:nth-child(4).focus-visible~.tabbed-labels>:nth-child(4),.md-typeset .tabbed-set>input:nth-child(5).focus-visible~.tabbed-labels>:nth-child(5),.md-typeset .tabbed-set>input:nth-child(6).focus-visible~.tabbed-labels>:nth-child(6),.md-typeset .tabbed-set>input:nth-child(7).focus-visible~.tabbed-labels>:nth-child(7),.md-typeset .tabbed-set>input:nth-child(8).focus-visible~.tabbed-labels>:nth-child(8),.md-typeset .tabbed-set>input:nth-child(9).focus-visible~.tabbed-labels>:nth-child(9){background-color:var(--md-accent-fg-color--transparent)}.md-typeset .tabbed-set>input:first-child:checked~.tabbed-content>:first-child,.md-typeset .tabbed-set>input:nth-child(10):checked~.tabbed-content>:nth-child(10),.md-typeset .tabbed-set>input:nth-child(11):checked~.tabbed-content>:nth-child(11),.md-typeset .tabbed-set>input:nth-child(12):checked~.tabbed-content>:nth-child(12),.md-typeset 
.tabbed-set>input:nth-child(13):checked~.tabbed-content>:nth-child(13),.md-typeset .tabbed-set>input:nth-child(14):checked~.tabbed-content>:nth-child(14),.md-typeset .tabbed-set>input:nth-child(15):checked~.tabbed-content>:nth-child(15),.md-typeset .tabbed-set>input:nth-child(16):checked~.tabbed-content>:nth-child(16),.md-typeset .tabbed-set>input:nth-child(17):checked~.tabbed-content>:nth-child(17),.md-typeset .tabbed-set>input:nth-child(18):checked~.tabbed-content>:nth-child(18),.md-typeset .tabbed-set>input:nth-child(19):checked~.tabbed-content>:nth-child(19),.md-typeset .tabbed-set>input:nth-child(2):checked~.tabbed-content>:nth-child(2),.md-typeset .tabbed-set>input:nth-child(20):checked~.tabbed-content>:nth-child(20),.md-typeset .tabbed-set>input:nth-child(3):checked~.tabbed-content>:nth-child(3),.md-typeset .tabbed-set>input:nth-child(4):checked~.tabbed-content>:nth-child(4),.md-typeset .tabbed-set>input:nth-child(5):checked~.tabbed-content>:nth-child(5),.md-typeset .tabbed-set>input:nth-child(6):checked~.tabbed-content>:nth-child(6),.md-typeset .tabbed-set>input:nth-child(7):checked~.tabbed-content>:nth-child(7),.md-typeset .tabbed-set>input:nth-child(8):checked~.tabbed-content>:nth-child(8),.md-typeset .tabbed-set>input:nth-child(9):checked~.tabbed-content>:nth-child(9){display:block}:root{--md-tasklist-icon:url('data:image/svg+xml;charset=utf-8,');--md-tasklist-icon--checked:url('data:image/svg+xml;charset=utf-8,')}.md-typeset .task-list-item{list-style-type:none;position:relative}[dir=ltr] .md-typeset .task-list-item [type=checkbox]{left:-2em}[dir=rtl] .md-typeset .task-list-item [type=checkbox]{right:-2em}.md-typeset .task-list-item [type=checkbox]{position:absolute;top:.45em}.md-typeset .task-list-control [type=checkbox]{opacity:0;z-index:-1}[dir=ltr] .md-typeset .task-list-indicator:before{left:-1.5em}[dir=rtl] .md-typeset .task-list-indicator:before{right:-1.5em}.md-typeset 
.task-list-indicator:before{background-color:var(--md-default-fg-color--lightest);content:"";height:1.25em;-webkit-mask-image:var(--md-tasklist-icon);mask-image:var(--md-tasklist-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;position:absolute;top:.15em;width:1.25em}.md-typeset [type=checkbox]:checked+.task-list-indicator:before{background-color:#00e676;-webkit-mask-image:var(--md-tasklist-icon--checked);mask-image:var(--md-tasklist-icon--checked)}:root>*{--md-mermaid-font-family:var(--md-text-font-family),sans-serif;--md-mermaid-edge-color:var(--md-code-fg-color);--md-mermaid-node-bg-color:var(--md-accent-fg-color--transparent);--md-mermaid-node-fg-color:var(--md-accent-fg-color);--md-mermaid-label-bg-color:var(--md-default-bg-color);--md-mermaid-label-fg-color:var(--md-code-fg-color);--md-mermaid-sequence-actor-bg-color:var(--md-mermaid-label-bg-color);--md-mermaid-sequence-actor-fg-color:var(--md-mermaid-label-fg-color);--md-mermaid-sequence-actor-border-color:var(--md-mermaid-node-fg-color);--md-mermaid-sequence-actor-line-color:var(--md-default-fg-color--lighter);--md-mermaid-sequence-actorman-bg-color:var(--md-mermaid-label-bg-color);--md-mermaid-sequence-actorman-line-color:var(--md-mermaid-node-fg-color);--md-mermaid-sequence-box-bg-color:var(--md-mermaid-node-bg-color);--md-mermaid-sequence-box-fg-color:var(--md-mermaid-edge-color);--md-mermaid-sequence-label-bg-color:var(--md-mermaid-node-bg-color);--md-mermaid-sequence-label-fg-color:var(--md-mermaid-node-fg-color);--md-mermaid-sequence-loop-bg-color:var(--md-mermaid-node-bg-color);--md-mermaid-sequence-loop-fg-color:var(--md-mermaid-edge-color);--md-mermaid-sequence-loop-border-color:var(--md-mermaid-node-fg-color);--md-mermaid-sequence-message-fg-color:var(--md-mermaid-edge-color);--md-mermaid-sequence-message-line-color:var(--md-mermaid-edge-color);--md-mermaid-sequence-note-bg-color:var(--md-mermai
d-label-bg-color);--md-mermaid-sequence-note-fg-color:var(--md-mermaid-edge-color);--md-mermaid-sequence-note-border-color:var(--md-mermaid-label-fg-color);--md-mermaid-sequence-number-bg-color:var(--md-mermaid-node-fg-color);--md-mermaid-sequence-number-fg-color:var(--md-accent-bg-color)}.mermaid{line-height:normal;margin:1em 0}@media screen and (min-width:45em){[dir=ltr] .md-typeset .inline{float:left}[dir=rtl] .md-typeset .inline{float:right}[dir=ltr] .md-typeset .inline{margin-right:.8rem}[dir=rtl] .md-typeset .inline{margin-left:.8rem}.md-typeset .inline{margin-bottom:.8rem;margin-top:0;width:11.7rem}[dir=ltr] .md-typeset .inline.end{float:right}[dir=rtl] .md-typeset .inline.end{float:left}[dir=ltr] .md-typeset .inline.end{margin-left:.8rem;margin-right:0}[dir=rtl] .md-typeset .inline.end{margin-left:0;margin-right:.8rem}} \ No newline at end of file diff --git a/devel/assets/stylesheets/main.046329b4.min.css.map b/devel/assets/stylesheets/main.046329b4.min.css.map deleted file mode 100644 index d84ca0091..000000000 --- a/devel/assets/stylesheets/main.046329b4.min.css.map +++ /dev/null @@ -1 +0,0 @@ 
-{"version":3,"sources":["src/assets/stylesheets/main/components/_meta.scss","../../../src/assets/stylesheets/main.scss","src/assets/stylesheets/main/_resets.scss","src/assets/stylesheets/main/_colors.scss","src/assets/stylesheets/main/_icons.scss","src/assets/stylesheets/main/_typeset.scss","src/assets/stylesheets/utilities/_break.scss","src/assets/stylesheets/main/components/_author.scss","src/assets/stylesheets/main/components/_banner.scss","src/assets/stylesheets/main/components/_base.scss","src/assets/stylesheets/main/components/_clipboard.scss","src/assets/stylesheets/main/components/_consent.scss","src/assets/stylesheets/main/components/_content.scss","src/assets/stylesheets/main/components/_dialog.scss","src/assets/stylesheets/main/components/_feedback.scss","src/assets/stylesheets/main/components/_footer.scss","src/assets/stylesheets/main/components/_form.scss","src/assets/stylesheets/main/components/_header.scss","node_modules/material-design-color/material-color.scss","src/assets/stylesheets/main/components/_nav.scss","src/assets/stylesheets/main/components/_pagination.scss","src/assets/stylesheets/main/components/_post.scss","src/assets/stylesheets/main/components/_search.scss","src/assets/stylesheets/main/components/_select.scss","src/assets/stylesheets/main/components/_sidebar.scss","src/assets/stylesheets/main/components/_source.scss","src/assets/stylesheets/main/components/_status.scss","src/assets/stylesheets/main/components/_tabs.scss","src/assets/stylesheets/main/components/_tag.scss","src/assets/stylesheets/main/components/_tooltip.scss","src/assets/stylesheets/main/components/_top.scss","src/assets/stylesheets/main/components/_version.scss","src/assets/stylesheets/main/extensions/markdown/_admonition.scss","src/assets/stylesheets/main/extensions/markdown/_footnotes.scss","src/assets/stylesheets/main/extensions/markdown/_toc.scss","src/assets/stylesheets/main/extensions/pymdownx/_arithmatex.scss","src/assets/stylesheets/main/extensions/pymdownx/_
critic.scss","src/assets/stylesheets/main/extensions/pymdownx/_details.scss","src/assets/stylesheets/main/extensions/pymdownx/_emoji.scss","src/assets/stylesheets/main/extensions/pymdownx/_highlight.scss","src/assets/stylesheets/main/extensions/pymdownx/_keys.scss","src/assets/stylesheets/main/extensions/pymdownx/_tabbed.scss","src/assets/stylesheets/main/extensions/pymdownx/_tasklist.scss","src/assets/stylesheets/main/integrations/_mermaid.scss","src/assets/stylesheets/main/_modifiers.scss"],"names":[],"mappings":"AA0CE,gBC6xCF,CC3yCA,KAEE,6BAAA,CAAA,0BAAA,CAAA,qBAAA,CADA,qBDzBF,CC8BA,iBAGE,kBD3BF,CC8BE,gCANF,iBAOI,yBDzBF,CACF,CC6BA,KACE,QD1BF,CC8BA,qBAIE,uCD3BF,CC+BA,EACE,aAAA,CACA,oBD5BF,CCgCA,GAME,QAAA,CALA,kBAAA,CACA,aAAA,CACA,aAAA,CAEA,gBAAA,CADA,SD3BF,CCiCA,MACE,aD9BF,CCkCA,QAEE,eD/BF,CCmCA,IACE,iBDhCF,CCoCA,MAEE,uBAAA,CADA,gBDhCF,CCqCA,MAEE,eAAA,CACA,kBDlCF,CCsCA,OAKE,gBAAA,CACA,QAAA,CAHA,mBAAA,CACA,iBAAA,CAFA,QAAA,CADA,SD9BF,CCuCA,MACE,QAAA,CACA,YDpCF,CErDA,MAIE,6BAAA,CACA,oCAAA,CACA,mCAAA,CACA,0BAAA,CACA,sCAAA,CAGA,4BAAA,CACA,2CAAA,CACA,yBAAA,CACA,qCFmDF,CE7CA,+BAIE,kBF6CF,CE1CE,oHAEE,YF4CJ,CEnCA,qCAGE,+BAAA,CACA,sCAAA,CACA,wCAAA,CACA,yCAAA,CACA,0BAAA,CACA,sCAAA,CACA,wCAAA,CACA,yCAAA,CAGA,0BAAA,CACA,0BAAA,CAGA,4BAAA,CACA,iCAAA,CACA,kCAAA,CACA,mCAAA,CACA,mCAAA,CACA,kCAAA,CACA,iCAAA,CACA,+CAAA,CACA,6DAAA,CACA,gEAAA,CACA,4DAAA,CACA,4DAAA,CACA,6DAAA,CAGA,6CAAA,CAGA,+CAAA,CAGA,iCAAA,CAGA,gCAAA,CACA,gCAAA,CAGA,8BAAA,CACA,kCAAA,CACA,qCAAA,CAGA,kCAAA,CACA,gDAAA,CAGA,mDAAA,CACA,mDAAA,CAGA,+BAAA,CACA,0BAAA,CAGA,yBAAA,CACA,qCAAA,CACA,uCAAA,CACA,8BAAA,CACA,oCAAA,CAGA,8DAAA,CAKA,8DAAA,CAKA,0DFUF,CG5HE,aAIE,iBAAA,CAHA,aAAA,CAEA,aAAA,CADA,YHiIJ,CItIA,KACE,kCAAA,CACA,iCAAA,CAGA,uGAAA,CAKA,mFJuIF,CIjIA,iBAIE,mCAAA,CACA,6BAAA,CAFA,sCJsIF,CIhIA,aAIE,4BAAA,CADA,sCJoIF,CI3HA,MACE,0NAAA,CACA,mNAAA,CACA,oNJ8HF,CIvHA,YAGE,gCAAA,CAAA,kBAAA,CAFA,eAAA,CACA,eJ2HF,CItHE,aAPF,YAQI,gBJyHF,CACF,CItHE,uGAME,iBAAA,CAAA,cJwHJ,CIpHE,eAKE,uCAAA,CAHA,aAAA,CAEA,eAAA,CAHA,iBJ2HJ,CIlHE,8BAPE,eAAA
,CAGA,qBJ6HJ,CIzHE,eAEE,kBAAA,CAEA,eAAA,CAHA,oBJwHJ,CIhHE,eAEE,gBAAA,CACA,eAAA,CAEA,qBAAA,CADA,eAAA,CAHA,mBJsHJ,CI9GE,kBACE,eJgHJ,CI5GE,eAEE,eAAA,CACA,qBAAA,CAFA,YJgHJ,CI1GE,8BAKE,uCAAA,CAFA,cAAA,CACA,eAAA,CAEA,qBAAA,CAJA,eJgHJ,CIxGE,eACE,wBJ0GJ,CItGE,eAGE,+DAAA,CAFA,iBAAA,CACA,cJyGJ,CIpGE,cACE,+BAAA,CACA,qBJsGJ,CInGI,mCAEE,sBJoGN,CIhGI,wCACE,+BJkGN,CI/FM,kDACE,uDJiGR,CI5FI,mBACE,kBAAA,CACA,iCJ8FN,CI1FI,4BACE,uCAAA,CACA,oBJ4FN,CIvFE,iDAIE,6BAAA,CACA,aAAA,CAFA,2BJ2FJ,CItFI,aARF,iDASI,oBJ2FJ,CACF,CIvFE,iBAIE,wCAAA,CACA,mBAAA,CACA,kCAAA,CAAA,0BAAA,CAJA,eAAA,CADA,uBAAA,CAEA,qBJ4FJ,CItFI,qCAEE,uCAAA,CADA,YJyFN,CInFE,gBAEE,iBAAA,CACA,eAAA,CAFA,iBJuFJ,CIlFI,qBASE,kCAAA,CAAA,0BAAA,CADA,eAAA,CAPA,aAAA,CAEA,QAAA,CAIA,uCAAA,CAHA,aAAA,CAFA,oCAAA,CASA,yDAAA,CADA,oBAAA,CAJA,iBAAA,CADA,iBJ0FN,CIjFM,2BACE,+CJmFR,CI/EM,wCAEE,YAAA,CADA,WJkFR,CI7EM,8CACE,oDJ+ER,CI5EQ,oDACE,0CJ8EV,CIvEE,gBAOE,4CAAA,CACA,mBAAA,CACA,mKACE,CANF,gCAAA,CAHA,oBAAA,CAEA,eAAA,CADA,uBAAA,CAIA,uBAAA,CADA,qBJ6EJ,CIlEE,iBAGE,6CAAA,CACA,kCAAA,CAAA,0BAAA,CAHA,aAAA,CACA,qBJsEJ,CIhEE,iBAGE,6DAAA,CADA,WAAA,CADA,oBJoEJ,CI/DI,oBAGE,wEAQE,2CAAA,CACA,mBAAA,CACA,8BAAA,CAJA,gCAAA,CACA,mBAAA,CAFA,eAAA,CAHA,UAAA,CAEA,cAAA,CADA,mBAAA,CAFA,iBAAA,CACA,WJuEN,CACF,CI1DE,kBACE,WJ4DJ,CIxDE,oDAEE,qBJ0DJ,CI5DE,oDAEE,sBJ0DJ,CItDE,iCACE,kBJ2DJ,CI5DE,iCACE,mBJ2DJ,CI5DE,iCAIE,2DJwDJ,CI5DE,iCAIE,4DJwDJ,CI5DE,uBAGE,uCAAA,CADA,aAAA,CAAA,cJ0DJ,CIpDE,eACE,oBJsDJ,CIlDE,kDAGE,kBJoDJ,CIvDE,kDAGE,mBJoDJ,CIvDE,8BAEE,SJqDJ,CIjDI,0DACE,iBJoDN,CIhDI,oCACE,2BJmDN,CIhDM,0CACE,2BJmDR,CI9CI,wDACE,kBJkDN,CInDI,wDACE,mBJkDN,CInDI,oCAEE,kBJiDN,CI9CM,kGAEE,aJkDR,CI9CM,0DACE,eJiDR,CI7CM,4HAEE,kBJgDR,CIlDM,4HAEE,mBJgDR,CIlDM,oFACE,kBAAA,CAAA,eJiDR,CI1CE,yBAEE,mBJ4CJ,CI9CE,yBAEE,oBJ4CJ,CI9CE,eACE,mBAAA,CAAA,cJ6CJ,CIxCE,kDAIE,WAAA,CADA,cJ2CJ,CInCI,4BAEE,oBJqCN,CIjCI,6BAEE,oBJmCN,CI/BI,kCACE,YJiCN,CI5BE,mBACE,iBAAA,CAGA,eAAA,CADA,cAAA,CAEA,iBAAA,CAHA,yBAAA,CAAA,sBAAA,CAAA,iBJiCJ,CI3BI,uBACE,aJ6BN,CIxBE,uBAGE,iBAAA,CADA,eAAA,CADA,eJ4BJ,CItBE,mBACE,cJwBJ,CIpBE,+BAME,
2CAAA,CACA,iDAAA,CACA,mBAAA,CAPA,oBAAA,CAGA,gBAAA,CAFA,cAAA,CACA,aAAA,CAEA,iBJyBJ,CInBI,aAXF,+BAYI,aJsBJ,CACF,CIjBI,iCACE,gBJmBN,CIZM,8FACE,YJcR,CIVM,4FACE,eJYR,CIPI,8FACE,eJSN,CINM,kHACE,gBJQR,CIHI,kCAGE,eAAA,CAFA,cAAA,CACA,sBAAA,CAEA,kBJKN,CIDI,kCAGE,qDAAA,CAFA,sBAAA,CACA,kBJIN,CICI,wCACE,iCJCN,CIEM,8CACE,qDAAA,CACA,sDJAR,CIKI,iCACE,iBJHN,CIQE,wCACE,cJNJ,CISI,wDAIE,gBJDN,CIHI,wDAIE,iBJDN,CIHI,8CAME,UAAA,CALA,oBAAA,CAEA,YAAA,CAKA,oDAAA,CAAA,4CAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAHA,iCAAA,CAFA,0BAAA,CAHA,WJCN,CIWI,oDACE,oDJTN,CIaI,mEACE,kDAAA,CACA,yDAAA,CAAA,iDJXN,CIeI,oEACE,kDAAA,CACA,0DAAA,CAAA,kDJbN,CIkBE,wBACE,iBAAA,CACA,eAAA,CACA,iBJhBJ,CIoBE,mBACE,oBAAA,CAEA,kBAAA,CADA,eJjBJ,CIqBI,aANF,mBAOI,aJlBJ,CACF,CIqBI,8BACE,aAAA,CAEA,QAAA,CACA,eAAA,CAFA,UJjBN,CK9VI,wCD8XF,uBACE,iBJ5BF,CI+BE,4BACE,eJ7BJ,CACF,CM7hBE,uBAEE,aAAA,CACA,aAAA,CAEA,aAAA,CACA,eAAA,CALA,iBAAA,CAMA,sCACE,CAJF,YNkiBJ,CM1hBI,2BAEE,kBAAA,CADA,aN6hBN,CMxhBI,6BAME,+CAAA,CAFA,yCAAA,CAHA,eAAA,CACA,eAAA,CACA,kBAAA,CAEA,iBN2hBN,CMthBI,6BAEE,aAAA,CADA,YNyhBN,CMnhBE,wBACE,kBNqhBJ,CMlhBI,4BACE,mCAAA,CACA,uBNohBN,CMhhBI,4DAEE,oBAAA,CADA,SNmhBN,CM/gBM,oEACE,mBNihBR,COvkBA,WAGE,0CAAA,CADA,+BAAA,CADA,aP4kBF,COvkBE,aANF,WAOI,YP0kBF,CACF,COvkBE,oBAEE,2CAAA,CADA,gCP0kBJ,COrkBE,kBAGE,eAAA,CADA,iBAAA,CADA,ePykBJ,COnkBE,6BACE,WPwkBJ,COzkBE,6BACE,UPwkBJ,COzkBE,mBAEE,aAAA,CACA,cAAA,CACA,uBPqkBJ,COlkBI,0BACE,YPokBN,COhkBI,yBACE,UPkkBN,CQvmBA,KASE,cAAA,CARA,WAAA,CACA,iBR2mBF,CKvcI,oCGtKJ,KAaI,gBRomBF,CACF,CK5cI,oCGtKJ,KAkBI,cRomBF,CACF,CQ/lBA,KASE,2CAAA,CAPA,YAAA,CACA,qBAAA,CAKA,eAAA,CAHA,eAAA,CAJA,iBAAA,CAGA,URqmBF,CQ7lBE,aAZF,KAaI,aRgmBF,CACF,CK7cI,wCGhJF,yBAII,cR6lBJ,CACF,CQplBA,SAEE,gBAAA,CAAA,iBAAA,CADA,eRwlBF,CQnlBA,cACE,YAAA,CACA,qBAAA,CACA,WRslBF,CQnlBE,aANF,cAOI,aRslBF,CACF,CQllBA,SACE,WRqlBF,CQllBE,gBACE,YAAA,CACA,WAAA,CACA,iBRolBJ,CQ/kBA,aACE,eAAA,CACA,sBRklBF,CQzkBA,WACE,YR4kBF,CQvkBA,WAGE,QAAA,CACA,SAAA,CAHA,iBAAA,CACA,OR4kBF,CQvkBE,uCACE,aRykBJ,CQrkBE,+BAEE,uCAAA,CADA,kBRwkBJ,CQlkBA,SASE,2CAAA,CAC
A,mBAAA,CAFA,gCAAA,CADA,gBAAA,CADA,YAAA,CAMA,SAAA,CADA,uCAAA,CANA,mBAAA,CAJA,cAAA,CAYA,2BAAA,CATA,UR4kBF,CQhkBE,eAEE,SAAA,CAIA,uBAAA,CAHA,oEACE,CAHF,URqkBJ,CQvjBA,MACE,WR0jBF,CSntBA,MACE,+PTqtBF,CS/sBA,cASE,mBAAA,CAFA,0CAAA,CACA,cAAA,CAFA,YAAA,CAIA,uCAAA,CACA,oBAAA,CAVA,iBAAA,CAEA,UAAA,CADA,QAAA,CAUA,qBAAA,CAPA,WAAA,CADA,ST0tBF,CS/sBE,aAfF,cAgBI,YTktBF,CACF,CS/sBE,kCAEE,uCAAA,CADA,YTktBJ,CS7sBE,qBACE,uCT+sBJ,CS3sBE,wCACE,+BT6sBJ,CSxsBE,oBAME,6BAAA,CADA,UAAA,CAJA,aAAA,CAEA,cAAA,CACA,aAAA,CAGA,2CAAA,CAAA,mCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CARA,aTktBJ,CStsBE,sBACE,cTwsBJ,CSrsBI,2BACE,2CTusBN,CSjsBI,kEAEE,uDAAA,CADA,+BTosBN,CU1wBA,mBACE,GACE,SAAA,CACA,0BV6wBF,CU1wBA,GACE,SAAA,CACA,uBV4wBF,CACF,CUxwBA,mBACE,GACE,SV0wBF,CUvwBA,GACE,SVywBF,CACF,CU9vBE,qBASE,2BAAA,CADA,mCAAA,CAAA,2BAAA,CAFA,0BAAA,CADA,WAAA,CAEA,SAAA,CANA,cAAA,CACA,KAAA,CAEA,UAAA,CADA,SVswBJ,CU5vBE,mBAcE,mDAAA,CANA,2CAAA,CACA,QAAA,CACA,mBAAA,CARA,QAAA,CASA,kDACE,CAPF,eAAA,CAEA,aAAA,CADA,SAAA,CALA,cAAA,CAGA,UAAA,CADA,SVuwBJ,CUxvBE,kBACE,aV0vBJ,CUtvBE,sBACE,YAAA,CACA,YVwvBJ,CUrvBI,oCACE,aVuvBN,CUlvBE,sBACE,mBVovBJ,CUjvBI,6CACE,cVmvBN,CK7oBI,wCKvGA,6CAKI,aAAA,CAEA,gBAAA,CACA,iBAAA,CAFA,UVqvBN,CACF,CU9uBE,kBACE,cVgvBJ,CWj1BA,YACE,WAAA,CAIA,WXi1BF,CW90BE,mBAEE,qBAAA,CADA,iBXi1BJ,CKprBI,sCMtJE,4EACE,kBX60BN,CWz0BI,0JACE,mBX20BN,CW50BI,8EACE,kBX20BN,CACF,CWt0BI,0BAGE,UAAA,CAFA,aAAA,CACA,YXy0BN,CWp0BI,+BACE,eXs0BN,CWh0BE,8BACE,WXq0BJ,CWt0BE,8BACE,UXq0BJ,CWt0BE,8BAIE,iBXk0BJ,CWt0BE,8BAIE,kBXk0BJ,CWt0BE,oBAGE,cAAA,CADA,SXo0BJ,CW/zBI,aAPF,oBAQI,YXk0BJ,CACF,CW/zBI,gCACE,yCXi0BN,CW7zBI,wBACE,cAAA,CACA,kBX+zBN,CW5zBM,kCACE,oBX8zBR,CY/3BA,qBAeE,WZg4BF,CY/4BA,qBAeE,UZg4BF,CY/4BA,WAOE,2CAAA,CACA,mBAAA,CANA,YAAA,CAOA,8BAAA,CALA,iBAAA,CAMA,SAAA,CALA,mBAAA,CACA,mBAAA,CALA,cAAA,CAaA,0BAAA,CAHA,wCACE,CATF,SZ44BF,CY73BE,aAlBF,WAmBI,YZg4BF,CACF,CY73BE,mBAEE,SAAA,CADA,mBAAA,CAKA,uBAAA,CAHA,kEZg4BJ,CYz3BE,kBAEE,gCAAA,CADA,eZ43BJ,Ca95BA,aACE,gBAAA,CACA,iBbi6BF,Ca95BE,sBAGE,WAAA,CADA,QAAA,CADA,Sbk6B
J,Ca55BE,oBAEE,eAAA,CADA,eb+5BJ,Ca15BE,oBACE,iBb45BJ,Cax5BE,mBAIE,sBAAA,CAFA,YAAA,CACA,cAAA,CAEA,sBAAA,CAJA,iBb85BJ,Cav5BI,iDACE,yCby5BN,Car5BI,6BACE,iBbu5BN,Cal5BE,mBAGE,uCAAA,CACA,cAAA,CAHA,aAAA,CACA,cAAA,CAGA,sBbo5BJ,Caj5BI,gDACE,+Bbm5BN,Ca/4BI,4BACE,0CAAA,CACA,mBbi5BN,Ca54BE,mBAEE,SAAA,CADA,iBAAA,CAKA,2BAAA,CAHA,8Db+4BJ,Caz4BI,qBAEE,aAAA,CADA,eb44BN,Cav4BI,6BACE,SAAA,CACA,uBby4BN,Ccx9BA,WAEE,0CAAA,CADA,+Bd49BF,Ccx9BE,aALF,WAMI,Yd29BF,CACF,Ccx9BE,kBACE,6BAAA,CAEA,aAAA,CADA,ad29BJ,Ccv9BI,gCACE,Ydy9BN,Ccp9BE,iBAOE,eAAA,CANA,YAAA,CAKA,cAAA,CAGA,mBAAA,CAAA,eAAA,CADA,cAAA,CAGA,uCAAA,CADA,eAAA,CAEA,uBdk9BJ,Cc/8BI,8CACE,Udi9BN,Cc78BI,+BACE,oBd+8BN,CKj0BI,wCSvIE,uBACE,ad28BN,Ccx8BO,yCACC,Yd08BR,CACF,Ccr8BI,iCACE,gBdw8BN,Ccz8BI,iCACE,iBdw8BN,Ccz8BI,uBAEE,gBdu8BN,Ccp8BM,iCACE,eds8BR,Cch8BE,kBACE,WAAA,CAIA,eAAA,CADA,mBAAA,CAFA,6BAAA,CACA,cAAA,CAGA,kBdk8BJ,Cc97BE,mBAEE,YAAA,CADA,adi8BJ,Cc57BE,sBACE,gBAAA,CACA,Ud87BJ,Ccz7BA,gBACE,gDd47BF,Ccz7BE,uBACE,YAAA,CACA,cAAA,CACA,6BAAA,CACA,ad27BJ,Ccv7BE,kCACE,sCdy7BJ,Cct7BI,gFACE,+Bdw7BN,Cch7BA,cAKE,wCAAA,CADA,gBAAA,CADA,iBAAA,CADA,eAAA,CADA,Udu7BF,CK34BI,mCS7CJ,cASI,Udm7BF,CACF,Cc/6BE,yBACE,sCdi7BJ,Cc16BA,WACE,mBAAA,CACA,SAAA,CAEA,cAAA,CADA,qBd86BF,CK15BI,mCSvBJ,WAQI,ed66BF,CACF,Cc16BE,iBACE,oBAAA,CAEA,aAAA,CACA,iBAAA,CAFA,Yd86BJ,Ccz6BI,wBACE,ed26BN,Ccv6BI,qBAGE,iBAAA,CAFA,gBAAA,CACA,mBd06BN,CehlCE,uBAME,kBAAA,CACA,mBAAA,CAHA,gCAAA,CACA,cAAA,CAJA,oBAAA,CAEA,eAAA,CADA,kBAAA,CAMA,gEfmlCJ,Ce7kCI,gCAEE,2CAAA,CACA,uCAAA,CAFA,gCfilCN,Ce3kCI,0DAEE,0CAAA,CACA,sCAAA,CAFA,+Bf+kCN,CexkCE,gCAKE,4Bf6kCJ,CellCE,gEAME,6Bf4kCJ,CellCE,gCAME,4Bf4kCJ,CellCE,sBAIE,6DAAA,CAGA,8BAAA,CAJA,eAAA,CAFA,aAAA,CACA,eAAA,CAMA,sCf0kCJ,CerkCI,wDACE,6CAAA,CACA,8BfukCN,CenkCI,+BACE,UfqkCN,CgBxnCA,WAOE,2CAAA,CAGA,8CACE,CALF,gCAAA,CADA,aAAA,CAHA,MAAA,CADA,eAAA,CACA,OAAA,CACA,KAAA,CACA,ShB+nCF,CgBpnCE,aAfF,WAgBI,YhBunCF,CACF,CgBpnCE,mBAIE,2BAAA,CAHA,iEhBunCJ,CgBhnCE,mBACE,kDACE,CAEF,kEhBgnCJ,CgB1mCE,kBAEE,kBAAA,CADA,YAAA,CAEA,ehB4mCJ,CgBxmCE,mBAKE,kBAAA,CAEA,cAAA,CAHA,YA
AA,CAIA,uCAAA,CALA,aAAA,CAFA,iBAAA,CAQA,uBAAA,CAHA,qBAAA,CAJA,ShBinCJ,CgBvmCI,yBACE,UhBymCN,CgBrmCI,iCACE,oBhBumCN,CgBnmCI,uCAEE,uCAAA,CADA,YhBsmCN,CgBjmCI,2BAEE,YAAA,CADA,ahBomCN,CKt/BI,wCW/GA,2BAMI,YhBmmCN,CACF,CgBhmCM,8DAIE,iBAAA,CAHA,aAAA,CAEA,aAAA,CADA,UhBomCR,CKphCI,mCWzEA,iCAII,YhB6lCN,CACF,CgB1lCM,wCACE,YhB4lCR,CgBxlCM,+CACE,oBhB0lCR,CK/hCI,sCWtDA,iCAII,YhBqlCN,CACF,CgBhlCE,kBAEE,YAAA,CACA,cAAA,CAFA,iBAAA,CAIA,8DACE,CAFF,kBhBmlCJ,CgB7kCI,oCAGE,SAAA,CADA,mBAAA,CAKA,6BAAA,CAHA,8DACE,CAJF,UhBmlCN,CgB1kCM,8CACE,8BhB4kCR,CgBvkCI,8BACE,ehBykCN,CgBpkCE,4BAGE,gBhBykCJ,CgB5kCE,4BAGE,iBhBykCJ,CgB5kCE,4BAIE,kBhBwkCJ,CgB5kCE,4BAIE,iBhBwkCJ,CgB5kCE,kBACE,WAAA,CAIA,eAAA,CAHA,aAAA,CAIA,kBhBskCJ,CgBnkCI,4CAGE,SAAA,CADA,mBAAA,CAKA,8BAAA,CAHA,8DACE,CAJF,UhBykCN,CgBhkCM,sDACE,6BhBkkCR,CgB9jCM,8DAGE,SAAA,CADA,mBAAA,CAKA,uBAAA,CAHA,8DACE,CAJF,ShBokCR,CgBzjCI,uCAGE,WAAA,CAFA,iBAAA,CACA,UhB4jCN,CgBtjCE,mBACE,YAAA,CACA,aAAA,CACA,cAAA,CAEA,+CACE,CAFF,kBhByjCJ,CgBnjCI,8DACE,WAAA,CACA,SAAA,CACA,oChBqjCN,CgB5iCI,yBACE,QhB8iCN,CgBziCE,mBACE,YhB2iCJ,CKxmCI,mCW4DF,6BAQI,gBhB2iCJ,CgBnjCA,6BAQI,iBhB2iCJ,CgBnjCA,mBAKI,aAAA,CAEA,iBAAA,CADA,ahB6iCJ,CACF,CKhnCI,sCW4DF,6BAaI,kBhB2iCJ,CgBxjCA,6BAaI,mBhB2iCJ,CACF,CD3xCA,SAGE,uCAAA,CAFA,eAAA,CACA,eC+xCF,CD3xCE,eACE,mBAAA,CACA,cAAA,CAGA,eAAA,CADA,QAAA,CADA,SC+xCJ,CDzxCE,sCAEE,WAAA,CADA,iBAAA,CAAA,kBC4xCJ,CDvxCE,eACE,+BCyxCJ,CDtxCI,0CACE,+BCwxCN,CDlxCA,UAKE,wBkBaa,ClBZb,oBAAA,CAFA,UAAA,CAHA,oBAAA,CAEA,eAAA,CADA,0BAAA,CAAA,2BCyxCF,CkB3zCA,MACE,0MAAA,CACA,gMAAA,CACA,yNlB8zCF,CkBxzCA,QACE,eAAA,CACA,elB2zCF,CkBxzCE,eAKE,uCAAA,CAJA,aAAA,CAGA,eAAA,CADA,eAAA,CADA,eAAA,CAIA,sBlB0zCJ,CkBvzCI,+BACE,YlByzCN,CkBtzCM,mCAEE,WAAA,CADA,UlByzCR,CkBjzCQ,sFAME,iBAAA,CALA,aAAA,CAGA,aAAA,CADA,cAAA,CAEA,kBAAA,CAHA,UlBuzCV,CkB5yCE,cAGE,eAAA,CADA,QAAA,CADA,SlBgzCJ,CkB1yCE,cACE,elB4yCJ,CkBzyCI,sCACE,elB2yCN,CkB5yCI,sCACE,clB2yCN,CkBtyCE,cAEE,sBAAA,CADA,YAAA,CAEA,iBAAA,CAEA,uBAAA,CADA,sBlByyCJ,CkBryCI,sBACE,uClBuyCN,CkBhyCM,6EAEE,+BlBkyCR,CkB7xCI,2BAIE,iBlB4xCN,CkBxxCI,kCACE,
gBlB0xCN,CkBtxCI,kBAGE,iBAAA,CAFA,aAAA,CACA,YlByxCN,CkBrxCM,8BACE,iBlBuxCR,CkBxxCM,8BACE,kBlBuxCR,CkBlxCI,wFACE,+BAAA,CACA,clBoxCN,CkBhxCI,4BACE,uCAAA,CACA,oBlBkxCN,CkB9wCI,0CACE,YlBgxCN,CkB7wCM,yDAKE,6BAAA,CAJA,aAAA,CAEA,WAAA,CACA,qCAAA,CAAA,6BAAA,CAFA,UlBkxCR,CkB3wCM,kDACE,YlB6wCR,CkBvwCE,iCACE,YlBywCJ,CkBtwCI,6CACE,WlBwwCN,CkBnwCE,cACE,alBqwCJ,CkBjwCE,gBACE,YlBmwCJ,CK5uCI,wCahBA,0CASE,2CAAA,CAHA,YAAA,CACA,qBAAA,CACA,WAAA,CALA,MAAA,CADA,iBAAA,CACA,OAAA,CACA,KAAA,CACA,SlBkwCJ,CkBvvCI,+DACE,eAAA,CACA,elByvCN,CkBrvCI,gCAQE,qDAAA,CAHA,uCAAA,CAEA,cAAA,CALA,aAAA,CAEA,kBAAA,CADA,wBAAA,CAFA,iBAAA,CAKA,kBlByvCN,CkBpvCM,wDAGE,UlB0vCR,CkB7vCM,wDAGE,WlB0vCR,CkB7vCM,8CAIE,aAAA,CAEA,aAAA,CACA,YAAA,CANA,iBAAA,CACA,SAAA,CAGA,YlBwvCR,CkBnvCQ,oDAKE,6BAAA,CADA,UAAA,CAHA,aAAA,CAEA,WAAA,CAGA,2CAAA,CAAA,mCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAPA,UlB4vCV,CkBhvCM,8CAGE,2CAAA,CACA,gEACE,CAJF,eAAA,CAKA,4BAAA,CAJA,kBlBqvCR,CkB9uCQ,2DACE,YlBgvCV,CkB3uCM,8CAGE,2CAAA,CADA,gCAAA,CADA,elB+uCR,CkBzuCM,yCAIE,aAAA,CAFA,UAAA,CAIA,YAAA,CADA,aAAA,CAJA,iBAAA,CACA,WAAA,CACA,SlB8uCR,CkBtuCI,+BACE,MlBwuCN,CkBpuCI,+BAEE,4DAAA,CADA,SlBuuCN,CkBnuCM,qDACE,+BlBquCR,CkBluCQ,sHACE,+BlBouCV,CkB9tCI,+BAEE,YAAA,CADA,mBlBiuCN,CkB7tCM,mCACE,elB+tCR,CkB3tCM,6CACE,SlB6tCR,CkBztCM,uDAGE,mBlB4tCR,CkB/tCM,uDAGE,kBlB4tCR,CkB/tCM,6CAIE,gBAAA,CAFA,aAAA,CADA,YlB8tCR,CkBxtCQ,mDAKE,6BAAA,CADA,UAAA,CAHA,aAAA,CAEA,WAAA,CAGA,2CAAA,CAAA,mCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAPA,UlBiuCV,CkBjtCM,+CACE,mBlBmtCR,CkB3sCM,4CAEE,wBAAA,CADA,elB8sCR,CkB1sCQ,oEACE,mBlB4sCV,CkB7sCQ,oEACE,oBlB4sCV,CkBxsCQ,4EACE,iBlB0sCV,CkB3sCQ,4EACE,kBlB0sCV,CkBtsCQ,oFACE,mBlBwsCV,CkBzsCQ,oFACE,oBlBwsCV,CkBpsCQ,4FACE,mBlBssCV,CkBvsCQ,4FACE,oBlBssCV,CkB/rCE,mBACE,wBlBisCJ,CkB7rCE,wBACE,YAAA,CACA,SAAA,CAIA,0BAAA,CAHA,oElBgsCJ,CkB1rCI,kCACE,2BlB4rCN,CkBvrCE,gCACE,SAAA,CAIA,uBAAA,CAHA,qElB0rCJ,CkBprCI,8CAEE,kCAAA,CAAA,0BlBqrCN,CACF,CKh4CI,wCamNA,0CACE,YlBgrCJ,CkB7qCI,yDACE,UlB+qCN,CkB3qCI,wDACE,
YlB6qCN,CkBzqCI,kDACE,YlB2qCN,CkBtqCE,gBAIE,iDAAA,CADA,gCAAA,CAFA,aAAA,CACA,elB0qCJ,CACF,CK77CM,6Da4RF,6CACE,YlBoqCJ,CkBjqCI,4DACE,UlBmqCN,CkB/pCI,2DACE,YlBiqCN,CkB7pCI,qDACE,YlB+pCN,CACF,CKr7CI,mCa8RA,kCAME,qCAAA,CACA,qDAAA,CANA,eAAA,CACA,KAAA,CAGA,SlB0pCJ,CkBrpCI,6CACE,uBlBupCN,CkBnpCI,gDACE,YlBqpCN,CACF,CKp8CI,sCa7JJ,QAkdI,oDlBmpCF,CkBhpCE,gCAME,qCAAA,CACA,qDAAA,CANA,eAAA,CACA,KAAA,CAGA,SlBkpCJ,CkB7oCI,8CACE,uBlB+oCN,CkBroCE,sEACE,YlB0oCJ,CkBtoCE,sEACE,alBwoCJ,CkBpoCE,6CACE,YlBsoCJ,CkBloCE,uBACE,aAAA,CACA,elBooCJ,CkBjoCI,kCACE,elBmoCN,CkB/nCI,qCACE,elBioCN,CkB9nCM,0CACE,uClBgoCR,CkB5nCM,6DACE,mBlB8nCR,CkB1nCM,mDACE,YlB4nCR,CkBvnCI,+BACE,alBynCN,CkBtnCM,2DACE,SlBwnCR,CkBlnCE,cAGE,kBAAA,CADA,YAAA,CAEA,gCAAA,CAHA,WlBunCJ,CkBjnCI,oBACE,uDlBmnCN,CkB/mCI,oBAME,6BAAA,CACA,kBAAA,CAFA,UAAA,CAJA,oBAAA,CAEA,WAAA,CAMA,2CAAA,CAAA,mCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAJA,yBAAA,CAJA,qBAAA,CAFA,UlB2nCN,CkB9mCM,8BACE,wBlBgnCR,CkB5mCM,sKAEE,uBlB6mCR,CkB9lCI,+HACE,YlBomCN,CkBjmCM,oDACE,aAAA,CACA,SlBmmCR,CkBhmCQ,kEAOE,qCAAA,CACA,qDAAA,CAFA,eAAA,CADA,YAAA,CADA,eAAA,CAHA,eAAA,CACA,KAAA,CACA,SlBumCV,CkB/lCU,0FACE,mBlBimCZ,CkB7lCU,gFACE,YlB+lCZ,CkBzlCM,kDACE,uClB2lCR,CkBrlCI,2CACE,alBulCN,CkBplCM,iFACE,mBlBslCR,CkBvlCM,iFACE,kBlBslCR,CkB7kCI,mFACE,elB+kCN,CkB5kCM,iGACE,SlB8kCR,CkBzkCI,qFAGE,mDlB2kCN,CkB9kCI,qFAGE,oDlB2kCN,CkB9kCI,2EACE,aAAA,CACA,oBlB4kCN,CkBxkCM,0FACE,YlB0kCR,CACF,CmB7uDA,eAKE,eAAA,CACA,eAAA,CAJA,SnBovDF,CmB7uDE,gCANA,kBAAA,CAFA,YAAA,CAGA,sBnB2vDF,CmBtvDE,iBAOE,mBAAA,CAFA,aAAA,CADA,gBAAA,CAEA,iBnBgvDJ,CmB3uDE,wBAEE,qDAAA,CADA,uCnB8uDJ,CmBzuDE,qBACE,6CnB2uDJ,CmBtuDI,sDAEE,uDAAA,CADA,+BnByuDN,CmBruDM,8DACE,+BnBuuDR,CmBluDI,mCACE,uCAAA,CACA,oBnBouDN,CmBhuDI,yBAKE,iBAAA,CADA,yCAAA,CAHA,aAAA,CAEA,eAAA,CADA,YnBquDN,CoBrxDE,eAGE,+DAAA,CADA,oBAAA,CADA,qBpB0xDJ,CKrmDI,wCetLF,eAOI,YpBwxDJ,CACF,CoBlxDM,6BACE,oBpBoxDR,CoB9wDE,kBACE,YAAA,CACA,qBAAA,CACA,SAAA,CACA,cpBgxDJ,CoBzwDI,0BACE,sBpB2wDN,CoBxwDM,gEACE,+BpB0wDR,CoBpwDE,kBACE,oBpBswDJ,CoBnwDI,
mCAGE,kBAAA,CAFA,YAAA,CACA,SAAA,CAEA,iBpBqwDN,CoBjwDI,oCAIE,kBAAA,CAHA,mBAAA,CACA,kBAAA,CACA,SAAA,CAGA,QAAA,CADA,iBpBowDN,CoB/vDI,0DACE,kBpBiwDN,CoBlwDI,0DACE,iBpBiwDN,CoB7vDI,iDACE,uBAAA,CAEA,YpB8vDN,CoBzvDE,uEAEE,YpB2vDJ,CoBpvDA,YAGE,kBAAA,CAFA,YAAA,CAIA,eAAA,CAHA,SAAA,CAIA,eAAA,CAFA,UpByvDF,CoBpvDE,yBACE,WpBsvDJ,CoB/uDA,kBACE,YpBkvDF,CKrqDI,wCe9EJ,kBAKI,wBpBkvDF,CACF,CoB/uDE,qCACE,WpBivDJ,CKhsDI,sCelDF,+CAKI,kBpBivDJ,CoBtvDA,+CAKI,mBpBivDJ,CACF,CKlrDI,wCe1DJ,6BAII,SpB6uDF,CACF,CqBj3DA,MACE,igBrBo3DF,CqB92DA,WACE,iBrBi3DF,CKntDI,mCgB/JJ,WAKI,erBi3DF,CACF,CqB92DE,kBACE,YrBg3DJ,CqB52DE,oBAEE,SAAA,CADA,SrB+2DJ,CK5sDI,wCgBpKF,8BAkBI,YrB42DJ,CqB93DA,8BAkBI,arB42DJ,CqB93DA,oBAYI,2CAAA,CACA,kBAAA,CAJA,WAAA,CACA,eAAA,CACA,mBAAA,CALA,iBAAA,CACA,SAAA,CAUA,uBAAA,CAHA,4CACE,CAPF,UrBs3DJ,CqBz2DI,+DACE,SAAA,CACA,oCrB22DN,CACF,CKlvDI,mCgBjJF,8BAyCI,MrBq2DJ,CqB94DA,8BAyCI,OrBq2DJ,CqB94DA,oBAoCI,0BAAA,CADA,cAAA,CADA,QAAA,CAHA,cAAA,CACA,KAAA,CAKA,sDACE,CALF,OrB62DJ,CqBl2DI,+DAME,YAAA,CACA,SAAA,CACA,4CACE,CARF,UrBu2DN,CACF,CKjvDI,wCgBxGA,+DAII,mBrBy1DN,CACF,CK/xDM,6DgB/DF,+DASI,mBrBy1DN,CACF,CKpyDM,6DgB/DF,+DAcI,mBrBy1DN,CACF,CqBp1DE,kBAEE,kCAAA,CAAA,0BrBq1DJ,CKnwDI,wCgBpFF,4BAmBI,MrBi1DJ,CqBp2DA,4BAmBI,OrBi1DJ,CqBp2DA,kBAUI,QAAA,CAEA,SAAA,CADA,eAAA,CALA,cAAA,CACA,KAAA,CAWA,wBAAA,CALA,qGACE,CALF,OAAA,CADA,SrB41DJ,CqB90DI,4BACE,yBrBg1DN,CqB50DI,6DAEE,WAAA,CACA,SAAA,CAMA,uBAAA,CALA,sGACE,CAJF,UrBk1DN,CACF,CK9yDI,mCgBjEF,4BA2CI,WrB40DJ,CqBv3DA,4BA2CI,UrB40DJ,CqBv3DA,kBA6CI,eAAA,CAHA,iBAAA,CAIA,8CAAA,CAFA,arB20DJ,CACF,CK70DM,6DgBOF,6DAII,arBs0DN,CACF,CK5zDI,sCgBfA,6DASI,arBs0DN,CACF,CqBj0DE,iBAIE,2CAAA,CACA,0BAAA,CAFA,aAAA,CAFA,iBAAA,CAKA,2CACE,CALF,SrBu0DJ,CKz0DI,mCgBAF,iBAaI,0BAAA,CACA,mBAAA,CAFA,arBm0DJ,CqB9zDI,uBACE,0BrBg0DN,CACF,CqB5zDI,4DAEE,2CAAA,CACA,6BAAA,CACA,8BAAA,CAHA,gCrBi0DN,CqBzzDE,4BAKE,mBAAA,CAAA,oBrB8zDJ,CqBn0DE,4BAKE,mBAAA,CAAA,oBrB8zDJ,CqBn0DE,kBAQE,gBAAA,CAFA,eAAA,CAFA,WAAA,CAHA,iBAAA,CAMA,sBAAA,CAJA,UAAA,CADA,SrBi0DJ,CqBxzDI,+BACE,qBrB0zDN,CqBtzDI,kEAEE,uCrBuzDN,Cq
BnzDI,6BACE,YrBqzDN,CKz1DI,wCgBaF,kBA8BI,eAAA,CADA,aAAA,CADA,UrBszDJ,CACF,CKn3DI,mCgBgCF,4BAmCI,mBrBszDJ,CqBz1DA,4BAmCI,oBrBszDJ,CqBz1DA,kBAqCI,aAAA,CADA,erBqzDJ,CqBjzDI,+BACE,uCrBmzDN,CqB/yDI,mCACE,gCrBizDN,CqB7yDI,6DACE,kBrB+yDN,CqB5yDM,8EACE,uCrB8yDR,CqB1yDM,0EACE,WrB4yDR,CACF,CqBtyDE,iBAIE,cAAA,CAHA,oBAAA,CAEA,aAAA,CAEA,kCACE,CAJF,YrB2yDJ,CqBnyDI,uBACE,UrBqyDN,CqBjyDI,yCAGE,UrBoyDN,CqBvyDI,yCAGE,WrBoyDN,CqBvyDI,+BACE,iBAAA,CACA,SAAA,CAEA,SrBmyDN,CqBhyDM,6CACE,oBrBkyDR,CKz4DI,wCgB+FA,yCAcI,UrBiyDN,CqB/yDE,yCAcI,WrBiyDN,CqB/yDE,+BAaI,SrBkyDN,CqB9xDM,+CACE,YrBgyDR,CACF,CKr6DI,mCgBkHA,+BAwBI,mBrB+xDN,CqB5xDM,8CACE,YrB8xDR,CACF,CqBxxDE,8BAGE,WrB4xDJ,CqB/xDE,8BAGE,UrB4xDJ,CqB/xDE,oBAKE,mBAAA,CAJA,iBAAA,CACA,SAAA,CAEA,SrB2xDJ,CKj6DI,wCgBkIF,8BAUI,WrB0xDJ,CqBpyDA,8BAUI,UrB0xDJ,CqBpyDA,oBASI,SrB2xDJ,CACF,CqBvxDI,uCACE,iBrB6xDN,CqB9xDI,uCACE,kBrB6xDN,CqB9xDI,6BAEE,uCAAA,CACA,SAAA,CAIA,oBAAA,CAHA,+DrB0xDN,CqBpxDM,iDAEE,uCAAA,CADA,YrBuxDR,CqBlxDM,gGAGE,SAAA,CADA,mBAAA,CAEA,kBrBmxDR,CqBhxDQ,sGACE,UrBkxDV,CqB3wDE,8BAOE,mBAAA,CAAA,oBrBkxDJ,CqBzxDE,8BAOE,mBAAA,CAAA,oBrBkxDJ,CqBzxDE,oBAIE,kBAAA,CAKA,yCAAA,CANA,YAAA,CAKA,eAAA,CAFA,WAAA,CAKA,SAAA,CAVA,iBAAA,CACA,KAAA,CAUA,uBAAA,CAFA,kBAAA,CALA,UrBoxDJ,CK39DI,mCgBkMF,8BAgBI,mBrB8wDJ,CqB9xDA,8BAgBI,oBrB8wDJ,CqB9xDA,oBAiBI,erB6wDJ,CACF,CqB1wDI,+DACE,SAAA,CACA,0BrB4wDN,CqBvwDE,6BAKE,+BrB0wDJ,CqB/wDE,0DAME,gCrBywDJ,CqB/wDE,6BAME,+BrBywDJ,CqB/wDE,mBAIE,eAAA,CAHA,iBAAA,CAEA,UAAA,CADA,SrB6wDJ,CK19DI,wCgB2MF,mBAWI,QAAA,CADA,UrB0wDJ,CACF,CKn/DI,mCgB8NF,mBAiBI,SAAA,CADA,UAAA,CAEA,sBrBywDJ,CqBtwDI,8DACE,8BAAA,CACA,SrBwwDN,CACF,CqBnwDE,uBASE,kCAAA,CAAA,0BAAA,CAFA,2CAAA,CANA,WAAA,CACA,eAAA,CAIA,kBrBowDJ,CqB9vDI,iEAZF,uBAaI,uBrBiwDJ,CACF,CKhiEM,6DgBiRJ,uBAkBI,arBiwDJ,CACF,CK/gEI,sCgB2PF,uBAuBI,arBiwDJ,CACF,CKphEI,mCgB2PF,uBA4BI,YAAA,CAEA,yDAAA,CADA,oBrBkwDJ,CqB9vDI,kEACE,erBgwDN,CqB5vDI,6BACE,+CrB8vDN,CqB1vDI,0CAEE,YAAA,CADA,WrB6vDN,CqBxvDI,gDACE,oDrB0vDN,CqBvvDM,sDACE,0CrByvDR,CACF,CqBlvDA,kBACE,gCAAA,CACA,qBrBqvDF,CqBlvDE,wBAKE,qDAAA,CADA,uCAA
A,CAFA,gBAAA,CACA,kBAAA,CAFA,eAAA,CAKA,uBrBovDJ,CKxjEI,mCgB8TF,kCAUI,mBrBovDJ,CqB9vDA,kCAUI,oBrBovDJ,CACF,CqBhvDE,wBAGE,eAAA,CADA,QAAA,CADA,SAAA,CAIA,wBAAA,CAAA,gBrBivDJ,CqB7uDE,wBACE,yDrB+uDJ,CqB5uDI,oCACE,erB8uDN,CqBzuDE,wBACE,aAAA,CACA,YAAA,CAEA,uBAAA,CADA,gCrB4uDJ,CqBxuDI,4DACE,uDrB0uDN,CqBtuDI,gDACE,mBrBwuDN,CqBnuDE,gCAKE,cAAA,CADA,aAAA,CAEA,YAAA,CALA,eAAA,CAMA,uBAAA,CALA,KAAA,CACA,SrByuDJ,CqBluDI,wCACE,YrBouDN,CqB/tDI,wDACE,YrBiuDN,CqB7tDI,oCAGE,+BAAA,CADA,gBAAA,CADA,mBAAA,CAGA,2CrB+tDN,CK1mEI,mCgBuYA,8CAUI,mBrB6tDN,CqBvuDE,8CAUI,oBrB6tDN,CACF,CqBztDI,oFAEE,uDAAA,CADA,+BrB4tDN,CqBttDE,sCACE,2CrBwtDJ,CqBntDE,2BAGE,eAAA,CADA,eAAA,CADA,iBrButDJ,CK3nEI,mCgBmaF,qCAOI,mBrBqtDJ,CqB5tDA,qCAOI,oBrBqtDJ,CACF,CqBjtDE,kCAEE,MrButDJ,CqBztDE,kCAEE,OrButDJ,CqBztDE,wBAME,uCAAA,CAFA,aAAA,CACA,YAAA,CAJA,iBAAA,CAEA,YrBstDJ,CKrnEI,wCgB4ZF,wBAUI,YrBmtDJ,CACF,CqBhtDI,8BAKE,6BAAA,CADA,UAAA,CAHA,oBAAA,CAEA,WAAA,CAGA,+CAAA,CAAA,uCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAPA,UrBytDN,CqB/sDM,wCACE,oBrBitDR,CqB3sDE,8BAGE,uCAAA,CAFA,gBAAA,CACA,erB8sDJ,CqB1sDI,iCAKE,gCAAA,CAHA,eAAA,CACA,eAAA,CACA,eAAA,CAHA,erBgtDN,CqBzsDM,sCACE,oBrB2sDR,CqBtsDI,iCAKE,gCAAA,CAHA,gBAAA,CACA,eAAA,CACA,eAAA,CAHA,arB4sDN,CqBrsDM,sCACE,oBrBusDR,CqBjsDE,yBAKE,gCAAA,CAJA,aAAA,CAEA,gBAAA,CACA,iBAAA,CAFA,arBssDJ,CqB/rDE,uBAGE,wBAAA,CAFA,+BAAA,CACA,yBrBksDJ,CsBt2EA,WACE,iBAAA,CACA,StBy2EF,CsBt2EE,kBAOE,2CAAA,CACA,mBAAA,CACA,8BAAA,CAHA,gCAAA,CAHA,QAAA,CAEA,gBAAA,CADA,YAAA,CAMA,SAAA,CATA,iBAAA,CACA,sBAAA,CAaA,mCAAA,CAJA,oEtBy2EJ,CsBl2EI,6EACE,gBAAA,CACA,SAAA,CAKA,+BAAA,CAJA,8EtBq2EN,CsB71EI,wBAWE,+BAAA,CAAA,8CAAA,CAFA,6BAAA,CAAA,8BAAA,CACA,YAAA,CAFA,UAAA,CAHA,QAAA,CAFA,QAAA,CAIA,kBAAA,CADA,iBAAA,CALA,iBAAA,CACA,KAAA,CAEA,OtBs2EN,CsB11EE,iBAOE,mBAAA,CAFA,eAAA,CACA,oBAAA,CAHA,QAAA,CAFA,kBAAA,CAGA,aAAA,CAFA,StBi2EJ,CsBx1EE,iBACE,kBtB01EJ,CsBt1EE,2BAGE,kBAAA,CAAA,oBtB41EJ,CsB/1EE,2BAGE,mBAAA,CAAA,mBtB41EJ,CsB/1EE,iBAIE,cAAA,CAHA,aAAA,CAIA,YAAA,CAIA,uBAAA,CAHA,2CACE,CALF,UtB61EJ,CsBn1EI,8CACE,+
BtBq1EN,CsBj1EI,uBACE,qDtBm1EN,CuBv6EA,YAIE,qBAAA,CADA,aAAA,CAGA,gBAAA,CALA,eAAA,CACA,UAAA,CAGA,avB26EF,CuBv6EE,aATF,YAUI,YvB06EF,CACF,CK5vEI,wCkB3KF,+BAeI,avBq6EJ,CuBp7EA,+BAeI,cvBq6EJ,CuBp7EA,qBAUI,2CAAA,CAHA,aAAA,CAEA,WAAA,CALA,cAAA,CACA,KAAA,CASA,uBAAA,CAHA,iEACE,CAJF,aAAA,CAFA,SvB86EJ,CuBl6EI,mEACE,8BAAA,CACA,6BvBo6EN,CuBj6EM,6EACE,8BvBm6ER,CuB95EI,6CAEE,QAAA,CAAA,MAAA,CACA,QAAA,CAEA,eAAA,CAJA,iBAAA,CACA,OAAA,CAEA,qBAAA,CAFA,KvBm6EN,CACF,CK3yEI,sCkBtKJ,YAuDI,QvB85EF,CuB35EE,mBACE,WvB65EJ,CuBz5EE,6CACE,UvB25EJ,CACF,CuBv5EE,uBACE,YAAA,CACA,OvBy5EJ,CK1zEI,mCkBjGF,uBAMI,QvBy5EJ,CuBt5EI,8BACE,WvBw5EN,CuBp5EI,qCACE,avBs5EN,CuBl5EI,+CACE,kBvBo5EN,CACF,CuB/4EE,wBAUE,uBAAA,CANA,kCAAA,CAAA,0BAAA,CAHA,cAAA,CACA,eAAA,CASA,yDAAA,CAFA,oBvB84EJ,CuBz4EI,2CAEE,YAAA,CADA,WvB44EN,CuBv4EI,mEACE,+CvBy4EN,CuBt4EM,qHACE,oDvBw4ER,CuBr4EQ,iIACE,0CvBu4EV,CuBx3EE,wCAGE,wBACE,qBvBw3EJ,CuBp3EE,6BACE,kCvBs3EJ,CuBv3EE,6BACE,iCvBs3EJ,CACF,CKl1EI,wCkB5BF,YAME,0BAAA,CADA,QAAA,CAEA,SAAA,CANA,cAAA,CACA,KAAA,CAMA,sDACE,CALF,OAAA,CADA,SvBu3EF,CuB52EE,4CAEE,WAAA,CACA,SAAA,CACA,4CACE,CAJF,UvBi3EJ,CACF,CwB9hFA,iBACE,GACE,QxBgiFF,CwB7hFA,GACE,axB+hFF,CACF,CwB3hFA,gBACE,GACE,SAAA,CACA,0BxB6hFF,CwB1hFA,IACE,SxB4hFF,CwBzhFA,GACE,SAAA,CACA,uBxB2hFF,CACF,CwBnhFA,MACE,+eAAA,CACA,ygBAAA,CACA,mmBAAA,CACA,sfxBqhFF,CwB/gFA,WAOE,kCAAA,CAAA,0BAAA,CANA,aAAA,CACA,gBAAA,CACA,eAAA,CAEA,uCAAA,CAGA,uBAAA,CAJA,kBxBqhFF,CwB9gFE,iBACE,UxBghFJ,CwB5gFE,iBACE,oBAAA,CAEA,aAAA,CACA,qBAAA,CAFA,UxBghFJ,CwB3gFI,+BACE,iBxB8gFN,CwB/gFI,+BACE,kBxB8gFN,CwB/gFI,qBAEE,gBxB6gFN,CwBzgFI,kDACE,iBxB4gFN,CwB7gFI,kDACE,kBxB4gFN,CwB7gFI,kDAEE,iBxB2gFN,CwB7gFI,kDAEE,kBxB2gFN,CwBtgFE,iCAGE,iBxB2gFJ,CwB9gFE,iCAGE,kBxB2gFJ,CwB9gFE,uBACE,oBAAA,CACA,6BAAA,CAEA,eAAA,CACA,sBAAA,CACA,qBxBwgFJ,CwBpgFE,kBACE,YAAA,CAMA,gBAAA,CALA,SAAA,CAMA,oBAAA,CAHA,gBAAA,CAIA,WAAA,CAHA,eAAA,CAFA,SAAA,CADA,UxB4gFJ,CwBngFI,iDACE,4BxBqgFN,CwBhgFE,iBACE,eAAA,CACA,sBxBkgFJ,CwB//EI,gDACE,2BxBigFN,CwB7/EI,kCAIE,kBxBqgFN,CwBzgFI,kCAIE,iBxBqgFN,CwBzgFI,wBAOE,6BAAA,CADA,UAAA,CALA
,oBAAA,CAEA,YAAA,CAKA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CALA,uBAAA,CAHA,WxBugFN,CwB3/EI,iCACE,axB6/EN,CwBz/EI,iCACE,gDAAA,CAAA,wCxB2/EN,CwBv/EI,+BACE,8CAAA,CAAA,sCxBy/EN,CwBr/EI,+BACE,8CAAA,CAAA,sCxBu/EN,CwBn/EI,sCACE,qDAAA,CAAA,6CxBq/EN,CyB5oFA,MACE,mSAAA,CACA,oVAAA,CACA,mOAAA,CACA,qZzB+oFF,CyBzoFA,WACE,iBzB4oFF,CyBzoFE,iBAME,kDAAA,CADA,UAAA,CAJA,oBAAA,CAEA,cAAA,CAIA,mCAAA,CAAA,2BAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CANA,0BAAA,CAFA,azBmpFJ,CyBvoFE,uBACE,6BzByoFJ,CyBroFE,sBACE,wCAAA,CAAA,gCzBuoFJ,CyBnoFE,6BACE,+CAAA,CAAA,uCzBqoFJ,CyBjoFE,4BACE,8CAAA,CAAA,sCzBmoFJ,C0B/qFA,SASE,2CAAA,CADA,gCAAA,CAJA,aAAA,CAGA,eAAA,CADA,aAAA,CADA,UAAA,CAFA,S1BsrFF,C0B7qFE,aAZF,SAaI,Y1BgrFF,CACF,CKrgFI,wCqBzLJ,SAkBI,Y1BgrFF,CACF,C0B7qFE,iBACE,mB1B+qFJ,C0B3qFE,yBAIE,iB1BkrFJ,C0BtrFE,yBAIE,kB1BkrFJ,C0BtrFE,eAQE,eAAA,CAPA,YAAA,CAMA,eAAA,CAJA,QAAA,CAEA,aAAA,CAHA,SAAA,CAWA,oBAAA,CAPA,kB1BgrFJ,C0BtqFI,kCACE,Y1BwqFN,C0BnqFE,eACE,aAAA,CACA,kBAAA,CAAA,mB1BqqFJ,C0BlqFI,sCACE,aAAA,CACA,S1BoqFN,C0B9pFE,eAOE,kCAAA,CAAA,0BAAA,CANA,YAAA,CAEA,eAAA,CADA,gBAAA,CAMA,UAAA,CAJA,uCAAA,CACA,oBAAA,CAIA,8D1B+pFJ,C0B1pFI,0CACE,aAAA,CACA,S1B4pFN,C0BxpFI,6BAEE,kB1B2pFN,C0B7pFI,6BAEE,iB1B2pFN,C0B7pFI,mBAGE,iBAAA,CAFA,Y1B4pFN,C0BrpFM,2CACE,qB1BupFR,C0BxpFM,2CACE,qB1B0pFR,C0B3pFM,2CACE,qB1B6pFR,C0B9pFM,2CACE,qB1BgqFR,C0BjqFM,2CACE,oB1BmqFR,C0BpqFM,2CACE,qB1BsqFR,C0BvqFM,2CACE,qB1ByqFR,C0B1qFM,2CACE,qB1B4qFR,C0B7qFM,4CACE,qB1B+qFR,C0BhrFM,4CACE,oB1BkrFR,C0BnrFM,4CACE,qB1BqrFR,C0BtrFM,4CACE,qB1BwrFR,C0BzrFM,4CACE,qB1B2rFR,C0B5rFM,4CACE,qB1B8rFR,C0B/rFM,4CACE,oB1BisFR,C0B3rFI,gCACE,SAAA,CAIA,yBAAA,CAHA,wC1B8rFN,C2BjyFA,MACE,wS3BoyFF,C2B3xFE,qBAEE,mBAAA,CADA,kB3B+xFJ,C2B1xFE,8BAGE,iB3BoyFJ,C2BvyFE,8BAGE,gB3BoyFJ,C2BvyFE,oBAUE,+CAAA,CACA,oBAAA,CAVA,oBAAA,CAIA,gBAAA,CACA,eAAA,CAEA,qBAAA,CADA,eAAA,CAHA,kBAAA,CAFA,uBAAA,CAOA,qB3B8xFJ,C2BzxFI,0BAGE,uCAAA,CAFA,aAAA,CACA,YAAA,CAEA,6C3B2xFN,C2BtxFM,gEAEE,0CAAA,CADA,+B3ByxFR,C2BnxFI,yBACE,uB3BqxFN,C2B7wFI,gCAOE,oDAAA,CAD
A,UAAA,CALA,oBAAA,CAEA,YAAA,CACA,iBAAA,CAKA,qCAAA,CAAA,6BAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAJA,iCAAA,CAHA,0BAAA,CAHA,W3ByxFN,C2B3wFI,wFACE,0C3B6wFN,C4Br1FA,iBACE,GACE,oB5Bw1FF,C4Br1FA,IACE,kB5Bu1FF,C4Bp1FA,GACE,oB5Bs1FF,CACF,C4B90FA,MACE,0NAAA,CACA,uPAAA,CACA,wB5Bg1FF,C4B10FA,YA6BE,kCAAA,CAAA,0BAAA,CAVA,2CAAA,CACA,mBAAA,CACA,8BAAA,CAHA,gCAAA,CADA,sCAAA,CAdA,+IACE,CAYF,8BAAA,CAMA,SAAA,CArBA,iBAAA,CACA,uBAAA,CAyBA,4BAAA,CAJA,uDACE,CATF,6BAAA,CADA,S5Bq1FF,C4Bn0FE,oBAEE,SAAA,CAKA,uBAAA,CAJA,2EACE,CAHF,S5Bw0FJ,C4B9zFE,8CACE,sC5Bg0FJ,C4B5zFE,mBAEE,gBAAA,CADA,a5B+zFJ,C4B3zFI,2CACE,Y5B6zFN,C4BzzFI,0CACE,e5B2zFN,C4BnzFA,eACE,eAAA,CAGA,YAAA,CADA,0BAAA,CADA,kB5BwzFF,C4BnzFE,yBACE,a5BqzFJ,C4BjzFE,oBACE,sCAAA,CACA,iB5BmzFJ,C4B/yFE,6BACE,oBAAA,CAGA,gB5B+yFJ,C4B3yFE,sBAoBE,mBAAA,CAdA,cAAA,CAHA,oBAAA,CACA,gBAAA,CAAA,iBAAA,CAIA,YAAA,CAWA,eAAA,CAlBA,iBAAA,CAMA,wBAAA,CAAA,gBAAA,CAFA,uBAAA,CAHA,S5BqzFJ,C4B3yFI,qCACE,uB5B6yFN,C4BnyFI,cAvBF,sBAwBI,W5BsyFJ,C4BnyFI,wCACE,2B5BqyFN,C4BjyFI,6BAOE,qCAAA,CACA,+CAAA,CAAA,uC5BsyFN,C4B5xFI,yDAZE,UAAA,CADA,YAAA,CAIA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAVA,iBAAA,CACA,SAAA,CAEA,WAAA,CADA,U5B0zFN,C4B3yFI,4BAOE,oDAAA,CAMA,4CAAA,CAAA,oCAAA,CADA,uBAAA,CAJA,+C5BmyFN,C4BxxFM,gDACE,uB5B0xFR,C4BtxFM,mFACE,0C5BwxFR,CACF,C4BnxFI,0CAGE,2BAAA,CADA,uBAAA,CADA,S5BuxFN,C4BjxFI,8CACE,oB5BmxFN,C4BhxFM,aAJF,8CASI,8CAAA,CACA,iBAAA,CAHA,gCAAA,CADA,eAAA,CADA,cAAA,CAGA,kB5BqxFN,C4BhxFM,oDACE,mC5BkxFR,CACF,C4BtwFE,gCAEE,iBAAA,CADA,e5B0wFJ,C4BtwFI,mCACE,iB5BwwFN,C4BrwFM,oDAGE,a5BmxFR,C4BtxFM,oDAGE,c5BmxFR,C4BtxFM,0CAcE,8CAAA,CACA,iBAAA,CALA,gCAAA,CAEA,oBAAA,CACA,qBAAA,CANA,iBAAA,CACA,eAAA,CAHA,UAAA,CAIA,gBAAA,CALA,aAAA,CAEA,cAAA,CALA,iBAAA,CAUA,iBAAA,CATA,S5BoxFR,C6B3gGA,kBAME,e7BuhGF,C6B7hGA,kBAME,gB7BuhGF,C6B7hGA,QAUE,2CAAA,CACA,oBAAA,CAEA,8BAAA,CALA,uCAAA,CACA,cAAA,CALA,aAAA,CAGA,eAAA,CAKA,YAAA,CAPA,mBAAA,CAJA,cAAA,CACA,UAAA,CAiBA,yBAAA,CALA,mGACE,CAZF,S7B0hGF,C6BvgGE,aAtBF,QAuBI,Y7B0gGF,CACF,C6BvgGE,kBACE,wB
7BygGJ,C6BrgGE,gBAEE,SAAA,CADA,mBAAA,CAGA,+BAAA,CADA,uB7BwgGJ,C6BpgGI,0BACE,8B7BsgGN,C6BjgGE,4BAEE,0CAAA,CADA,+B7BogGJ,C6B//FE,YACE,oBAAA,CACA,oB7BigGJ,C8BtjGA,oBACE,GACE,mB9ByjGF,CACF,C8BjjGA,MACE,wf9BmjGF,C8B7iGA,YACE,aAAA,CAEA,eAAA,CADA,a9BijGF,C8B7iGE,+BAOE,kBAAA,CAAA,kB9B8iGJ,C8BrjGE,+BAOE,iBAAA,CAAA,mB9B8iGJ,C8BrjGE,qBAQE,aAAA,CACA,cAAA,CACA,YAAA,CATA,iBAAA,CAKA,U9B+iGJ,C8BxiGI,qCAIE,iB9BgjGN,C8BpjGI,qCAIE,kB9BgjGN,C8BpjGI,2BAME,6BAAA,CADA,UAAA,CAJA,oBAAA,CAEA,YAAA,CAIA,yCAAA,CAAA,iCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CARA,W9BkjGN,C8BriGE,kBAUE,2CAAA,CACA,mBAAA,CACA,8BAAA,CAJA,gCAAA,CACA,oBAAA,CAHA,kBAAA,CAFA,YAAA,CASA,SAAA,CANA,aAAA,CAFA,SAAA,CAJA,iBAAA,CAgBA,4BAAA,CAfA,UAAA,CAYA,+CACE,CAZF,S9BmjGJ,C8BliGI,+EACE,gBAAA,CACA,SAAA,CACA,sC9BoiGN,C8B9hGI,qCAEE,oCACE,gC9B+hGN,C8B3hGI,2CACE,c9B6hGN,CACF,C8BxhGE,kBACE,kB9B0hGJ,C8BthGE,4BAGE,kBAAA,CAAA,oB9B6hGJ,C8BhiGE,4BAGE,mBAAA,CAAA,mB9B6hGJ,C8BhiGE,kBAKE,cAAA,CAJA,aAAA,CAKA,YAAA,CAIA,uBAAA,CAHA,2CACE,CAJF,kBAAA,CAFA,U9B8hGJ,C8BnhGI,gDACE,+B9BqhGN,C8BjhGI,wBACE,qD9BmhGN,C+BnnGA,MAEI,uWAAA,CAAA,8WAAA,CAAA,sPAAA,CAAA,8xBAAA,CAAA,0MAAA,CAAA,gbAAA,CAAA,gMAAA,CAAA,iQAAA,CAAA,0VAAA,CAAA,6aAAA,CAAA,8SAAA,CAAA,gM/B4oGJ,C+BhoGE,4CAME,8CAAA,CACA,2BAAA,CACA,mBAAA,CACA,8BAAA,CAJA,mCAAA,CAJA,iBAAA,CAGA,gBAAA,CADA,iBAAA,CADA,eAAA,CASA,uBAAA,CADA,2B/BooGJ,C+BhoGI,aAdF,4CAeI,e/BmoGJ,CACF,C+BhoGI,sEACE,gC/BkoGN,C+B7nGI,gDACE,qB/B+nGN,C+B3nGI,gIAEE,iBAAA,CADA,c/B8nGN,C+BznGI,4FACE,iB/B2nGN,C+BvnGI,kFACE,e/BynGN,C+BrnGI,0FACE,Y/BunGN,C+BnnGI,8EACE,mB/BqnGN,C+BhnGE,sEAGE,iBAAA,CAAA,mB/B0nGJ,C+B7nGE,sEAGE,kBAAA,CAAA,kB/B0nGJ,C+B7nGE,sEASE,uB/BonGJ,C+B7nGE,sEASE,wB/BonGJ,C+B7nGE,sEAUE,4B/BmnGJ,C+B7nGE,4IAWE,6B/BknGJ,C+B7nGE,sEAWE,4B/BknGJ,C+B7nGE,kDAOE,0BAAA,CACA,WAAA,CAFA,eAAA,CADA,eAAA,CAHA,oBAAA,CAAA,iBAAA,CADA,iB/B4nGJ,C+B/mGI,kFACE,e/BinGN,C+B7mGI,oFAOE,U/BmnGN,C+B1nGI,oFAOE,W/BmnGN,C+B1nGI,gEAME,wBdkIU,CcnIV,UAAA,CADA,WAAA,CAIA,kDAAA,CAAA,0CAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAA
A,CAAA,iBAAA,CAVA,iBAAA,CACA,UAAA,CACA,U/BunGN,C+B3mGI,4DACE,4D/B6mGN,C+B/lGE,sDACE,oB/BkmGJ,C+B/lGI,gFACE,gC/BimGN,C+B5lGE,8DACE,0B/B+lGJ,C+B5lGI,4EACE,wBAlBG,CAmBH,kDAAA,CAAA,0C/B8lGN,C+B1lGI,0EACE,a/B4lGN,C+BjnGE,8DACE,oB/BonGJ,C+BjnGI,wFACE,gC/BmnGN,C+B9mGE,sEACE,0B/BinGJ,C+B9mGI,oFACE,wBAlBG,CAmBH,sDAAA,CAAA,8C/BgnGN,C+B5mGI,kFACE,a/B8mGN,C+BnoGE,sDACE,oB/BsoGJ,C+BnoGI,gFACE,gC/BqoGN,C+BhoGE,8DACE,0B/BmoGJ,C+BhoGI,4EACE,wBAlBG,CAmBH,kDAAA,CAAA,0C/BkoGN,C+B9nGI,0EACE,a/BgoGN,C+BrpGE,oDACE,oB/BwpGJ,C+BrpGI,8EACE,gC/BupGN,C+BlpGE,4DACE,0B/BqpGJ,C+BlpGI,0EACE,wBAlBG,CAmBH,iDAAA,CAAA,yC/BopGN,C+BhpGI,wEACE,a/BkpGN,C+BvqGE,4DACE,oB/B0qGJ,C+BvqGI,sFACE,gC/ByqGN,C+BpqGE,oEACE,0B/BuqGJ,C+BpqGI,kFACE,wBAlBG,CAmBH,qDAAA,CAAA,6C/BsqGN,C+BlqGI,gFACE,a/BoqGN,C+BzrGE,8DACE,oB/B4rGJ,C+BzrGI,wFACE,gC/B2rGN,C+BtrGE,sEACE,0B/ByrGJ,C+BtrGI,oFACE,wBAlBG,CAmBH,sDAAA,CAAA,8C/BwrGN,C+BprGI,kFACE,a/BsrGN,C+B3sGE,4DACE,oB/B8sGJ,C+B3sGI,sFACE,gC/B6sGN,C+BxsGE,oEACE,0B/B2sGJ,C+BxsGI,kFACE,wBAlBG,CAmBH,qDAAA,CAAA,6C/B0sGN,C+BtsGI,gFACE,a/BwsGN,C+B7tGE,4DACE,oB/BguGJ,C+B7tGI,sFACE,gC/B+tGN,C+B1tGE,oEACE,0B/B6tGJ,C+B1tGI,kFACE,wBAlBG,CAmBH,qDAAA,CAAA,6C/B4tGN,C+BxtGI,gFACE,a/B0tGN,C+B/uGE,0DACE,oB/BkvGJ,C+B/uGI,oFACE,gC/BivGN,C+B5uGE,kEACE,0B/B+uGJ,C+B5uGI,gFACE,wBAlBG,CAmBH,oDAAA,CAAA,4C/B8uGN,C+B1uGI,8EACE,a/B4uGN,C+BjwGE,oDACE,oB/BowGJ,C+BjwGI,8EACE,gC/BmwGN,C+B9vGE,4DACE,0B/BiwGJ,C+B9vGI,0EACE,wBAlBG,CAmBH,iDAAA,CAAA,yC/BgwGN,C+B5vGI,wEACE,a/B8vGN,C+BnxGE,4DACE,oB/BsxGJ,C+BnxGI,sFACE,gC/BqxGN,C+BhxGE,oEACE,0B/BmxGJ,C+BhxGI,kFACE,wBAlBG,CAmBH,qDAAA,CAAA,6C/BkxGN,C+B9wGI,gFACE,a/BgxGN,C+BryGE,wDACE,oB/BwyGJ,C+BryGI,kFACE,gC/BuyGN,C+BlyGE,gEACE,0B/BqyGJ,C+BlyGI,8EACE,wBAlBG,CAmBH,mDAAA,CAAA,2C/BoyGN,C+BhyGI,4EACE,a/BkyGN,CgCt8GA,MACE,wMhCy8GF,CgCh8GE,sBAEE,uCAAA,CADA,gBhCo8GJ,CgCh8GI,mCACE,ahCk8GN,CgCn8GI,mCACE,chCk8GN,CgC97GM,4BACE,sBhCg8GR,CgC77GQ,mCACE,gChC+7GV,CgC37GQ,2DACE,SAAA,CAEA,uBAAA,CADA,ehC87GV,CgCz7GQ,yGACE,SAAA,CACA,uBhC27GV,CgCv7GQ,yCACE,YhCy7GV,CgCl7GE,0BACE,eAAA,CACA,ehCo7G
J,CgCj7GI,+BACE,oBhCm7GN,CgC96GE,gDACE,YhCg7GJ,CgC56GE,8BAIE,+BAAA,CAHA,oBAAA,CAEA,WAAA,CAGA,SAAA,CAKA,4BAAA,CAJA,4DACE,CAHF,0BhCg7GJ,CgCv6GI,aAdF,8BAeI,+BAAA,CACA,SAAA,CACA,uBhC06GJ,CACF,CgCv6GI,wCACE,6BhCy6GN,CgCr6GI,oCACE,+BhCu6GN,CgCn6GI,qCAKE,6BAAA,CADA,UAAA,CAHA,oBAAA,CAEA,YAAA,CAGA,2CAAA,CAAA,mCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAPA,WhC46GN,CgC/5GQ,mDACE,oBhCi6GV,CiC/gHE,kCAEE,iBjCqhHJ,CiCvhHE,kCAEE,kBjCqhHJ,CiCvhHE,wBAGE,yCAAA,CAFA,oBAAA,CAGA,SAAA,CACA,mCjCkhHJ,CiC7gHI,aAVF,wBAWI,YjCghHJ,CACF,CiC5gHE,6FAEE,SAAA,CACA,mCjC8gHJ,CiCxgHE,4FAEE,+BjC0gHJ,CiCtgHE,oBACE,yBAAA,CACA,uBAAA,CAGA,yEjCsgHJ,CKv4GI,sC4BrHE,qDACE,uBjC+/GN,CACF,CiC1/GE,kEACE,yBjC4/GJ,CiCx/GE,sBACE,0BjC0/GJ,CkCrjHE,2BACE,alCwjHJ,CKn4GI,wC6BtLF,2BAKI,elCwjHJ,CACF,CkCrjHI,6BAGE,0BAAA,CAAA,2BAAA,CADA,eAAA,CAEA,iBAAA,CAHA,yBAAA,CAAA,iBlC0jHN,CkCpjHM,2CACE,kBlCsjHR,CmCvkHE,uBACE,4CnC2kHJ,CmCtkHE,8CAJE,kCAAA,CAAA,0BnC8kHJ,CmC1kHE,uBACE,4CnCykHJ,CmCpkHE,4BAEE,kCAAA,CAAA,0BAAA,CADA,qCnCukHJ,CmCnkHI,mCACE,anCqkHN,CmCjkHI,kCACE,anCmkHN,CmC9jHE,0BAKE,eAAA,CAJA,aAAA,CAEA,YAAA,CACA,aAAA,CAFA,kBAAA,CAAA,mBnCmkHJ,CmC7jHI,uCACE,enC+jHN,CmC3jHI,sCACE,kBnC6jHN,CoC1mHA,MACE,8LpC6mHF,CoCpmHE,oBAGE,iBAAA,CAEA,gBAAA,CADA,apCsmHJ,CoClmHI,wCACE,uBpComHN,CoChmHI,gCAEE,eAAA,CADA,gBpCmmHN,CoC5lHM,wCACE,mBpC8lHR,CoCxlHE,8BAKE,oBpC2lHJ,CoChmHE,8BAKE,mBpC2lHJ,CoChmHE,8BAOE,4BpCylHJ,CoChmHE,4DAQE,6BpCwlHJ,CoChmHE,8BAQE,4BpCwlHJ,CoChmHE,oBAME,cAAA,CAHA,aAAA,CACA,epC4lHJ,CoCrlHI,kCACE,uCAAA,CACA,oBpCulHN,CoCnlHI,wCAEE,uCAAA,CADA,YpCslHN,CoCjlHI,oCASE,WpCulHN,CoChmHI,oCASE,UpCulHN,CoChmHI,0BAME,6BAAA,CADA,UAAA,CADA,WAAA,CAMA,yCAAA,CAAA,iCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAZA,iBAAA,CACA,UAAA,CAMA,sBAAA,CADA,yBAAA,CAJA,UpC6lHN,CoChlHM,oCACE,wBpCklHR,CoC7kHI,4BACE,YpC+kHN,CoC1kHI,4CACE,YpC4kHN,CqCnqHE,+DACE,mBAAA,CACA,cAAA,CACA,uBrCsqHJ,CqCnqHI,2EAGE,iBAAA,CADA,eAAA,CADA,arCuqHN,CsC7qHE,6BACE,sCtCgrHJ,CsC7qHE,cACE,yCtC+qHJ,CsCnqHE,sIACE,oCtCqqHJ,CsC7pHE,2EACE
,qCtC+pHJ,CsCrpHE,wGACE,oCtCupHJ,CsC9oHE,yFACE,qCtCgpHJ,CsC3oHE,6BACE,kCtC6oHJ,CsCvoHE,6CACE,sCtCyoHJ,CsCloHE,4DACE,sCtCooHJ,CsC7nHE,4DACE,qCtC+nHJ,CsCtnHE,yFACE,qCtCwnHJ,CsChnHE,2EACE,sCtCknHJ,CsCvmHE,wHACE,qCtCymHJ,CsCpmHE,8BAGE,mBAAA,CADA,gBAAA,CADA,gBtCwmHJ,CsCnmHE,eACE,4CtCqmHJ,CsClmHE,eACE,4CtComHJ,CsChmHE,gBAIE,wCAAA,CAHA,aAAA,CAEA,wBAAA,CADA,wBtComHJ,CsC9lHE,yBAOE,wCAAA,CACA,+DAAA,CACA,4BAAA,CACA,6BAAA,CARA,iBAAA,CAGA,eAAA,CACA,eAAA,CAFA,cAAA,CADA,oCAAA,CAFA,iBtCymHJ,CsC7lHI,6BACE,YtC+lHN,CsC5lHM,kCACE,wBAAA,CACA,yBtC8lHR,CsCxlHE,iCAaE,wCAAA,CACA,+DAAA,CAJA,uCAAA,CACA,0BAAA,CALA,UAAA,CAJA,oBAAA,CAOA,2BAAA,CADA,2BAAA,CADA,2BAAA,CANA,eAAA,CAWA,wBAAA,CAAA,gBAAA,CAPA,StCimHJ,CsC/kHE,sBACE,iBAAA,CACA,iBtCilHJ,CsCzkHI,sCACE,gBtC2kHN,CsCvkHI,gDACE,YtCykHN,CsC/jHA,gBACE,iBtCkkHF,CsC9jHE,yCACE,aAAA,CACA,StCgkHJ,CsC3jHE,mBACE,YtC6jHJ,CsCxjHE,oBACE,QtC0jHJ,CsCtjHE,4BACE,WAAA,CACA,SAAA,CACA,etCwjHJ,CsCrjHI,0CACE,YtCujHN,CsCjjHE,yBAKE,wCAAA,CAEA,+BAAA,CADA,4BAAA,CAHA,eAAA,CADA,oDAAA,CAEA,wBAAA,CAAA,gBtCsjHJ,CsC/iHE,2BAEE,+DAAA,CADA,2BtCkjHJ,CsC9iHI,+BACE,uCAAA,CACA,gBtCgjHN,CsC3iHE,sBACE,MAAA,CACA,WtC6iHJ,CsCxiHA,aACE,atC2iHF,CsCjiHE,4BAEE,aAAA,CADA,YtCqiHJ,CsCjiHI,wDAEE,2BAAA,CADA,wBtCoiHN,CsC9hHE,+BAKE,2CAAA,CAEA,+BAAA,CADA,gCAAA,CADA,sBAAA,CAHA,mBAAA,CACA,gBAAA,CAFA,atCsiHJ,CsC7hHI,qCAEE,UAAA,CACA,UAAA,CAFA,atCiiHN,CKlqHI,wCiCgJF,8BACE,iBtCshHF,CsC5gHE,wSAGE,etCkhHJ,CsC9gHE,sCAEE,mBAAA,CACA,eAAA,CADA,oBAAA,CADA,kBAAA,CAAA,mBtCkhHJ,CACF,CuCz2HI,yDAIE,+BAAA,CACA,8BAAA,CAFA,aAAA,CADA,QAAA,CADA,iBvC+2HN,CuCv2HI,uBAEE,uCAAA,CADA,cvC02HN,CuCrzHM,iHAEE,WAlDkB,CAiDlB,kBvCg0HR,CuCj0HM,6HAEE,WAlDkB,CAiDlB,kBvC40HR,CuC70HM,6HAEE,WAlDkB,CAiDlB,kBvCw1HR,CuCz1HM,oHAEE,WAlDkB,CAiDlB,kBvCo2HR,CuCr2HM,0HAEE,WAlDkB,CAiDlB,kBvCg3HR,CuCj3HM,uHAEE,WAlDkB,CAiDlB,kBvC43HR,CuC73HM,uHAEE,WAlDkB,CAiDlB,kBvCw4HR,CuCz4HM,6HAEE,WAlDkB,CAiDlB,kBvCo5HR,CuCr5HM,yCAEE,WAlDkB,CAiDlB,kBvCw5HR,CuCz5HM,yCAEE,WAlDkB,CAiDlB,kBvC45HR,CuC75HM,0CAEE,WAlDkB,CAiDlB,kBvCg6HR,CuCj6HM,uCAEE,WAlDkB,CAiDlB,kBvCo6HR,
CuCr6HM,wCAEE,WAlDkB,CAiDlB,kBvCw6HR,CuCz6HM,sCAEE,WAlDkB,CAiDlB,kBvC46HR,CuC76HM,wCAEE,WAlDkB,CAiDlB,kBvCg7HR,CuCj7HM,oCAEE,WAlDkB,CAiDlB,kBvCo7HR,CuCr7HM,2CAEE,WAlDkB,CAiDlB,kBvCw7HR,CuCz7HM,qCAEE,WAlDkB,CAiDlB,kBvC47HR,CuC77HM,oCAEE,WAlDkB,CAiDlB,kBvCg8HR,CuCj8HM,kCAEE,WAlDkB,CAiDlB,kBvCo8HR,CuCr8HM,qCAEE,WAlDkB,CAiDlB,kBvCw8HR,CuCz8HM,mCAEE,WAlDkB,CAiDlB,kBvC48HR,CuC78HM,qCAEE,WAlDkB,CAiDlB,kBvCg9HR,CuCj9HM,wCAEE,WAlDkB,CAiDlB,kBvCo9HR,CuCr9HM,sCAEE,WAlDkB,CAiDlB,kBvCw9HR,CuCz9HM,2CAEE,WAlDkB,CAiDlB,kBvC49HR,CuCj9HM,iCAEE,WAPkB,CAMlB,iBvCo9HR,CuCr9HM,uCAEE,WAPkB,CAMlB,iBvCw9HR,CuCz9HM,mCAEE,WAPkB,CAMlB,iBvC49HR,CwC9iIA,MACE,qMAAA,CACA,mMxCijIF,CwCxiIE,wBAKE,mBAAA,CAHA,YAAA,CACA,qBAAA,CACA,YAAA,CAHA,iBxC+iIJ,CwCriII,8BAGE,QAAA,CACA,SAAA,CAHA,iBAAA,CACA,OxCyiIN,CwCpiIM,qCACE,0BxCsiIR,CwCvgIE,2BAKE,uBAAA,CADA,+DAAA,CAHA,YAAA,CACA,cAAA,CACA,aAAA,CAGA,oBxCygIJ,CwCtgII,aATF,2BAUI,gBxCygIJ,CACF,CwCtgII,cAGE,+BACE,iBxCsgIN,CwCngIM,sCAQE,oCAAA,CANA,QAAA,CAKA,UAAA,CAHA,aAAA,CAEA,UAAA,CAHA,MAAA,CAFA,iBAAA,CAYA,2CAAA,CAJA,qCACE,CAEF,kDAAA,CAPA,+BxC2gIR,CACF,CwC9/HI,8CACE,YxCggIN,CwC5/HI,iCASE,+BAAA,CACA,6BAAA,CAJA,uCAAA,CAEA,cAAA,CAPA,aAAA,CAGA,gBAAA,CACA,eAAA,CAFA,8BAAA,CAWA,+BAAA,CAHA,2CACE,CALF,kBAAA,CALA,UxCwgIN,CwCz/HM,aAII,6CACE,OxCw/HV,CwCz/HQ,8CACE,OxC2/HV,CwC5/HQ,8CACE,OxC8/HV,CwC//HQ,8CACE,OxCigIV,CwClgIQ,8CACE,OxCogIV,CwCrgIQ,8CACE,OxCugIV,CwCxgIQ,8CACE,OxC0gIV,CwC3gIQ,8CACE,OxC6gIV,CwC9gIQ,8CACE,OxCghIV,CwCjhIQ,+CACE,QxCmhIV,CwCphIQ,+CACE,QxCshIV,CwCvhIQ,+CACE,QxCyhIV,CwC1hIQ,+CACE,QxC4hIV,CwC7hIQ,+CACE,QxC+hIV,CwChiIQ,+CACE,QxCkiIV,CwCniIQ,+CACE,QxCqiIV,CwCtiIQ,+CACE,QxCwiIV,CwCziIQ,+CACE,QxC2iIV,CwC5iIQ,+CACE,QxC8iIV,CwC/iIQ,+CACE,QxCijIV,CACF,CwC5iIM,uCACE,+BxC8iIR,CwCxiIE,4BACE,UxC0iIJ,CwCviII,aAJF,4BAKI,gBxC0iIJ,CACF,CwCtiIE,0BACE,YxCwiIJ,CwCriII,aAJF,0BAKI,axCwiIJ,CwCpiIM,sCACE,OxCsiIR,CwCviIM,uCACE,OxCyiIR,CwC1iIM,uCACE,OxC4iIR,CwC7iIM,uCACE,OxC+iIR,CwChjIM,uCACE,OxCkjIR,CwCnjIM,uCACE,OxCqjIR,CwCtjIM,uCACE,OxCwjIR,CwCzjIM,uCACE,OxC2jIR,CwC5jIM,uCACE,OxC8j
IR,CwC/jIM,wCACE,QxCikIR,CwClkIM,wCACE,QxCokIR,CwCrkIM,wCACE,QxCukIR,CwCxkIM,wCACE,QxC0kIR,CwC3kIM,wCACE,QxC6kIR,CwC9kIM,wCACE,QxCglIR,CwCjlIM,wCACE,QxCmlIR,CwCplIM,wCACE,QxCslIR,CwCvlIM,wCACE,QxCylIR,CwC1lIM,wCACE,QxC4lIR,CwC7lIM,wCACE,QxC+lIR,CACF,CwCzlII,+FAEE,QxC2lIN,CwCxlIM,yGACE,wBAAA,CACA,yBxC2lIR,CwCllIM,2DAEE,wBAAA,CACA,yBAAA,CAFA,QxCslIR,CwC/kIM,iEACE,QxCilIR,CwC9kIQ,qLAGE,wBAAA,CACA,yBAAA,CAFA,QxCklIV,CwC5kIQ,6FACE,wBAAA,CACA,yBxC8kIV,CwCzkIM,yDACE,kBxC2kIR,CwCtkII,sCACE,QxCwkIN,CwCnkIE,2BAEE,iBAAA,CAOA,kBAAA,CAHA,uCAAA,CAEA,cAAA,CAPA,aAAA,CAGA,YAAA,CACA,gBAAA,CAEA,mBAAA,CAGA,gCAAA,CAPA,WxC4kIJ,CwClkII,iCAEE,uDAAA,CADA,+BxCqkIN,CwChkII,iCAKE,6BAAA,CADA,UAAA,CAHA,aAAA,CAEA,WAAA,CAMA,8CAAA,CAAA,sCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CANA,+CACE,CALF,UxC0kIN,CwC3jIE,4BAOE,yEACE,CANF,YAAA,CAGA,aAAA,CAFA,qBAAA,CAGA,mBAAA,CALA,iBAAA,CAYA,wBAAA,CATA,YxCikIJ,CwCrjII,sCACE,wBxCujIN,CwCnjII,oCACE,SxCqjIN,CwCjjII,kCAGE,wEACE,CAFF,mBAAA,CADA,OxCqjIN,CwC3iIM,uDACE,8CAAA,CAAA,sCxC6iIR,CK7pII,wCmC8HF,wDAEE,kBxCqiIF,CwCviIA,wDAEE,mBxCqiIF,CwCviIA,8CAGE,eAAA,CAFA,eAAA,CAGA,iCxCmiIF,CwC/hIE,8DACE,mBxCkiIJ,CwCniIE,8DACE,kBxCkiIJ,CwCniIE,oDAEE,UxCiiIJ,CwC7hIE,8EAEE,kBxCgiIJ,CwCliIE,8EAEE,mBxCgiIJ,CwCliIE,8EAGE,kBxC+hIJ,CwCliIE,8EAGE,mBxC+hIJ,CwCliIE,oEACE,UxCiiIJ,CwC3hIE,8EAEE,mBxC8hIJ,CwChiIE,8EAEE,kBxC8hIJ,CwChiIE,8EAGE,mBxC6hIJ,CwChiIE,8EAGE,kBxC6hIJ,CwChiIE,oEACE,UxC+hIJ,CACF,CwCjhIE,cAHF,olDAII,+BxCohIF,CwCjhIE,g8GACE,sCxCmhIJ,CACF,CwC9gIA,4sDACE,uDxCihIF,CwC7gIA,wmDACE,axCghIF,CyC73IA,MACE,8WAAA,CAEA,uXzCi4IF,CyCv3IE,4BAEE,oBAAA,CADA,iBzC23IJ,CyCt3II,sDAGE,SzCw3IN,CyC33II,sDAGE,UzCw3IN,CyC33II,4CACE,iBAAA,CACA,SzCy3IN,CyCn3IE,+CAEE,SAAA,CADA,UzCs3IJ,CyCj3IE,kDAOE,WzCu3IJ,CyC93IE,kDAOE,YzCu3IJ,CyC93IE,wCAME,qDAAA,CADA,UAAA,CADA,aAAA,CAIA,0CAAA,CAAA,kCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAVA,iBAAA,CACA,SAAA,CACA,YzC23IJ,CyC/2IE,gEACE,wBxByWa,CwBxWb,mDAAA,CAAA,2CzCi3IJ,C0Cn6IA,QACE,8DAAA,CAGA,+CAAA,CACA,iEA
AA,CACA,oDAAA,CACA,sDAAA,CACA,mDAAA,CAGA,qEAAA,CACA,qEAAA,CACA,wEAAA,CACA,0EAAA,CACA,wEAAA,CACA,yEAAA,CACA,kEAAA,CACA,+DAAA,CACA,oEAAA,CACA,oEAAA,CACA,mEAAA,CACA,gEAAA,CACA,uEAAA,CACA,mEAAA,CACA,qEAAA,CACA,oEAAA,CACA,gEAAA,CACA,wEAAA,CACA,qEAAA,CACA,+D1Ck6IF,C0C55IA,SAEE,kBAAA,CADA,Y1Cg6IF,CK/xII,mCsChKA,8BACE,U3Cu8IJ,C2Cx8IE,8BACE,W3Cu8IJ,C2Cx8IE,8BAGE,kB3Cq8IJ,C2Cx8IE,8BAGE,iB3Cq8IJ,C2Cx8IE,oBAKE,mBAAA,CADA,YAAA,CAFA,a3Cs8IJ,C2Ch8II,kCACE,W3Cm8IN,C2Cp8II,kCACE,U3Cm8IN,C2Cp8II,kCAEE,iBAAA,CAAA,c3Ck8IN,C2Cp8II,kCAEE,aAAA,CAAA,kB3Ck8IN,CACF","file":"main.css"} \ No newline at end of file diff --git a/devel/assets/stylesheets/main.30068a00.min.css b/devel/assets/stylesheets/main.30068a00.min.css new file mode 100644 index 000000000..f813e9a8c --- /dev/null +++ b/devel/assets/stylesheets/main.30068a00.min.css @@ -0,0 +1 @@ +@charset "UTF-8";html{-webkit-text-size-adjust:none;-moz-text-size-adjust:none;text-size-adjust:none;box-sizing:border-box}*,:after,:before{box-sizing:inherit}@media (prefers-reduced-motion){*,:after,:before{transition:none!important}}body{margin:0}a,button,input,label{-webkit-tap-highlight-color:transparent}a{color:inherit;text-decoration:none}hr{border:0;box-sizing:initial;display:block;height:.05rem;overflow:visible;padding:0}small{font-size:80%}sub,sup{line-height:1em}img{border-style:none}table{border-collapse:initial;border-spacing:0}td,th{font-weight:400;vertical-align:top}button{background:#0000;border:0;font-family:inherit;font-size:inherit;margin:0;padding:0}input{border:0;outline:none}:root{--md-primary-fg-color:#4051b5;--md-primary-fg-color--light:#5d6cc0;--md-primary-fg-color--dark:#303fa1;--md-primary-bg-color:#fff;--md-primary-bg-color--light:#ffffffb3;--md-accent-fg-color:#526cfe;--md-accent-fg-color--transparent:#526cfe1a;--md-accent-bg-color:#fff;--md-accent-bg-color--light:#ffffffb3}[data-md-color-scheme=default]{color-scheme:light}[data-md-color-scheme=default] img[src$="#gh-dark-mode-only"],[data-md-color-scheme=default] 
img[src$="#only-dark"]{display:none}:root,[data-md-color-scheme=default]{--md-default-fg-color:#000000de;--md-default-fg-color--light:#0000008a;--md-default-fg-color--lighter:#00000052;--md-default-fg-color--lightest:#00000012;--md-default-bg-color:#fff;--md-default-bg-color--light:#ffffffb3;--md-default-bg-color--lighter:#ffffff4d;--md-default-bg-color--lightest:#ffffff1f;--md-code-fg-color:#36464e;--md-code-bg-color:#f5f5f5;--md-code-hl-color:#4287ff;--md-code-hl-color--light:#4287ff1a;--md-code-hl-number-color:#d52a2a;--md-code-hl-special-color:#db1457;--md-code-hl-function-color:#a846b9;--md-code-hl-constant-color:#6e59d9;--md-code-hl-keyword-color:#3f6ec6;--md-code-hl-string-color:#1c7d4d;--md-code-hl-name-color:var(--md-code-fg-color);--md-code-hl-operator-color:var(--md-default-fg-color--light);--md-code-hl-punctuation-color:var(--md-default-fg-color--light);--md-code-hl-comment-color:var(--md-default-fg-color--light);--md-code-hl-generic-color:var(--md-default-fg-color--light);--md-code-hl-variable-color:var(--md-default-fg-color--light);--md-typeset-color:var(--md-default-fg-color);--md-typeset-a-color:var(--md-primary-fg-color);--md-typeset-mark-color:#ffff0080;--md-typeset-del-color:#f5503d26;--md-typeset-ins-color:#0bd57026;--md-typeset-kbd-color:#fafafa;--md-typeset-kbd-accent-color:#fff;--md-typeset-kbd-border-color:#b8b8b8;--md-typeset-table-color:#0000001f;--md-typeset-table-color--light:rgba(0,0,0,.035);--md-admonition-fg-color:var(--md-default-fg-color);--md-admonition-bg-color:var(--md-default-bg-color);--md-warning-fg-color:#000000de;--md-warning-bg-color:#ff9;--md-footer-fg-color:#fff;--md-footer-fg-color--light:#ffffffb3;--md-footer-fg-color--lighter:#ffffff73;--md-footer-bg-color:#000000de;--md-footer-bg-color--dark:#00000052;--md-shadow-z1:0 0.2rem 0.5rem #0000000d,0 0 0.05rem #0000001a;--md-shadow-z2:0 0.2rem 0.5rem #0000001a,0 0 0.05rem #00000040;--md-shadow-z3:0 0.2rem 0.5rem #0003,0 0 0.05rem #00000059}.md-icon 
svg{fill:currentcolor;display:block;height:1.2rem;width:1.2rem}body{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;--md-text-font-family:var(--md-text-font,_),-apple-system,BlinkMacSystemFont,Helvetica,Arial,sans-serif;--md-code-font-family:var(--md-code-font,_),SFMono-Regular,Consolas,Menlo,monospace}aside,body,input{font-feature-settings:"kern","liga";color:var(--md-typeset-color);font-family:var(--md-text-font-family)}code,kbd,pre{font-feature-settings:"kern";font-family:var(--md-code-font-family)}:root{--md-typeset-table-sort-icon:url('data:image/svg+xml;charset=utf-8,');--md-typeset-table-sort-icon--asc:url('data:image/svg+xml;charset=utf-8,');--md-typeset-table-sort-icon--desc:url('data:image/svg+xml;charset=utf-8,')}.md-typeset{-webkit-print-color-adjust:exact;color-adjust:exact;font-size:.8rem;line-height:1.6}@media print{.md-typeset{font-size:.68rem}}.md-typeset blockquote,.md-typeset dl,.md-typeset figure,.md-typeset ol,.md-typeset pre,.md-typeset ul{margin-bottom:1em;margin-top:1em}.md-typeset h1{color:var(--md-default-fg-color--light);font-size:2em;line-height:1.3;margin:0 0 1.25em}.md-typeset h1,.md-typeset h2{font-weight:300;letter-spacing:-.01em}.md-typeset h2{font-size:1.5625em;line-height:1.4;margin:1.6em 0 .64em}.md-typeset h3{font-size:1.25em;font-weight:400;letter-spacing:-.01em;line-height:1.5;margin:1.6em 0 .8em}.md-typeset h2+h3{margin-top:.8em}.md-typeset h4{font-weight:700;letter-spacing:-.01em;margin:1em 0}.md-typeset h5,.md-typeset h6{color:var(--md-default-fg-color--light);font-size:.8em;font-weight:700;letter-spacing:-.01em;margin:1.25em 0}.md-typeset h5{text-transform:uppercase}.md-typeset hr{border-bottom:.05rem solid var(--md-default-fg-color--lightest);display:flow-root;margin:1.5em 0}.md-typeset a{color:var(--md-typeset-a-color);word-break:break-word}.md-typeset a,.md-typeset a:before{transition:color 125ms}.md-typeset a:focus,.md-typeset a:hover{color:var(--md-accent-fg-color)}.md-typeset a:focus 
code,.md-typeset a:hover code{background-color:var(--md-accent-fg-color--transparent)}.md-typeset a code{color:currentcolor;transition:background-color 125ms}.md-typeset a.focus-visible{outline-color:var(--md-accent-fg-color);outline-offset:.2rem}.md-typeset code,.md-typeset kbd,.md-typeset pre{color:var(--md-code-fg-color);direction:ltr;font-variant-ligatures:none}@media print{.md-typeset code,.md-typeset kbd,.md-typeset pre{white-space:pre-wrap}}.md-typeset code{background-color:var(--md-code-bg-color);border-radius:.1rem;-webkit-box-decoration-break:clone;box-decoration-break:clone;font-size:.85em;padding:0 .2941176471em;word-break:break-word}.md-typeset code:not(.focus-visible){-webkit-tap-highlight-color:transparent;outline:none}.md-typeset pre{display:flow-root;line-height:1.4;position:relative}.md-typeset pre>code{-webkit-box-decoration-break:slice;box-decoration-break:slice;box-shadow:none;display:block;margin:0;outline-color:var(--md-accent-fg-color);overflow:auto;padding:.7720588235em 1.1764705882em;scrollbar-color:var(--md-default-fg-color--lighter) #0000;scrollbar-width:thin;touch-action:auto;word-break:normal}.md-typeset pre>code:hover{scrollbar-color:var(--md-accent-fg-color) #0000}.md-typeset pre>code::-webkit-scrollbar{height:.2rem;width:.2rem}.md-typeset pre>code::-webkit-scrollbar-thumb{background-color:var(--md-default-fg-color--lighter)}.md-typeset pre>code::-webkit-scrollbar-thumb:hover{background-color:var(--md-accent-fg-color)}.md-typeset kbd{background-color:var(--md-typeset-kbd-color);border-radius:.1rem;box-shadow:0 .1rem 0 .05rem var(--md-typeset-kbd-border-color),0 .1rem 0 var(--md-typeset-kbd-border-color),0 -.1rem .2rem var(--md-typeset-kbd-accent-color) inset;color:var(--md-default-fg-color);display:inline-block;font-size:.75em;padding:0 .6666666667em;vertical-align:text-top;word-break:break-word}.md-typeset 
mark{background-color:var(--md-typeset-mark-color);-webkit-box-decoration-break:clone;box-decoration-break:clone;color:inherit;word-break:break-word}.md-typeset abbr{border-bottom:.05rem dotted var(--md-default-fg-color--light);cursor:help;text-decoration:none}@media (hover:none){.md-typeset abbr[title]:focus:after,.md-typeset abbr[title]:hover:after{background-color:var(--md-default-fg-color);border-radius:.1rem;box-shadow:var(--md-shadow-z3);color:var(--md-default-bg-color);content:attr(title);font-size:.7rem;left:.8rem;margin-top:2em;padding:.2rem .3rem;position:absolute;right:.8rem}}.md-typeset small{opacity:.75}[dir=ltr] .md-typeset sub,[dir=ltr] .md-typeset sup{margin-left:.078125em}[dir=rtl] .md-typeset sub,[dir=rtl] .md-typeset sup{margin-right:.078125em}[dir=ltr] .md-typeset blockquote{padding-left:.6rem}[dir=rtl] .md-typeset blockquote{padding-right:.6rem}[dir=ltr] .md-typeset blockquote{border-left:.2rem solid var(--md-default-fg-color--lighter)}[dir=rtl] .md-typeset blockquote{border-right:.2rem solid var(--md-default-fg-color--lighter)}.md-typeset blockquote{color:var(--md-default-fg-color--light);margin-left:0;margin-right:0}.md-typeset ul{list-style-type:disc}[dir=ltr] .md-typeset ol,[dir=ltr] .md-typeset ul{margin-left:.625em}[dir=rtl] .md-typeset ol,[dir=rtl] .md-typeset ul{margin-right:.625em}.md-typeset ol,.md-typeset ul{padding:0}.md-typeset ol:not([hidden]),.md-typeset ul:not([hidden]){display:flow-root}.md-typeset ol ol,.md-typeset ul ol{list-style-type:lower-alpha}.md-typeset ol ol ol,.md-typeset ul ol ol{list-style-type:lower-roman}[dir=ltr] .md-typeset ol li,[dir=ltr] .md-typeset ul li{margin-left:1.25em}[dir=rtl] .md-typeset ol li,[dir=rtl] .md-typeset ul li{margin-right:1.25em}.md-typeset ol li,.md-typeset ul li{margin-bottom:.5em}.md-typeset ol li blockquote,.md-typeset ol li p,.md-typeset ul li blockquote,.md-typeset ul li p{margin:.5em 0}.md-typeset ol li:last-child,.md-typeset ul li:last-child{margin-bottom:0}[dir=ltr] .md-typeset ol 
li ol,[dir=ltr] .md-typeset ol li ul,[dir=ltr] .md-typeset ul li ol,[dir=ltr] .md-typeset ul li ul{margin-left:.625em}[dir=rtl] .md-typeset ol li ol,[dir=rtl] .md-typeset ol li ul,[dir=rtl] .md-typeset ul li ol,[dir=rtl] .md-typeset ul li ul{margin-right:.625em}.md-typeset ol li ol,.md-typeset ol li ul,.md-typeset ul li ol,.md-typeset ul li ul{margin-bottom:.5em;margin-top:.5em}[dir=ltr] .md-typeset dd{margin-left:1.875em}[dir=rtl] .md-typeset dd{margin-right:1.875em}.md-typeset dd{margin-bottom:1.5em;margin-top:1em}.md-typeset img,.md-typeset svg,.md-typeset video{height:auto;max-width:100%}.md-typeset img[align=left]{margin:1em 1em 1em 0}.md-typeset img[align=right]{margin:1em 0 1em 1em}.md-typeset img[align]:only-child{margin-top:0}.md-typeset figure{display:flow-root;margin:1em auto;max-width:100%;text-align:center;width:-webkit-fit-content;width:-moz-fit-content;width:fit-content}.md-typeset figure img{display:block}.md-typeset figcaption{font-style:italic;margin:1em auto;max-width:24rem}.md-typeset iframe{max-width:100%}.md-typeset table:not([class]){background-color:var(--md-default-bg-color);border:.05rem solid var(--md-typeset-table-color);border-radius:.1rem;display:inline-block;font-size:.64rem;max-width:100%;overflow:auto;touch-action:auto}@media print{.md-typeset table:not([class]){display:table}}.md-typeset table:not([class])+*{margin-top:1.5em}.md-typeset table:not([class]) td>:first-child,.md-typeset table:not([class]) th>:first-child{margin-top:0}.md-typeset table:not([class]) td>:last-child,.md-typeset table:not([class]) th>:last-child{margin-bottom:0}.md-typeset table:not([class]) td:not([align]),.md-typeset table:not([class]) th:not([align]){text-align:left}[dir=rtl] .md-typeset table:not([class]) td:not([align]),[dir=rtl] .md-typeset table:not([class]) th:not([align]){text-align:right}.md-typeset table:not([class]) th{font-weight:700;min-width:5rem;padding:.9375em 1.25em;vertical-align:top}.md-typeset table:not([class]) td{border-top:.05rem 
solid var(--md-typeset-table-color);padding:.9375em 1.25em;vertical-align:top}.md-typeset table:not([class]) tbody tr{transition:background-color 125ms}.md-typeset table:not([class]) tbody tr:hover{background-color:var(--md-typeset-table-color--light);box-shadow:0 .05rem 0 var(--md-default-bg-color) inset}.md-typeset table:not([class]) a{word-break:normal}.md-typeset table th[role=columnheader]{cursor:pointer}[dir=ltr] .md-typeset table th[role=columnheader]:after{margin-left:.5em}[dir=rtl] .md-typeset table th[role=columnheader]:after{margin-right:.5em}.md-typeset table th[role=columnheader]:after{content:"";display:inline-block;height:1.2em;-webkit-mask-image:var(--md-typeset-table-sort-icon);mask-image:var(--md-typeset-table-sort-icon);-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background-color 125ms;vertical-align:text-bottom;width:1.2em}.md-typeset table th[role=columnheader]:hover:after{background-color:var(--md-default-fg-color--lighter)}.md-typeset table th[role=columnheader][aria-sort=ascending]:after{background-color:var(--md-default-fg-color--light);-webkit-mask-image:var(--md-typeset-table-sort-icon--asc);mask-image:var(--md-typeset-table-sort-icon--asc)}.md-typeset table th[role=columnheader][aria-sort=descending]:after{background-color:var(--md-default-fg-color--light);-webkit-mask-image:var(--md-typeset-table-sort-icon--desc);mask-image:var(--md-typeset-table-sort-icon--desc)}.md-typeset__scrollwrap{margin:1em -.8rem;overflow-x:auto;touch-action:auto}.md-typeset__table{display:inline-block;margin-bottom:.5em;padding:0 .8rem}@media print{.md-typeset__table{display:block}}html .md-typeset__table table{display:table;margin:0;overflow:hidden;width:100%}@media screen and (max-width:44.9375em){.md-content__inner>pre{margin:1em -.8rem}.md-content__inner>pre code{border-radius:0}}.md-typeset .md-author{display:block;flex-shrink:0;height:1.6rem;overflow:hidden;position:relative;transition:color 
125ms,transform 125ms;width:1.6rem}.md-typeset .md-author img{border-radius:100%;display:block}.md-typeset .md-author--more{background:var(--md-default-fg-color--lightest);color:var(--md-default-fg-color--lighter);font-size:.6rem;font-weight:700;line-height:1.6rem;text-align:center}.md-typeset .md-author--long{height:2.4rem;width:2.4rem}.md-typeset a.md-author{transform:scale(1)}.md-typeset a.md-author img{filter:grayscale(100%) opacity(75%);transition:filter 125ms}.md-typeset a.md-author:focus,.md-typeset a.md-author:hover{transform:scale(1.1);z-index:1}.md-typeset a.md-author:focus img,.md-typeset a.md-author:hover img{filter:grayscale(0)}.md-banner{background-color:var(--md-footer-bg-color);color:var(--md-footer-fg-color);overflow:auto}@media print{.md-banner{display:none}}.md-banner--warning{background-color:var(--md-warning-bg-color);color:var(--md-warning-fg-color)}.md-banner__inner{font-size:.7rem;margin:.6rem auto;padding:0 .8rem}[dir=ltr] .md-banner__button{float:right}[dir=rtl] .md-banner__button{float:left}.md-banner__button{color:inherit;cursor:pointer;transition:opacity .25s}.no-js .md-banner__button{display:none}.md-banner__button:hover{opacity:.7}html{font-size:125%;height:100%;overflow-x:hidden}@media screen and (min-width:100em){html{font-size:137.5%}}@media screen and (min-width:125em){html{font-size:150%}}body{background-color:var(--md-default-bg-color);display:flex;flex-direction:column;font-size:.5rem;min-height:100%;position:relative;width:100%}@media print{body{display:block}}@media screen and (max-width:59.9375em){body[data-md-scrolllock]{position:fixed}}.md-grid{margin-left:auto;margin-right:auto;max-width:61rem}.md-container{display:flex;flex-direction:column;flex-grow:1}@media 
print{.md-container{display:block}}.md-main{flex-grow:1}.md-main__inner{display:flex;height:100%;margin-top:1.5rem}.md-ellipsis{overflow:hidden;text-overflow:ellipsis}.md-toggle{display:none}.md-option{height:0;opacity:0;position:absolute;width:0}.md-option:checked+label:not([hidden]){display:block}.md-option.focus-visible+label{outline-color:var(--md-accent-fg-color);outline-style:auto}.md-skip{background-color:var(--md-default-fg-color);border-radius:.1rem;color:var(--md-default-bg-color);font-size:.64rem;margin:.5rem;opacity:0;outline-color:var(--md-accent-fg-color);padding:.3rem .5rem;position:fixed;transform:translateY(.4rem);z-index:-1}.md-skip:focus{opacity:1;transform:translateY(0);transition:transform .25s cubic-bezier(.4,0,.2,1),opacity 175ms 75ms;z-index:10}@page{margin:25mm}:root{--md-clipboard-icon:url('data:image/svg+xml;charset=utf-8,')}.md-clipboard{border-radius:.1rem;color:var(--md-default-fg-color--lightest);cursor:pointer;height:1.5em;outline-color:var(--md-accent-fg-color);outline-offset:.1rem;position:absolute;right:.5em;top:.5em;transition:color .25s;width:1.5em;z-index:1}@media print{.md-clipboard{display:none}}.md-clipboard:not(.focus-visible){-webkit-tap-highlight-color:transparent;outline:none}:hover>.md-clipboard{color:var(--md-default-fg-color--light)}.md-clipboard:focus,.md-clipboard:hover{color:var(--md-accent-fg-color)}.md-clipboard:after{background-color:currentcolor;content:"";display:block;height:1.125em;margin:0 auto;-webkit-mask-image:var(--md-clipboard-icon);mask-image:var(--md-clipboard-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;width:1.125em}.md-clipboard--inline{cursor:pointer}.md-clipboard--inline code{transition:color .25s,background-color .25s}.md-clipboard--inline:focus code,.md-clipboard--inline:hover code{background-color:var(--md-accent-fg-color--transparent);color:var(--md-accent-fg-color)}@keyframes 
consent{0%{opacity:0;transform:translateY(100%)}to{opacity:1;transform:translateY(0)}}@keyframes overlay{0%{opacity:0}to{opacity:1}}.md-consent__overlay{animation:overlay .25s both;-webkit-backdrop-filter:blur(.1rem);backdrop-filter:blur(.1rem);background-color:#0000008a;height:100%;opacity:1;position:fixed;top:0;width:100%;z-index:5}.md-consent__inner{animation:consent .5s cubic-bezier(.1,.7,.1,1) both;background-color:var(--md-default-bg-color);border:0;border-radius:.1rem;bottom:0;box-shadow:0 0 .2rem #0000001a,0 .2rem .4rem #0003;max-height:100%;overflow:auto;padding:0;position:fixed;width:100%;z-index:5}.md-consent__form{padding:.8rem}.md-consent__settings{display:none;margin:1em 0}input:checked+.md-consent__settings{display:block}.md-consent__controls{margin-bottom:.8rem}.md-typeset .md-consent__controls .md-button{display:inline}@media screen and (max-width:44.9375em){.md-typeset .md-consent__controls .md-button{display:block;margin-top:.4rem;text-align:center;width:100%}}.md-consent label{cursor:pointer}.md-content{flex-grow:1;min-width:0}.md-content__inner{margin:0 .8rem 1.2rem;padding-top:.6rem}@media screen and (min-width:76.25em){[dir=ltr] .md-sidebar--primary:not([hidden])~.md-content>.md-content__inner{margin-left:1.2rem}[dir=ltr] .md-sidebar--secondary:not([hidden])~.md-content>.md-content__inner,[dir=rtl] .md-sidebar--primary:not([hidden])~.md-content>.md-content__inner{margin-right:1.2rem}[dir=rtl] .md-sidebar--secondary:not([hidden])~.md-content>.md-content__inner{margin-left:1.2rem}}.md-content__inner:before{content:"";display:block;height:.4rem}.md-content__inner>:last-child{margin-bottom:0}[dir=ltr] .md-content__button{float:right}[dir=rtl] .md-content__button{float:left}[dir=ltr] .md-content__button{margin-left:.4rem}[dir=rtl] .md-content__button{margin-right:.4rem}.md-content__button{margin:.4rem 0;padding:0}@media print{.md-content__button{display:none}}.md-typeset 
.md-content__button{color:var(--md-default-fg-color--lighter)}.md-content__button svg{display:inline;vertical-align:top}[dir=rtl] .md-content__button svg{transform:scaleX(-1)}[dir=ltr] .md-dialog{right:.8rem}[dir=rtl] .md-dialog{left:.8rem}.md-dialog{background-color:var(--md-default-fg-color);border-radius:.1rem;bottom:.8rem;box-shadow:var(--md-shadow-z3);min-width:11.1rem;opacity:0;padding:.4rem .6rem;pointer-events:none;position:fixed;transform:translateY(100%);transition:transform 0ms .4s,opacity .4s;z-index:4}@media print{.md-dialog{display:none}}.md-dialog--active{opacity:1;pointer-events:auto;transform:translateY(0);transition:transform .4s cubic-bezier(.075,.85,.175,1),opacity .4s}.md-dialog__inner{color:var(--md-default-bg-color);font-size:.7rem}.md-feedback{margin:2em 0 1em;text-align:center}.md-feedback fieldset{border:none;margin:0;padding:0}.md-feedback__title{font-weight:700;margin:1em auto}.md-feedback__inner{position:relative}.md-feedback__list{align-content:baseline;display:flex;flex-wrap:wrap;justify-content:center;position:relative}.md-feedback__list:hover .md-icon:not(:disabled){color:var(--md-default-fg-color--lighter)}:disabled .md-feedback__list{min-height:1.8rem}.md-feedback__icon{color:var(--md-default-fg-color--light);cursor:pointer;flex-shrink:0;margin:0 .1rem;transition:color 125ms}.md-feedback__icon:not(:disabled).md-icon:hover{color:var(--md-accent-fg-color)}.md-feedback__icon:disabled{color:var(--md-default-fg-color--lightest);pointer-events:none}.md-feedback__note{opacity:0;position:relative;transform:translateY(.4rem);transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .15s}.md-feedback__note>*{margin:0 auto;max-width:16rem}:disabled .md-feedback__note{opacity:1;transform:translateY(0)}.md-footer{background-color:var(--md-footer-bg-color);color:var(--md-footer-fg-color)}@media 
print{.md-footer{display:none}}.md-footer__inner{justify-content:space-between;overflow:auto;padding:.2rem}.md-footer__inner:not([hidden]){display:flex}.md-footer__link{align-items:end;display:flex;flex-grow:0.01;margin-bottom:.4rem;margin-top:1rem;max-width:100%;outline-color:var(--md-accent-fg-color);overflow:hidden;transition:opacity .25s}.md-footer__link:focus,.md-footer__link:hover{opacity:.7}[dir=rtl] .md-footer__link svg{transform:scaleX(-1)}@media screen and (max-width:44.9375em){.md-footer__link--prev{flex-shrink:0}.md-footer__link--prev .md-footer__title{display:none}}[dir=ltr] .md-footer__link--next{margin-left:auto}[dir=rtl] .md-footer__link--next{margin-right:auto}.md-footer__link--next{text-align:right}[dir=rtl] .md-footer__link--next{text-align:left}.md-footer__title{flex-grow:1;font-size:.9rem;margin-bottom:.7rem;max-width:calc(100% - 2.4rem);padding:0 1rem;white-space:nowrap}.md-footer__button{margin:.2rem;padding:.4rem}.md-footer__direction{font-size:.64rem;opacity:.7}.md-footer-meta{background-color:var(--md-footer-bg-color--dark)}.md-footer-meta__inner{display:flex;flex-wrap:wrap;justify-content:space-between;padding:.2rem}html .md-footer-meta.md-typeset a{color:var(--md-footer-fg-color--light)}html .md-footer-meta.md-typeset a:focus,html .md-footer-meta.md-typeset a:hover{color:var(--md-footer-fg-color)}.md-copyright{color:var(--md-footer-fg-color--lighter);font-size:.64rem;margin:auto .6rem;padding:.4rem 0;width:100%}@media screen and (min-width:45em){.md-copyright{width:auto}}.md-copyright__highlight{color:var(--md-footer-fg-color--light)}.md-social{display:inline-flex;gap:.2rem;margin:0 .4rem;padding:.2rem 0 .6rem}@media screen and (min-width:45em){.md-social{padding:.6rem 0}}.md-social__link{display:inline-block;height:1.6rem;text-align:center;width:1.6rem}.md-social__link:before{line-height:1.9}.md-social__link svg{fill:currentcolor;max-height:.8rem;vertical-align:-25%}.md-typeset .md-button{border:.1rem 
solid;border-radius:.1rem;color:var(--md-primary-fg-color);cursor:pointer;display:inline-block;font-weight:700;padding:.625em 2em;transition:color 125ms,background-color 125ms,border-color 125ms}.md-typeset .md-button--primary{background-color:var(--md-primary-fg-color);border-color:var(--md-primary-fg-color);color:var(--md-primary-bg-color)}.md-typeset .md-button:focus,.md-typeset .md-button:hover{background-color:var(--md-accent-fg-color);border-color:var(--md-accent-fg-color);color:var(--md-accent-bg-color)}[dir=ltr] .md-typeset .md-input{border-top-left-radius:.1rem}[dir=ltr] .md-typeset .md-input,[dir=rtl] .md-typeset .md-input{border-top-right-radius:.1rem}[dir=rtl] .md-typeset .md-input{border-top-left-radius:.1rem}.md-typeset .md-input{border-bottom:.1rem solid var(--md-default-fg-color--lighter);box-shadow:var(--md-shadow-z1);font-size:.8rem;height:1.8rem;padding:0 .6rem;transition:border .25s,box-shadow .25s}.md-typeset .md-input:focus,.md-typeset .md-input:hover{border-bottom-color:var(--md-accent-fg-color);box-shadow:var(--md-shadow-z2)}.md-typeset .md-input--stretch{width:100%}.md-header{background-color:var(--md-primary-fg-color);box-shadow:0 0 .2rem #0000,0 .2rem .4rem #0000;color:var(--md-primary-bg-color);display:block;left:0;position:sticky;right:0;top:0;z-index:4}@media print{.md-header{display:none}}.md-header[hidden]{transform:translateY(-100%);transition:transform .25s cubic-bezier(.8,0,.6,1),box-shadow .25s}.md-header--shadow{box-shadow:0 0 .2rem #0000001a,0 .2rem .4rem #0003;transition:transform .25s cubic-bezier(.1,.7,.1,1),box-shadow .25s}.md-header__inner{align-items:center;display:flex;padding:0 .2rem}.md-header__button{color:currentcolor;cursor:pointer;margin:.2rem;outline-color:var(--md-accent-fg-color);padding:.4rem;position:relative;transition:opacity 
.25s;vertical-align:middle;z-index:1}.md-header__button:hover{opacity:.7}.md-header__button:not([hidden]){display:inline-block}.md-header__button:not(.focus-visible){-webkit-tap-highlight-color:transparent;outline:none}.md-header__button.md-logo{margin:.2rem;padding:.4rem}@media screen and (max-width:76.1875em){.md-header__button.md-logo{display:none}}.md-header__button.md-logo img,.md-header__button.md-logo svg{fill:currentcolor;display:block;height:1.2rem;width:auto}@media screen and (min-width:60em){.md-header__button[for=__search]{display:none}}.no-js .md-header__button[for=__search]{display:none}[dir=rtl] .md-header__button[for=__search] svg{transform:scaleX(-1)}@media screen and (min-width:76.25em){.md-header__button[for=__drawer]{display:none}}.md-header__topic{display:flex;max-width:100%;position:absolute;transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .15s;white-space:nowrap}.md-header__topic+.md-header__topic{opacity:0;pointer-events:none;transform:translateX(1.25rem);transition:transform .4s cubic-bezier(1,.7,.1,.1),opacity .15s;z-index:-1}[dir=rtl] .md-header__topic+.md-header__topic{transform:translateX(-1.25rem)}.md-header__topic:first-child{font-weight:700}[dir=ltr] .md-header__title{margin-left:1rem}[dir=rtl] .md-header__title{margin-right:1rem}[dir=ltr] .md-header__title{margin-right:.4rem}[dir=rtl] .md-header__title{margin-left:.4rem}.md-header__title{flex-grow:1;font-size:.9rem;height:2.4rem;line-height:2.4rem}.md-header__title--active .md-header__topic{opacity:0;pointer-events:none;transform:translateX(-1.25rem);transition:transform .4s cubic-bezier(1,.7,.1,.1),opacity .15s;z-index:-1}[dir=rtl] .md-header__title--active .md-header__topic{transform:translateX(1.25rem)}.md-header__title--active .md-header__topic+.md-header__topic{opacity:1;pointer-events:auto;transform:translateX(0);transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity 
.15s;z-index:0}.md-header__title>.md-header__ellipsis{height:100%;position:relative;width:100%}.md-header__option{display:flex;flex-shrink:0;max-width:100%;transition:max-width 0ms .25s,opacity .25s .25s;white-space:nowrap}[data-md-toggle=search]:checked~.md-header .md-header__option{max-width:0;opacity:0;transition:max-width 0ms,opacity 0ms}.md-header__option>input{bottom:0}.md-header__source{display:none}@media screen and (min-width:60em){[dir=ltr] .md-header__source{margin-left:1rem}[dir=rtl] .md-header__source{margin-right:1rem}.md-header__source{display:block;max-width:11.7rem;width:11.7rem}}@media screen and (min-width:76.25em){[dir=ltr] .md-header__source{margin-left:1.4rem}[dir=rtl] .md-header__source{margin-right:1.4rem}}.md-meta{color:var(--md-default-fg-color--light);font-size:.7rem;line-height:1.3}.md-meta__list{display:inline-flex;flex-wrap:wrap;list-style:none;margin:0;padding:0}.md-meta__item:not(:last-child):after{content:"·";margin-left:.2rem;margin-right:.2rem}.md-meta__link{color:var(--md-typeset-a-color)}.md-meta__link:focus,.md-meta__link:hover{color:var(--md-accent-fg-color)}.md-draft{background-color:#ff1744;border-radius:.125em;color:#fff;display:inline-block;font-weight:700;padding-left:.5714285714em;padding-right:.5714285714em}:root{--md-nav-icon--prev:url('data:image/svg+xml;charset=utf-8,');--md-nav-icon--next:url('data:image/svg+xml;charset=utf-8,');--md-toc-icon:url('data:image/svg+xml;charset=utf-8,')}.md-nav{font-size:.7rem;line-height:1.3}.md-nav__title{color:var(--md-default-fg-color--light);display:block;font-weight:700;overflow:hidden;padding:0 .6rem;text-overflow:ellipsis}.md-nav__title .md-nav__button{display:none}.md-nav__title .md-nav__button img{height:100%;width:auto}.md-nav__title .md-nav__button.md-logo img,.md-nav__title .md-nav__button.md-logo svg{fill:currentcolor;display:block;height:2.4rem;max-width:100%;object-fit:contain;width:auto}.md-nav__list{list-style:none;margin:0;padding:0}.md-nav__item{padding:0 
.6rem}[dir=ltr] .md-nav__item .md-nav__item{padding-right:0}[dir=rtl] .md-nav__item .md-nav__item{padding-left:0}.md-nav__link{align-items:flex-start;display:flex;margin-top:.625em;scroll-snap-align:start;transition:color 125ms}.md-nav__link--passed{color:var(--md-default-fg-color--light)}.md-nav__item .md-nav__link--active,.md-nav__item .md-nav__link--active code{color:var(--md-typeset-a-color)}.md-nav__link .md-ellipsis{position:relative}.md-nav__link .md-icon:last-child{margin-left:auto}.md-nav__link svg{fill:currentcolor;flex-shrink:0;height:1.3em}[dir=ltr] .md-nav__link svg+*{margin-left:.4rem}[dir=rtl] .md-nav__link svg+*{margin-right:.4rem}.md-nav__link:not(.md-nav__container):focus,.md-nav__link:not(.md-nav__container):hover{color:var(--md-accent-fg-color);cursor:pointer}.md-nav__link.focus-visible{outline-color:var(--md-accent-fg-color);outline-offset:.2rem}.md-nav--primary .md-nav__link[for=__toc]{display:none}.md-nav--primary .md-nav__link[for=__toc] .md-icon:after{background-color:currentcolor;display:block;height:100%;-webkit-mask-image:var(--md-toc-icon);mask-image:var(--md-toc-icon);width:100%}.md-nav--primary .md-nav__link[for=__toc]~.md-nav{display:none}.md-nav__container>.md-nav__link{margin-top:0}.md-nav__container>.md-nav__link:first-child{flex-grow:1}.md-nav__icon{flex-shrink:0}.md-nav__source{display:none}@media screen and (max-width:76.1875em){.md-nav--primary,.md-nav--primary .md-nav{background-color:var(--md-default-bg-color);display:flex;flex-direction:column;height:100%;left:0;position:absolute;right:0;top:0;z-index:1}.md-nav--primary .md-nav__item,.md-nav--primary .md-nav__title{font-size:.8rem;line-height:1.5}.md-nav--primary .md-nav__title{background-color:var(--md-default-fg-color--lightest);color:var(--md-default-fg-color--light);cursor:pointer;height:5.6rem;line-height:2.4rem;padding:3rem .8rem .2rem;position:relative;white-space:nowrap}[dir=ltr] .md-nav--primary .md-nav__title .md-nav__icon{left:.4rem}[dir=rtl] .md-nav--primary 
.md-nav__title .md-nav__icon{right:.4rem}.md-nav--primary .md-nav__title .md-nav__icon{display:block;height:1.2rem;margin:.2rem;position:absolute;top:.4rem;width:1.2rem}.md-nav--primary .md-nav__title .md-nav__icon:after{background-color:currentcolor;content:"";display:block;height:100%;-webkit-mask-image:var(--md-nav-icon--prev);mask-image:var(--md-nav-icon--prev);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;width:100%}.md-nav--primary .md-nav__title~.md-nav__list{background-color:var(--md-default-bg-color);box-shadow:0 .05rem 0 var(--md-default-fg-color--lightest) inset;overflow-y:auto;scroll-snap-type:y mandatory;touch-action:pan-y}.md-nav--primary .md-nav__title~.md-nav__list>:first-child{border-top:0}.md-nav--primary .md-nav__title[for=__drawer]{background-color:var(--md-primary-fg-color);color:var(--md-primary-bg-color);font-weight:700}.md-nav--primary .md-nav__title .md-logo{display:block;left:.2rem;margin:.2rem;padding:.4rem;position:absolute;right:.2rem;top:.2rem}.md-nav--primary .md-nav__list{flex:1}.md-nav--primary .md-nav__item{border-top:.05rem solid var(--md-default-fg-color--lightest);padding:0}.md-nav--primary .md-nav__item--active>.md-nav__link{color:var(--md-typeset-a-color)}.md-nav--primary .md-nav__item--active>.md-nav__link:focus,.md-nav--primary .md-nav__item--active>.md-nav__link:hover{color:var(--md-accent-fg-color)}.md-nav--primary .md-nav__link{margin-top:0;padding:.6rem .8rem}.md-nav--primary .md-nav__link svg{margin-top:.1em}.md-nav--primary .md-nav__link>.md-nav__link{padding:0}[dir=ltr] .md-nav--primary .md-nav__link .md-nav__icon{margin-right:-.2rem}[dir=rtl] .md-nav--primary .md-nav__link .md-nav__icon{margin-left:-.2rem}.md-nav--primary .md-nav__link .md-nav__icon{font-size:1.2rem;height:1.2rem;width:1.2rem}.md-nav--primary .md-nav__link 
.md-nav__icon:after{background-color:currentcolor;content:"";display:block;height:100%;-webkit-mask-image:var(--md-nav-icon--next);mask-image:var(--md-nav-icon--next);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;width:100%}[dir=rtl] .md-nav--primary .md-nav__icon:after{transform:scale(-1)}.md-nav--primary .md-nav--secondary .md-nav{background-color:initial;position:static}[dir=ltr] .md-nav--primary .md-nav--secondary .md-nav .md-nav__link{padding-left:1.4rem}[dir=rtl] .md-nav--primary .md-nav--secondary .md-nav .md-nav__link{padding-right:1.4rem}[dir=ltr] .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav__link{padding-left:2rem}[dir=rtl] .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav__link{padding-right:2rem}[dir=ltr] .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav__link{padding-left:2.6rem}[dir=rtl] .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav__link{padding-right:2.6rem}[dir=ltr] .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav .md-nav__link{padding-left:3.2rem}[dir=rtl] .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav .md-nav__link{padding-right:3.2rem}.md-nav--secondary{background-color:initial}.md-nav__toggle~.md-nav{display:flex;opacity:0;transform:translateX(100%);transition:transform .25s cubic-bezier(.8,0,.6,1),opacity 125ms 50ms}[dir=rtl] .md-nav__toggle~.md-nav{transform:translateX(-100%)}.md-nav__toggle:checked~.md-nav{opacity:1;transform:translateX(0);transition:transform .25s cubic-bezier(.4,0,.2,1),opacity 125ms 125ms}.md-nav__toggle:checked~.md-nav>.md-nav__list{-webkit-backface-visibility:hidden;backface-visibility:hidden}}@media screen and (max-width:59.9375em){.md-nav--primary .md-nav__link[for=__toc]{display:flex}.md-nav--primary .md-nav__link[for=__toc] .md-icon:after{content:""}.md-nav--primary 
.md-nav__link[for=__toc]+.md-nav__link{display:none}.md-nav--primary .md-nav__link[for=__toc]~.md-nav{display:flex}.md-nav__source{background-color:var(--md-primary-fg-color--dark);color:var(--md-primary-bg-color);display:block;padding:0 .2rem}}@media screen and (min-width:60em) and (max-width:76.1875em){.md-nav--integrated .md-nav__link[for=__toc]{display:flex}.md-nav--integrated .md-nav__link[for=__toc] .md-icon:after{content:""}.md-nav--integrated .md-nav__link[for=__toc]+.md-nav__link{display:none}.md-nav--integrated .md-nav__link[for=__toc]~.md-nav{display:flex}}@media screen and (min-width:60em){.md-nav--secondary .md-nav__title{background:var(--md-default-bg-color);box-shadow:0 0 .4rem .4rem var(--md-default-bg-color);position:sticky;top:0;z-index:1}.md-nav--secondary .md-nav__title[for=__toc]{scroll-snap-align:start}.md-nav--secondary .md-nav__title .md-nav__icon{display:none}}@media screen and (min-width:76.25em){.md-nav{transition:max-height .25s cubic-bezier(.86,0,.07,1)}.md-nav--primary .md-nav__title{background:var(--md-default-bg-color);box-shadow:0 0 .4rem .4rem var(--md-default-bg-color);position:sticky;top:0;z-index:1}.md-nav--primary .md-nav__title[for=__drawer]{scroll-snap-align:start}.md-nav--primary .md-nav__title .md-nav__icon,.md-nav__toggle~.md-nav{display:none}.md-nav__toggle:checked~.md-nav,.md-nav__toggle:indeterminate~.md-nav{display:block}.md-nav__item--nested>.md-nav>.md-nav__title{display:none}.md-nav__item--section{display:block;margin:1.25em 0}.md-nav__item--section:last-child{margin-bottom:0}.md-nav__item--section>.md-nav__link{font-weight:700}.md-nav__item--section>.md-nav__link[for]{color:var(--md-default-fg-color--light)}.md-nav__item--section>.md-nav__link:not(.md-nav__container){pointer-events:none}.md-nav__item--section>.md-nav__link 
.md-nav__icon{display:none}.md-nav__item--section>.md-nav{display:block}.md-nav__item--section>.md-nav>.md-nav__list>.md-nav__item{padding:0}.md-nav__icon{border-radius:100%;height:.9rem;transition:background-color .25s;width:.9rem}.md-nav__icon:hover{background-color:var(--md-accent-fg-color--transparent)}.md-nav__icon:after{background-color:currentcolor;border-radius:100%;content:"";display:inline-block;height:100%;-webkit-mask-image:var(--md-nav-icon--next);mask-image:var(--md-nav-icon--next);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:transform .25s;vertical-align:-.1rem;width:100%}[dir=rtl] .md-nav__icon:after{transform:rotate(180deg)}.md-nav__item--nested .md-nav__toggle:checked~.md-nav__link .md-nav__icon:after,.md-nav__item--nested .md-nav__toggle:indeterminate~.md-nav__link .md-nav__icon:after{transform:rotate(90deg)}.md-nav--lifted>.md-nav__list>.md-nav__item,.md-nav--lifted>.md-nav__list>.md-nav__item--nested,.md-nav--lifted>.md-nav__title{display:none}.md-nav--lifted>.md-nav__list>.md-nav__item--active{display:block;padding:0}.md-nav--lifted>.md-nav__list>.md-nav__item--active>.md-nav__link{background:var(--md-default-bg-color);box-shadow:0 0 .4rem .4rem var(--md-default-bg-color);font-weight:700;margin-top:0;padding:0 .6rem;position:sticky;top:0;z-index:1}.md-nav--lifted>.md-nav__list>.md-nav__item--active>.md-nav__link:not(.md-nav__container){pointer-events:none}.md-nav--lifted>.md-nav__list>.md-nav__item--active>.md-nav__link .md-nav__icon{display:none}.md-nav--lifted>.md-nav__list>.md-nav__item>[for]{color:var(--md-default-fg-color--light)}.md-nav--lifted .md-nav[data-md-level="1"]{display:block}[dir=ltr] .md-nav--lifted .md-nav[data-md-level="1"]>.md-nav__list>.md-nav__item{padding-right:.6rem}[dir=rtl] .md-nav--lifted 
.md-nav[data-md-level="1"]>.md-nav__list>.md-nav__item{padding-left:.6rem}.md-nav--integrated>.md-nav__list>.md-nav__item--active:not(.md-nav__item--nested){padding:0 .6rem}.md-nav--integrated>.md-nav__list>.md-nav__item--active:not(.md-nav__item--nested)>.md-nav__link{padding:0}[dir=ltr] .md-nav--integrated>.md-nav__list>.md-nav__item--active .md-nav--secondary{border-left:.05rem solid var(--md-primary-fg-color)}[dir=rtl] .md-nav--integrated>.md-nav__list>.md-nav__item--active .md-nav--secondary{border-right:.05rem solid var(--md-primary-fg-color)}.md-nav--integrated>.md-nav__list>.md-nav__item--active .md-nav--secondary{display:block;margin-bottom:1.25em}.md-nav--integrated>.md-nav__list>.md-nav__item--active .md-nav--secondary>.md-nav__title{display:none}}.md-pagination{font-size:.8rem;font-weight:700;gap:.4rem}.md-pagination,.md-pagination>*{align-items:center;display:flex;justify-content:center}.md-pagination>*{border-radius:.2rem;height:1.8rem;min-width:1.8rem;text-align:center}.md-pagination__current{background-color:var(--md-default-fg-color--lightest);color:var(--md-default-fg-color--light)}.md-pagination__link{transition:color 125ms,background-color 125ms}.md-pagination__link:focus,.md-pagination__link:hover{background-color:var(--md-accent-fg-color--transparent);color:var(--md-accent-fg-color)}.md-pagination__link:focus svg,.md-pagination__link:hover svg{color:var(--md-accent-fg-color)}.md-pagination__link.focus-visible{outline-color:var(--md-accent-fg-color);outline-offset:.2rem}.md-pagination__link svg{fill:currentcolor;color:var(--md-default-fg-color--lighter);display:block;max-height:100%;width:1.2rem}.md-post__back{border-bottom:.05rem solid var(--md-default-fg-color--lightest);margin-bottom:1.2rem;padding-bottom:1.2rem}@media screen and (max-width:76.1875em){.md-post__back{display:none}}[dir=rtl] .md-post__back svg{transform:scaleX(-1)}.md-post__authors{display:flex;flex-direction:column;gap:.6rem;margin:0 .6rem}.md-post .md-post__meta 
a{transition:color 125ms}.md-post .md-post__meta a:focus,.md-post .md-post__meta a:hover{color:var(--md-accent-fg-color)}.md-post--excerpt{margin-bottom:3.2rem}.md-post--excerpt .md-post__header{align-items:center;display:flex;gap:.6rem;min-height:1.6rem}.md-post--excerpt .md-post__authors{align-items:center;display:inline-flex;flex-direction:row;gap:.2rem;margin:0;min-height:2.4rem}[dir=ltr] .md-post--excerpt .md-post__meta .md-meta__list{margin-right:.4rem}[dir=rtl] .md-post--excerpt .md-post__meta .md-meta__list{margin-left:.4rem}.md-post--excerpt .md-post__content>:first-child{--md-scroll-margin:6rem;margin-top:0}.md-post>.md-nav--secondary,.md-post>.md-nav:first-child>.md-nav__list{margin:1em 0}.md-profile{align-items:center;display:flex;font-size:.7rem;gap:.6rem;line-height:1.4;width:100%}.md-profile__description{flex-grow:1}.md-content--post{display:flex}@media screen and (max-width:76.1875em){.md-content--post{flex-flow:column-reverse}}.md-content--post>.md-content__inner{min-width:0}@media screen and (min-width:76.25em){[dir=ltr] .md-content--post>.md-content__inner{margin-left:1.2rem}[dir=rtl] .md-content--post>.md-content__inner{margin-right:1.2rem}}@media screen and (max-width:76.1875em){.md-sidebar.md-sidebar--post{padding:0}}:root{--md-search-result-icon:url('data:image/svg+xml;charset=utf-8,')}.md-search{position:relative}@media screen and (min-width:60em){.md-search{padding:.2rem 0}}.no-js .md-search{display:none}.md-search__overlay{opacity:0;z-index:1}@media screen and (max-width:59.9375em){[dir=ltr] .md-search__overlay{left:-2.2rem}[dir=rtl] .md-search__overlay{right:-2.2rem}.md-search__overlay{background-color:var(--md-default-bg-color);border-radius:1rem;height:2rem;overflow:hidden;pointer-events:none;position:absolute;top:-1rem;transform-origin:center;transition:transform .3s .1s,opacity .2s .2s;width:2rem}[data-md-toggle=search]:checked~.md-header .md-search__overlay{opacity:1;transition:transform .4s,opacity .1s}}@media screen and 
(min-width:60em){[dir=ltr] .md-search__overlay{left:0}[dir=rtl] .md-search__overlay{right:0}.md-search__overlay{background-color:#0000008a;cursor:pointer;height:0;position:fixed;top:0;transition:width 0ms .25s,height 0ms .25s,opacity .25s;width:0}[data-md-toggle=search]:checked~.md-header .md-search__overlay{height:200vh;opacity:1;transition:width 0ms,height 0ms,opacity .25s;width:100%}}@media screen and (max-width:29.9375em){[data-md-toggle=search]:checked~.md-header .md-search__overlay{transform:scale(45)}}@media screen and (min-width:30em) and (max-width:44.9375em){[data-md-toggle=search]:checked~.md-header .md-search__overlay{transform:scale(60)}}@media screen and (min-width:45em) and (max-width:59.9375em){[data-md-toggle=search]:checked~.md-header .md-search__overlay{transform:scale(75)}}.md-search__inner{-webkit-backface-visibility:hidden;backface-visibility:hidden}@media screen and (max-width:59.9375em){[dir=ltr] .md-search__inner{left:0}[dir=rtl] .md-search__inner{right:0}.md-search__inner{height:0;opacity:0;overflow:hidden;position:fixed;top:0;transform:translateX(5%);transition:width 0ms .3s,height 0ms .3s,transform .15s cubic-bezier(.4,0,.2,1) .15s,opacity .15s .15s;width:0;z-index:2}[dir=rtl] .md-search__inner{transform:translateX(-5%)}[data-md-toggle=search]:checked~.md-header .md-search__inner{height:100%;opacity:1;transform:translateX(0);transition:width 0ms 0ms,height 0ms 0ms,transform .15s cubic-bezier(.1,.7,.1,1) .15s,opacity .15s .15s;width:100%}}@media screen and (min-width:60em){[dir=ltr] .md-search__inner{float:right}[dir=rtl] .md-search__inner{float:left}.md-search__inner{padding:.1rem 0;position:relative;transition:width .25s cubic-bezier(.1,.7,.1,1);width:11.7rem}}@media screen and (min-width:60em) and (max-width:76.1875em){[data-md-toggle=search]:checked~.md-header .md-search__inner{width:23.4rem}}@media screen and (min-width:76.25em){[data-md-toggle=search]:checked~.md-header 
.md-search__inner{width:34.4rem}}.md-search__form{background-color:var(--md-default-bg-color);box-shadow:0 0 .6rem #0000;height:2.4rem;position:relative;transition:color .25s,background-color .25s;z-index:2}@media screen and (min-width:60em){.md-search__form{background-color:#00000042;border-radius:.1rem;height:1.8rem}.md-search__form:hover{background-color:#ffffff1f}}[data-md-toggle=search]:checked~.md-header .md-search__form{background-color:var(--md-default-bg-color);border-radius:.1rem .1rem 0 0;box-shadow:0 0 .6rem #00000012;color:var(--md-default-fg-color)}[dir=ltr] .md-search__input{padding-left:3.6rem;padding-right:2.2rem}[dir=rtl] .md-search__input{padding-left:2.2rem;padding-right:3.6rem}.md-search__input{background:#0000;font-size:.9rem;height:100%;position:relative;text-overflow:ellipsis;width:100%;z-index:2}.md-search__input::placeholder{transition:color .25s}.md-search__input::placeholder,.md-search__input~.md-search__icon{color:var(--md-default-fg-color--light)}.md-search__input::-ms-clear{display:none}@media screen and (max-width:59.9375em){.md-search__input{font-size:.9rem;height:2.4rem;width:100%}}@media screen and (min-width:60em){[dir=ltr] .md-search__input{padding-left:2.2rem}[dir=rtl] .md-search__input{padding-right:2.2rem}.md-search__input{color:inherit;font-size:.8rem}.md-search__input::placeholder{color:var(--md-primary-bg-color--light)}.md-search__input+.md-search__icon{color:var(--md-primary-bg-color)}[data-md-toggle=search]:checked~.md-header .md-search__input{text-overflow:clip}[data-md-toggle=search]:checked~.md-header .md-search__input+.md-search__icon{color:var(--md-default-fg-color--light)}[data-md-toggle=search]:checked~.md-header .md-search__input::placeholder{color:#0000}}.md-search__icon{cursor:pointer;display:inline-block;height:1.2rem;transition:color .25s,opacity .25s;width:1.2rem}.md-search__icon:hover{opacity:.7}[dir=ltr] .md-search__icon[for=__search]{left:.5rem}[dir=rtl] 
.md-search__icon[for=__search]{right:.5rem}.md-search__icon[for=__search]{position:absolute;top:.3rem;z-index:2}[dir=rtl] .md-search__icon[for=__search] svg{transform:scaleX(-1)}@media screen and (max-width:59.9375em){[dir=ltr] .md-search__icon[for=__search]{left:.8rem}[dir=rtl] .md-search__icon[for=__search]{right:.8rem}.md-search__icon[for=__search]{top:.6rem}.md-search__icon[for=__search] svg:first-child{display:none}}@media screen and (min-width:60em){.md-search__icon[for=__search]{pointer-events:none}.md-search__icon[for=__search] svg:last-child{display:none}}[dir=ltr] .md-search__options{right:.5rem}[dir=rtl] .md-search__options{left:.5rem}.md-search__options{pointer-events:none;position:absolute;top:.3rem;z-index:2}@media screen and (max-width:59.9375em){[dir=ltr] .md-search__options{right:.8rem}[dir=rtl] .md-search__options{left:.8rem}.md-search__options{top:.6rem}}[dir=ltr] .md-search__options>.md-icon{margin-left:.2rem}[dir=rtl] .md-search__options>.md-icon{margin-right:.2rem}.md-search__options>.md-icon{color:var(--md-default-fg-color--light);opacity:0;transform:scale(.75);transition:transform .15s cubic-bezier(.1,.7,.1,1),opacity .15s}.md-search__options>.md-icon:not(.focus-visible){-webkit-tap-highlight-color:transparent;outline:none}[data-md-toggle=search]:checked~.md-header .md-search__input:valid~.md-search__options>.md-icon{opacity:1;pointer-events:auto;transform:scale(1)}[data-md-toggle=search]:checked~.md-header .md-search__input:valid~.md-search__options>.md-icon:hover{opacity:.7}[dir=ltr] .md-search__suggest{padding-left:3.6rem;padding-right:2.2rem}[dir=rtl] .md-search__suggest{padding-left:2.2rem;padding-right:3.6rem}.md-search__suggest{align-items:center;color:var(--md-default-fg-color--lighter);display:flex;font-size:.9rem;height:100%;opacity:0;position:absolute;top:0;transition:opacity 50ms;white-space:nowrap;width:100%}@media screen and (min-width:60em){[dir=ltr] .md-search__suggest{padding-left:2.2rem}[dir=rtl] 
.md-search__suggest{padding-right:2.2rem}.md-search__suggest{font-size:.8rem}}[data-md-toggle=search]:checked~.md-header .md-search__suggest{opacity:1;transition:opacity .3s .1s}[dir=ltr] .md-search__output{border-bottom-left-radius:.1rem}[dir=ltr] .md-search__output,[dir=rtl] .md-search__output{border-bottom-right-radius:.1rem}[dir=rtl] .md-search__output{border-bottom-left-radius:.1rem}.md-search__output{overflow:hidden;position:absolute;width:100%;z-index:1}@media screen and (max-width:59.9375em){.md-search__output{bottom:0;top:2.4rem}}@media screen and (min-width:60em){.md-search__output{opacity:0;top:1.9rem;transition:opacity .4s}[data-md-toggle=search]:checked~.md-header .md-search__output{box-shadow:var(--md-shadow-z3);opacity:1}}.md-search__scrollwrap{-webkit-backface-visibility:hidden;backface-visibility:hidden;background-color:var(--md-default-bg-color);height:100%;overflow-y:auto;touch-action:pan-y}@media (-webkit-max-device-pixel-ratio:1),(max-resolution:1dppx){.md-search__scrollwrap{transform:translateZ(0)}}@media screen and (min-width:60em) and (max-width:76.1875em){.md-search__scrollwrap{width:23.4rem}}@media screen and (min-width:76.25em){.md-search__scrollwrap{width:34.4rem}}@media screen and (min-width:60em){.md-search__scrollwrap{max-height:0;scrollbar-color:var(--md-default-fg-color--lighter) #0000;scrollbar-width:thin}[data-md-toggle=search]:checked~.md-header .md-search__scrollwrap{max-height:75vh}.md-search__scrollwrap:hover{scrollbar-color:var(--md-accent-fg-color) 
#0000}.md-search__scrollwrap::-webkit-scrollbar{height:.2rem;width:.2rem}.md-search__scrollwrap::-webkit-scrollbar-thumb{background-color:var(--md-default-fg-color--lighter)}.md-search__scrollwrap::-webkit-scrollbar-thumb:hover{background-color:var(--md-accent-fg-color)}}.md-search-result{color:var(--md-default-fg-color);word-break:break-word}.md-search-result__meta{background-color:var(--md-default-fg-color--lightest);color:var(--md-default-fg-color--light);font-size:.64rem;line-height:1.8rem;padding:0 .8rem;scroll-snap-align:start}@media screen and (min-width:60em){[dir=ltr] .md-search-result__meta{padding-left:2.2rem}[dir=rtl] .md-search-result__meta{padding-right:2.2rem}}.md-search-result__list{list-style:none;margin:0;padding:0;-webkit-user-select:none;user-select:none}.md-search-result__item{box-shadow:0 -.05rem var(--md-default-fg-color--lightest)}.md-search-result__item:first-child{box-shadow:none}.md-search-result__link{display:block;outline:none;scroll-snap-align:start;transition:background-color .25s}.md-search-result__link:focus,.md-search-result__link:hover{background-color:var(--md-accent-fg-color--transparent)}.md-search-result__link:last-child p:last-child{margin-bottom:.6rem}.md-search-result__more>summary{cursor:pointer;display:block;outline:none;position:sticky;scroll-snap-align:start;top:0;z-index:1}.md-search-result__more>summary::marker{display:none}.md-search-result__more>summary::-webkit-details-marker{display:none}.md-search-result__more>summary>div{color:var(--md-typeset-a-color);font-size:.64rem;padding:.75em .8rem;transition:color .25s,background-color .25s}@media screen and (min-width:60em){[dir=ltr] .md-search-result__more>summary>div{padding-left:2.2rem}[dir=rtl] 
.md-search-result__more>summary>div{padding-right:2.2rem}}.md-search-result__more>summary:focus>div,.md-search-result__more>summary:hover>div{background-color:var(--md-accent-fg-color--transparent);color:var(--md-accent-fg-color)}.md-search-result__more[open]>summary{background-color:var(--md-default-bg-color)}.md-search-result__article{overflow:hidden;padding:0 .8rem;position:relative}@media screen and (min-width:60em){[dir=ltr] .md-search-result__article{padding-left:2.2rem}[dir=rtl] .md-search-result__article{padding-right:2.2rem}}[dir=ltr] .md-search-result__icon{left:0}[dir=rtl] .md-search-result__icon{right:0}.md-search-result__icon{color:var(--md-default-fg-color--light);height:1.2rem;margin:.5rem;position:absolute;width:1.2rem}@media screen and (max-width:59.9375em){.md-search-result__icon{display:none}}.md-search-result__icon:after{background-color:currentcolor;content:"";display:inline-block;height:100%;-webkit-mask-image:var(--md-search-result-icon);mask-image:var(--md-search-result-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;width:100%}[dir=rtl] .md-search-result__icon:after{transform:scaleX(-1)}.md-search-result .md-typeset{color:var(--md-default-fg-color--light);font-size:.64rem;line-height:1.6}.md-search-result .md-typeset h1{color:var(--md-default-fg-color);font-size:.8rem;font-weight:400;line-height:1.4;margin:.55rem 0}.md-search-result .md-typeset h1 mark{text-decoration:none}.md-search-result .md-typeset h2{color:var(--md-default-fg-color);font-size:.64rem;font-weight:700;line-height:1.6;margin:.5em 0}.md-search-result .md-typeset h2 mark{text-decoration:none}.md-search-result__terms{color:var(--md-default-fg-color);display:block;font-size:.64rem;font-style:italic;margin:.5em 0}.md-search-result 
mark{background-color:initial;color:var(--md-accent-fg-color);text-decoration:underline}.md-select{position:relative;z-index:1}.md-select__inner{background-color:var(--md-default-bg-color);border-radius:.1rem;box-shadow:var(--md-shadow-z2);color:var(--md-default-fg-color);left:50%;margin-top:.2rem;max-height:0;opacity:0;position:absolute;top:calc(100% - .2rem);transform:translate3d(-50%,.3rem,0);transition:transform .25s 375ms,opacity .25s .25s,max-height 0ms .5s}.md-select:focus-within .md-select__inner,.md-select:hover .md-select__inner{max-height:10rem;opacity:1;transform:translate3d(-50%,0,0);transition:transform .25s cubic-bezier(.1,.7,.1,1),opacity .25s,max-height 0ms}.md-select__inner:after{border-bottom:.2rem solid #0000;border-bottom-color:var(--md-default-bg-color);border-left:.2rem solid #0000;border-right:.2rem solid #0000;border-top:0;content:"";height:0;left:50%;margin-left:-.2rem;margin-top:-.2rem;position:absolute;top:0;width:0}.md-select__list{border-radius:.1rem;font-size:.8rem;list-style-type:none;margin:0;max-height:inherit;overflow:auto;padding:0}.md-select__item{line-height:1.8rem}[dir=ltr] .md-select__link{padding-left:.6rem;padding-right:1.2rem}[dir=rtl] .md-select__link{padding-left:1.2rem;padding-right:.6rem}.md-select__link{cursor:pointer;display:block;outline:none;scroll-snap-align:start;transition:background-color .25s,color .25s;width:100%}.md-select__link:focus,.md-select__link:hover{color:var(--md-accent-fg-color)}.md-select__link:focus{background-color:var(--md-default-fg-color--lightest)}.md-sidebar{align-self:flex-start;flex-shrink:0;padding:1.2rem 0;position:sticky;top:2.4rem;width:12.1rem}@media print{.md-sidebar{display:none}}@media screen and (max-width:76.1875em){[dir=ltr] .md-sidebar--primary{left:-12.1rem}[dir=rtl] .md-sidebar--primary{right:-12.1rem}.md-sidebar--primary{background-color:var(--md-default-bg-color);display:block;height:100%;position:fixed;top:0;transform:translateX(0);transition:transform .25s 
cubic-bezier(.4,0,.2,1),box-shadow .25s;width:12.1rem;z-index:5}[data-md-toggle=drawer]:checked~.md-container .md-sidebar--primary{box-shadow:var(--md-shadow-z3);transform:translateX(12.1rem)}[dir=rtl] [data-md-toggle=drawer]:checked~.md-container .md-sidebar--primary{transform:translateX(-12.1rem)}.md-sidebar--primary .md-sidebar__scrollwrap{bottom:0;left:0;margin:0;overflow:hidden;position:absolute;right:0;scroll-snap-type:none;top:0}}@media screen and (min-width:76.25em){.md-sidebar{height:0}.no-js .md-sidebar{height:auto}.md-header--lifted~.md-container .md-sidebar{top:4.8rem}}.md-sidebar--secondary{display:none;order:2}@media screen and (min-width:60em){.md-sidebar--secondary{height:0}.no-js .md-sidebar--secondary{height:auto}.md-sidebar--secondary:not([hidden]){display:block}.md-sidebar--secondary .md-sidebar__scrollwrap{touch-action:pan-y}}.md-sidebar__scrollwrap{scrollbar-gutter:stable;-webkit-backface-visibility:hidden;backface-visibility:hidden;margin:0 .2rem;overflow-y:auto;scrollbar-color:var(--md-default-fg-color--lighter) #0000;scrollbar-width:thin}.md-sidebar__scrollwrap::-webkit-scrollbar{height:.2rem;width:.2rem}.md-sidebar__scrollwrap:focus-within,.md-sidebar__scrollwrap:hover{scrollbar-color:var(--md-accent-fg-color) #0000}.md-sidebar__scrollwrap:focus-within::-webkit-scrollbar-thumb,.md-sidebar__scrollwrap:hover::-webkit-scrollbar-thumb{background-color:var(--md-default-fg-color--lighter)}.md-sidebar__scrollwrap:focus-within::-webkit-scrollbar-thumb:hover,.md-sidebar__scrollwrap:hover::-webkit-scrollbar-thumb:hover{background-color:var(--md-accent-fg-color)}@supports selector(::-webkit-scrollbar){.md-sidebar__scrollwrap{scrollbar-gutter:auto}[dir=ltr] .md-sidebar__inner{padding-right:calc(100% - 11.5rem)}[dir=rtl] .md-sidebar__inner{padding-left:calc(100% - 11.5rem)}}@media screen and (max-width:76.1875em){.md-overlay{background-color:#0000008a;height:0;opacity:0;position:fixed;top:0;transition:width 0ms .25s,height 0ms .25s,opacity 
.25s;width:0;z-index:5}[data-md-toggle=drawer]:checked~.md-overlay{height:100%;opacity:1;transition:width 0ms,height 0ms,opacity .25s;width:100%}}@keyframes facts{0%{height:0}to{height:.65rem}}@keyframes fact{0%{opacity:0;transform:translateY(100%)}50%{opacity:0}to{opacity:1;transform:translateY(0)}}:root{--md-source-forks-icon:url('data:image/svg+xml;charset=utf-8,');--md-source-repositories-icon:url('data:image/svg+xml;charset=utf-8,');--md-source-stars-icon:url('data:image/svg+xml;charset=utf-8,');--md-source-version-icon:url('data:image/svg+xml;charset=utf-8,')}.md-source{-webkit-backface-visibility:hidden;backface-visibility:hidden;display:block;font-size:.65rem;line-height:1.2;outline-color:var(--md-accent-fg-color);transition:opacity .25s;white-space:nowrap}.md-source:hover{opacity:.7}.md-source__icon{display:inline-block;height:2.4rem;vertical-align:middle;width:2rem}[dir=ltr] .md-source__icon svg{margin-left:.6rem}[dir=rtl] .md-source__icon svg{margin-right:.6rem}.md-source__icon svg{margin-top:.6rem}[dir=ltr] .md-source__icon+.md-source__repository{padding-left:2rem}[dir=rtl] .md-source__icon+.md-source__repository{padding-right:2rem}[dir=ltr] .md-source__icon+.md-source__repository{margin-left:-2rem}[dir=rtl] .md-source__icon+.md-source__repository{margin-right:-2rem}[dir=ltr] .md-source__repository{margin-left:.6rem}[dir=rtl] .md-source__repository{margin-right:.6rem}.md-source__repository{display:inline-block;max-width:calc(100% - 1.2rem);overflow:hidden;text-overflow:ellipsis;vertical-align:middle}.md-source__facts{display:flex;font-size:.55rem;gap:.4rem;list-style-type:none;margin:.1rem 0 0;opacity:.75;overflow:hidden;padding:0;width:100%}.md-source__repository--active .md-source__facts{animation:facts .25s ease-in}.md-source__fact{overflow:hidden;text-overflow:ellipsis}.md-source__repository--active .md-source__fact{animation:fact .4s ease-out}[dir=ltr] .md-source__fact:before{margin-right:.1rem}[dir=rtl] 
.md-source__fact:before{margin-left:.1rem}.md-source__fact:before{background-color:currentcolor;content:"";display:inline-block;height:.6rem;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;vertical-align:text-top;width:.6rem}.md-source__fact:nth-child(1n+2){flex-shrink:0}.md-source__fact--version:before{-webkit-mask-image:var(--md-source-version-icon);mask-image:var(--md-source-version-icon)}.md-source__fact--stars:before{-webkit-mask-image:var(--md-source-stars-icon);mask-image:var(--md-source-stars-icon)}.md-source__fact--forks:before{-webkit-mask-image:var(--md-source-forks-icon);mask-image:var(--md-source-forks-icon)}.md-source__fact--repositories:before{-webkit-mask-image:var(--md-source-repositories-icon);mask-image:var(--md-source-repositories-icon)}:root{--md-status:url('data:image/svg+xml;charset=utf-8,');--md-status--new:url('data:image/svg+xml;charset=utf-8,');--md-status--deprecated:url('data:image/svg+xml;charset=utf-8,');--md-status--encrypted:url('data:image/svg+xml;charset=utf-8,')}.md-status{margin-left:.2rem}.md-status:after{background-color:var(--md-default-fg-color--light);content:"";display:inline-block;height:1.125em;-webkit-mask-image:var(--md-status);mask-image:var(--md-status);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;vertical-align:text-bottom;width:1.125em}.md-status:hover:after{background-color:currentcolor}.md-status--new:after{-webkit-mask-image:var(--md-status--new);mask-image:var(--md-status--new)}.md-status--deprecated:after{-webkit-mask-image:var(--md-status--deprecated);mask-image:var(--md-status--deprecated)}.md-status--encrypted:after{-webkit-mask-image:var(--md-status--encrypted);mask-image:var(--md-status--encrypted)}.md-tabs{background-color:var(--md-primary-fg-color);color:var(--md-primary-bg-color);display:block;line-height:1.
3;overflow:auto;width:100%;z-index:3}@media print{.md-tabs{display:none}}@media screen and (max-width:76.1875em){.md-tabs{display:none}}.md-tabs[hidden]{pointer-events:none}[dir=ltr] .md-tabs__list{margin-left:.2rem}[dir=rtl] .md-tabs__list{margin-right:.2rem}.md-tabs__list{contain:content;display:flex;list-style:none;margin:0;overflow:auto;padding:0;scrollbar-width:none;white-space:nowrap}.md-tabs__list::-webkit-scrollbar{display:none}.md-tabs__item{height:2.4rem;padding-left:.6rem;padding-right:.6rem}.md-tabs__item--active .md-tabs__link{color:inherit;opacity:1}.md-tabs__link{-webkit-backface-visibility:hidden;backface-visibility:hidden;display:flex;font-size:.7rem;margin-top:.8rem;opacity:.7;outline-color:var(--md-accent-fg-color);outline-offset:.2rem;transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .25s}.md-tabs__link:focus,.md-tabs__link:hover{color:inherit;opacity:1}[dir=ltr] .md-tabs__link svg{margin-right:.4rem}[dir=rtl] .md-tabs__link svg{margin-left:.4rem}.md-tabs__link svg{fill:currentcolor;height:1.3em}.md-tabs__item:nth-child(2) .md-tabs__link{transition-delay:20ms}.md-tabs__item:nth-child(3) .md-tabs__link{transition-delay:40ms}.md-tabs__item:nth-child(4) .md-tabs__link{transition-delay:60ms}.md-tabs__item:nth-child(5) .md-tabs__link{transition-delay:80ms}.md-tabs__item:nth-child(6) .md-tabs__link{transition-delay:.1s}.md-tabs__item:nth-child(7) .md-tabs__link{transition-delay:.12s}.md-tabs__item:nth-child(8) .md-tabs__link{transition-delay:.14s}.md-tabs__item:nth-child(9) .md-tabs__link{transition-delay:.16s}.md-tabs__item:nth-child(10) .md-tabs__link{transition-delay:.18s}.md-tabs__item:nth-child(11) .md-tabs__link{transition-delay:.2s}.md-tabs__item:nth-child(12) .md-tabs__link{transition-delay:.22s}.md-tabs__item:nth-child(13) .md-tabs__link{transition-delay:.24s}.md-tabs__item:nth-child(14) .md-tabs__link{transition-delay:.26s}.md-tabs__item:nth-child(15) .md-tabs__link{transition-delay:.28s}.md-tabs__item:nth-child(16) 
.md-tabs__link{transition-delay:.3s}.md-tabs[hidden] .md-tabs__link{opacity:0;transform:translateY(50%);transition:transform 0ms .1s,opacity .1s}:root{--md-tag-icon:url('data:image/svg+xml;charset=utf-8,')}.md-typeset .md-tags{margin-bottom:.75em;margin-top:-.125em}[dir=ltr] .md-typeset .md-tag{margin-right:.5em}[dir=rtl] .md-typeset .md-tag{margin-left:.5em}.md-typeset .md-tag{background:var(--md-default-fg-color--lightest);border-radius:2.4rem;display:inline-block;font-size:.64rem;font-weight:700;letter-spacing:normal;line-height:1.6;margin-bottom:.5em;padding:.3125em .9375em}.md-typeset .md-tag[href]{-webkit-tap-highlight-color:transparent;color:inherit;outline:none;transition:color 125ms,background-color 125ms}.md-typeset .md-tag[href]:focus,.md-typeset .md-tag[href]:hover{background-color:var(--md-accent-fg-color);color:var(--md-accent-bg-color)}[id]>.md-typeset .md-tag{vertical-align:text-top}.md-typeset .md-tag-icon:before{background-color:var(--md-default-fg-color--lighter);content:"";display:inline-block;height:1.2em;margin-right:.4em;-webkit-mask-image:var(--md-tag-icon);mask-image:var(--md-tag-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background-color 125ms;vertical-align:text-bottom;width:1.2em}.md-typeset .md-tag-icon[href]:focus:before,.md-typeset .md-tag-icon[href]:hover:before{background-color:var(--md-accent-bg-color)}@keyframes 
pulse{0%{transform:scale(.95)}75%{transform:scale(1)}to{transform:scale(.95)}}:root{--md-annotation-bg-icon:url('data:image/svg+xml;charset=utf-8,');--md-annotation-icon:url('data:image/svg+xml;charset=utf-8,');--md-tooltip-width:20rem}.md-tooltip{-webkit-backface-visibility:hidden;backface-visibility:hidden;background-color:var(--md-default-bg-color);border-radius:.1rem;box-shadow:var(--md-shadow-z2);color:var(--md-default-fg-color);font-family:var(--md-text-font-family);left:clamp(var(--md-tooltip-0,0rem) + .8rem,var(--md-tooltip-x),100vw + var(--md-tooltip-0,0rem) + .8rem - var(--md-tooltip-width) - 2 * .8rem);max-width:calc(100vw - 1.6rem);opacity:0;position:absolute;top:var(--md-tooltip-y);transform:translateY(-.4rem);transition:transform 0ms .25s,opacity .25s,z-index .25s;width:var(--md-tooltip-width);z-index:0}.md-tooltip--active{opacity:1;transform:translateY(0);transition:transform .25s cubic-bezier(.1,.7,.1,1),opacity .25s,z-index 0ms;z-index:2}.focus-visible>.md-tooltip,.md-tooltip:target{outline:var(--md-accent-fg-color) auto}.md-tooltip__inner{font-size:.64rem;padding:.8rem}.md-tooltip__inner.md-typeset>:first-child{margin-top:0}.md-tooltip__inner.md-typeset>:last-child{margin-bottom:0}.md-annotation{font-weight:400;outline:none;vertical-align:text-bottom;white-space:normal}[dir=rtl] .md-annotation{direction:rtl}code .md-annotation{font-family:var(--md-code-font-family);font-size:inherit}.md-annotation:not([hidden]){display:inline-block;line-height:1.25}.md-annotation__index{border-radius:.01px;cursor:pointer;display:inline-block;margin-left:.4ch;margin-right:.4ch;outline:none;overflow:hidden;position:relative;-webkit-user-select:none;user-select:none;vertical-align:text-top;z-index:0}.md-annotation .md-annotation__index{transition:z-index .25s}@media screen{.md-annotation__index{width:2.2ch}[data-md-visible]>.md-annotation__index{animation:pulse 2s 
infinite}.md-annotation__index:before{background:var(--md-default-bg-color);-webkit-mask-image:var(--md-annotation-bg-icon);mask-image:var(--md-annotation-bg-icon)}.md-annotation__index:after,.md-annotation__index:before{content:"";height:2.2ch;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;position:absolute;top:-.1ch;width:2.2ch;z-index:-1}.md-annotation__index:after{background-color:var(--md-default-fg-color--lighter);-webkit-mask-image:var(--md-annotation-icon);mask-image:var(--md-annotation-icon);transform:scale(1.0001);transition:background-color .25s,transform .25s}.md-tooltip--active+.md-annotation__index:after{transform:rotate(45deg)}.md-tooltip--active+.md-annotation__index:after,:hover>.md-annotation__index:after{background-color:var(--md-accent-fg-color)}}.md-tooltip--active+.md-annotation__index{animation-play-state:paused;transition-duration:0ms;z-index:2}.md-annotation__index [data-md-annotation-id]{display:inline-block}@media print{.md-annotation__index [data-md-annotation-id]{background:var(--md-default-fg-color--lighter);border-radius:2ch;color:var(--md-default-bg-color);font-weight:700;padding:0 .6ch;white-space:nowrap}.md-annotation__index [data-md-annotation-id]:after{content:attr(data-md-annotation-id)}}.md-typeset .md-annotation-list{counter-reset:xxx;list-style:none}.md-typeset .md-annotation-list li{position:relative}[dir=ltr] .md-typeset .md-annotation-list li:before{left:-2.125em}[dir=rtl] .md-typeset .md-annotation-list li:before{right:-2.125em}.md-typeset .md-annotation-list li:before{background:var(--md-default-fg-color--lighter);border-radius:2ch;color:var(--md-default-bg-color);content:counter(xxx);counter-increment:xxx;font-size:.8875em;font-weight:700;height:2ch;line-height:1.25;min-width:2ch;padding:0 .6ch;position:absolute;text-align:center;top:.25em}[dir=ltr] .md-top{margin-left:50%}[dir=rtl] 
.md-top{margin-right:50%}.md-top{background-color:var(--md-default-bg-color);border-radius:1.6rem;box-shadow:var(--md-shadow-z2);color:var(--md-default-fg-color--light);cursor:pointer;display:block;font-size:.7rem;outline:none;padding:.4rem .8rem;position:fixed;top:3.2rem;transform:translate(-50%);transition:color 125ms,background-color 125ms,transform 125ms cubic-bezier(.4,0,.2,1),opacity 125ms;z-index:2}@media print{.md-top{display:none}}[dir=rtl] .md-top{transform:translate(50%)}.md-top[hidden]{opacity:0;pointer-events:none;transform:translate(-50%,.2rem);transition-duration:0ms}[dir=rtl] .md-top[hidden]{transform:translate(50%,.2rem)}.md-top:focus,.md-top:hover{background-color:var(--md-accent-fg-color);color:var(--md-accent-bg-color)}.md-top svg{display:inline-block;vertical-align:-.5em}@keyframes hoverfix{0%{pointer-events:none}}:root{--md-version-icon:url('data:image/svg+xml;charset=utf-8,')}.md-version{flex-shrink:0;font-size:.8rem;height:2.4rem}[dir=ltr] .md-version__current{margin-left:1.4rem;margin-right:.4rem}[dir=rtl] .md-version__current{margin-left:.4rem;margin-right:1.4rem}.md-version__current{color:inherit;cursor:pointer;outline:none;position:relative;top:.05rem}[dir=ltr] .md-version__current:after{margin-left:.4rem}[dir=rtl] .md-version__current:after{margin-right:.4rem}.md-version__current:after{background-color:currentcolor;content:"";display:inline-block;height:.6rem;-webkit-mask-image:var(--md-version-icon);mask-image:var(--md-version-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;width:.4rem}.md-version__list{background-color:var(--md-default-bg-color);border-radius:.1rem;box-shadow:var(--md-shadow-z2);color:var(--md-default-fg-color);list-style-type:none;margin:.2rem .8rem;max-height:0;opacity:0;overflow:auto;padding:0;position:absolute;scroll-snap-type:y mandatory;top:.15rem;transition:max-height 0ms .5s,opacity .25s 
.25s;z-index:3}.md-version:focus-within .md-version__list,.md-version:hover .md-version__list{max-height:10rem;opacity:1;transition:max-height 0ms,opacity .25s}@media (hover:none),(pointer:coarse){.md-version:hover .md-version__list{animation:hoverfix .25s forwards}.md-version:focus-within .md-version__list{animation:none}}.md-version__item{line-height:1.8rem}[dir=ltr] .md-version__link{padding-left:.6rem;padding-right:1.2rem}[dir=rtl] .md-version__link{padding-left:1.2rem;padding-right:.6rem}.md-version__link{cursor:pointer;display:block;outline:none;scroll-snap-align:start;transition:color .25s,background-color .25s;white-space:nowrap;width:100%}.md-version__link:focus,.md-version__link:hover{color:var(--md-accent-fg-color)}.md-version__link:focus{background-color:var(--md-default-fg-color--lightest)}:root{--md-admonition-icon--note:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--abstract:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--info:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--tip:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--success:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--question:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--warning:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--failure:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--danger:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--bug:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--example:url('data:image/svg+xml;charset=utf-8,');--md-admonition-icon--quote:url('data:image/svg+xml;charset=utf-8,')}.md-typeset .admonition,.md-typeset details{background-color:var(--md-admonition-bg-color);border:.05rem solid #448aff;border-radius:.2rem;box-shadow:var(--md-shadow-z1);color:var(--md-admonition-fg-color);display:flow-root;font-size:.64rem;margin:1.5625em 0;padding:0 .6rem;page-break-inside:avoid;transition:box-shadow 125ms}@media 
print{.md-typeset .admonition,.md-typeset details{box-shadow:none}}.md-typeset .admonition:focus-within,.md-typeset details:focus-within{box-shadow:0 0 0 .2rem #448aff1a}.md-typeset .admonition>*,.md-typeset details>*{box-sizing:border-box}.md-typeset .admonition .admonition,.md-typeset .admonition details,.md-typeset details .admonition,.md-typeset details details{margin-bottom:1em;margin-top:1em}.md-typeset .admonition .md-typeset__scrollwrap,.md-typeset details .md-typeset__scrollwrap{margin:1em -.6rem}.md-typeset .admonition .md-typeset__table,.md-typeset details .md-typeset__table{padding:0 .6rem}.md-typeset .admonition>.tabbed-set:only-child,.md-typeset details>.tabbed-set:only-child{margin-top:0}html .md-typeset .admonition>:last-child,html .md-typeset details>:last-child{margin-bottom:.6rem}[dir=ltr] .md-typeset .admonition-title,[dir=ltr] .md-typeset summary{padding-left:2rem;padding-right:.6rem}[dir=rtl] .md-typeset .admonition-title,[dir=rtl] .md-typeset summary{padding-left:.6rem;padding-right:2rem}[dir=ltr] .md-typeset .admonition-title,[dir=ltr] .md-typeset summary{border-left-width:.2rem}[dir=rtl] .md-typeset .admonition-title,[dir=rtl] .md-typeset summary{border-right-width:.2rem}[dir=ltr] .md-typeset .admonition-title,[dir=ltr] .md-typeset summary{border-top-left-radius:.1rem}[dir=ltr] .md-typeset .admonition-title,[dir=ltr] .md-typeset summary,[dir=rtl] .md-typeset .admonition-title,[dir=rtl] .md-typeset summary{border-top-right-radius:.1rem}[dir=rtl] .md-typeset .admonition-title,[dir=rtl] .md-typeset summary{border-top-left-radius:.1rem}.md-typeset .admonition-title,.md-typeset summary{background-color:#448aff1a;border:none;font-weight:700;margin:0 -.6rem;padding-bottom:.4rem;padding-top:.4rem;position:relative}html .md-typeset .admonition-title:last-child,html .md-typeset summary:last-child{margin-bottom:0}[dir=ltr] .md-typeset .admonition-title:before,[dir=ltr] .md-typeset summary:before{left:.6rem}[dir=rtl] .md-typeset 
.admonition-title:before,[dir=rtl] .md-typeset summary:before{right:.6rem}.md-typeset .admonition-title:before,.md-typeset summary:before{background-color:#448aff;content:"";height:1rem;-webkit-mask-image:var(--md-admonition-icon--note);mask-image:var(--md-admonition-icon--note);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;position:absolute;top:.625em;width:1rem}.md-typeset .admonition-title code,.md-typeset summary code{box-shadow:0 0 0 .05rem var(--md-default-fg-color--lightest)}.md-typeset .admonition.note,.md-typeset details.note{border-color:#448aff}.md-typeset .admonition.note:focus-within,.md-typeset details.note:focus-within{box-shadow:0 0 0 .2rem #448aff1a}.md-typeset .note>.admonition-title,.md-typeset .note>summary{background-color:#448aff1a}.md-typeset .note>.admonition-title:before,.md-typeset .note>summary:before{background-color:#448aff;-webkit-mask-image:var(--md-admonition-icon--note);mask-image:var(--md-admonition-icon--note)}.md-typeset .note>.admonition-title:after,.md-typeset .note>summary:after{color:#448aff}.md-typeset .admonition.abstract,.md-typeset details.abstract{border-color:#00b0ff}.md-typeset .admonition.abstract:focus-within,.md-typeset details.abstract:focus-within{box-shadow:0 0 0 .2rem #00b0ff1a}.md-typeset .abstract>.admonition-title,.md-typeset .abstract>summary{background-color:#00b0ff1a}.md-typeset .abstract>.admonition-title:before,.md-typeset .abstract>summary:before{background-color:#00b0ff;-webkit-mask-image:var(--md-admonition-icon--abstract);mask-image:var(--md-admonition-icon--abstract)}.md-typeset .abstract>.admonition-title:after,.md-typeset .abstract>summary:after{color:#00b0ff}.md-typeset .admonition.info,.md-typeset details.info{border-color:#00b8d4}.md-typeset .admonition.info:focus-within,.md-typeset details.info:focus-within{box-shadow:0 0 0 .2rem #00b8d41a}.md-typeset .info>.admonition-title,.md-typeset 
.info>summary{background-color:#00b8d41a}.md-typeset .info>.admonition-title:before,.md-typeset .info>summary:before{background-color:#00b8d4;-webkit-mask-image:var(--md-admonition-icon--info);mask-image:var(--md-admonition-icon--info)}.md-typeset .info>.admonition-title:after,.md-typeset .info>summary:after{color:#00b8d4}.md-typeset .admonition.tip,.md-typeset details.tip{border-color:#00bfa5}.md-typeset .admonition.tip:focus-within,.md-typeset details.tip:focus-within{box-shadow:0 0 0 .2rem #00bfa51a}.md-typeset .tip>.admonition-title,.md-typeset .tip>summary{background-color:#00bfa51a}.md-typeset .tip>.admonition-title:before,.md-typeset .tip>summary:before{background-color:#00bfa5;-webkit-mask-image:var(--md-admonition-icon--tip);mask-image:var(--md-admonition-icon--tip)}.md-typeset .tip>.admonition-title:after,.md-typeset .tip>summary:after{color:#00bfa5}.md-typeset .admonition.success,.md-typeset details.success{border-color:#00c853}.md-typeset .admonition.success:focus-within,.md-typeset details.success:focus-within{box-shadow:0 0 0 .2rem #00c8531a}.md-typeset .success>.admonition-title,.md-typeset .success>summary{background-color:#00c8531a}.md-typeset .success>.admonition-title:before,.md-typeset .success>summary:before{background-color:#00c853;-webkit-mask-image:var(--md-admonition-icon--success);mask-image:var(--md-admonition-icon--success)}.md-typeset .success>.admonition-title:after,.md-typeset .success>summary:after{color:#00c853}.md-typeset .admonition.question,.md-typeset details.question{border-color:#64dd17}.md-typeset .admonition.question:focus-within,.md-typeset details.question:focus-within{box-shadow:0 0 0 .2rem #64dd171a}.md-typeset .question>.admonition-title,.md-typeset .question>summary{background-color:#64dd171a}.md-typeset .question>.admonition-title:before,.md-typeset .question>summary:before{background-color:#64dd17;-webkit-mask-image:var(--md-admonition-icon--question);mask-image:var(--md-admonition-icon--question)}.md-typeset 
.question>.admonition-title:after,.md-typeset .question>summary:after{color:#64dd17}.md-typeset .admonition.warning,.md-typeset details.warning{border-color:#ff9100}.md-typeset .admonition.warning:focus-within,.md-typeset details.warning:focus-within{box-shadow:0 0 0 .2rem #ff91001a}.md-typeset .warning>.admonition-title,.md-typeset .warning>summary{background-color:#ff91001a}.md-typeset .warning>.admonition-title:before,.md-typeset .warning>summary:before{background-color:#ff9100;-webkit-mask-image:var(--md-admonition-icon--warning);mask-image:var(--md-admonition-icon--warning)}.md-typeset .warning>.admonition-title:after,.md-typeset .warning>summary:after{color:#ff9100}.md-typeset .admonition.failure,.md-typeset details.failure{border-color:#ff5252}.md-typeset .admonition.failure:focus-within,.md-typeset details.failure:focus-within{box-shadow:0 0 0 .2rem #ff52521a}.md-typeset .failure>.admonition-title,.md-typeset .failure>summary{background-color:#ff52521a}.md-typeset .failure>.admonition-title:before,.md-typeset .failure>summary:before{background-color:#ff5252;-webkit-mask-image:var(--md-admonition-icon--failure);mask-image:var(--md-admonition-icon--failure)}.md-typeset .failure>.admonition-title:after,.md-typeset .failure>summary:after{color:#ff5252}.md-typeset .admonition.danger,.md-typeset details.danger{border-color:#ff1744}.md-typeset .admonition.danger:focus-within,.md-typeset details.danger:focus-within{box-shadow:0 0 0 .2rem #ff17441a}.md-typeset .danger>.admonition-title,.md-typeset .danger>summary{background-color:#ff17441a}.md-typeset .danger>.admonition-title:before,.md-typeset .danger>summary:before{background-color:#ff1744;-webkit-mask-image:var(--md-admonition-icon--danger);mask-image:var(--md-admonition-icon--danger)}.md-typeset .danger>.admonition-title:after,.md-typeset .danger>summary:after{color:#ff1744}.md-typeset .admonition.bug,.md-typeset details.bug{border-color:#f50057}.md-typeset .admonition.bug:focus-within,.md-typeset 
details.bug:focus-within{box-shadow:0 0 0 .2rem #f500571a}.md-typeset .bug>.admonition-title,.md-typeset .bug>summary{background-color:#f500571a}.md-typeset .bug>.admonition-title:before,.md-typeset .bug>summary:before{background-color:#f50057;-webkit-mask-image:var(--md-admonition-icon--bug);mask-image:var(--md-admonition-icon--bug)}.md-typeset .bug>.admonition-title:after,.md-typeset .bug>summary:after{color:#f50057}.md-typeset .admonition.example,.md-typeset details.example{border-color:#7c4dff}.md-typeset .admonition.example:focus-within,.md-typeset details.example:focus-within{box-shadow:0 0 0 .2rem #7c4dff1a}.md-typeset .example>.admonition-title,.md-typeset .example>summary{background-color:#7c4dff1a}.md-typeset .example>.admonition-title:before,.md-typeset .example>summary:before{background-color:#7c4dff;-webkit-mask-image:var(--md-admonition-icon--example);mask-image:var(--md-admonition-icon--example)}.md-typeset .example>.admonition-title:after,.md-typeset .example>summary:after{color:#7c4dff}.md-typeset .admonition.quote,.md-typeset details.quote{border-color:#9e9e9e}.md-typeset .admonition.quote:focus-within,.md-typeset details.quote:focus-within{box-shadow:0 0 0 .2rem #9e9e9e1a}.md-typeset .quote>.admonition-title,.md-typeset .quote>summary{background-color:#9e9e9e1a}.md-typeset .quote>.admonition-title:before,.md-typeset .quote>summary:before{background-color:#9e9e9e;-webkit-mask-image:var(--md-admonition-icon--quote);mask-image:var(--md-admonition-icon--quote)}.md-typeset .quote>.admonition-title:after,.md-typeset .quote>summary:after{color:#9e9e9e}:root{--md-footnotes-icon:url('data:image/svg+xml;charset=utf-8,')}.md-typeset .footnote{color:var(--md-default-fg-color--light);font-size:.64rem}[dir=ltr] .md-typeset .footnote>ol{margin-left:0}[dir=rtl] .md-typeset .footnote>ol{margin-right:0}.md-typeset .footnote>ol>li{transition:color 125ms}.md-typeset .footnote>ol>li:target{color:var(--md-default-fg-color)}.md-typeset .footnote>ol>li:focus-within 
.footnote-backref{opacity:1;transform:translateX(0);transition:none}.md-typeset .footnote>ol>li:hover .footnote-backref,.md-typeset .footnote>ol>li:target .footnote-backref{opacity:1;transform:translateX(0)}.md-typeset .footnote>ol>li>:first-child{margin-top:0}.md-typeset .footnote-ref{font-size:.75em;font-weight:700}html .md-typeset .footnote-ref{outline-offset:.1rem}.md-typeset [id^="fnref:"]:target>.footnote-ref{outline:auto}.md-typeset .footnote-backref{color:var(--md-typeset-a-color);display:inline-block;font-size:0;opacity:0;transform:translateX(.25rem);transition:color .25s,transform .25s .25s,opacity 125ms .25s;vertical-align:text-bottom}@media print{.md-typeset .footnote-backref{color:var(--md-typeset-a-color);opacity:1;transform:translateX(0)}}[dir=rtl] .md-typeset .footnote-backref{transform:translateX(-.25rem)}.md-typeset .footnote-backref:hover{color:var(--md-accent-fg-color)}.md-typeset .footnote-backref:before{background-color:currentcolor;content:"";display:inline-block;height:.8rem;-webkit-mask-image:var(--md-footnotes-icon);mask-image:var(--md-footnotes-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;width:.8rem}[dir=rtl] .md-typeset .footnote-backref:before svg{transform:scaleX(-1)}[dir=ltr] .md-typeset .headerlink{margin-left:.5rem}[dir=rtl] .md-typeset .headerlink{margin-right:.5rem}.md-typeset .headerlink{color:var(--md-default-fg-color--lighter);display:inline-block;opacity:0;transition:color .25s,opacity 125ms}@media print{.md-typeset .headerlink{display:none}}.md-typeset .headerlink:focus,.md-typeset :hover>.headerlink,.md-typeset :target>.headerlink{opacity:1;transition:color .25s,opacity 125ms}.md-typeset .headerlink:focus,.md-typeset .headerlink:hover,.md-typeset :target>.headerlink{color:var(--md-accent-fg-color)}.md-typeset :target{--md-scroll-margin:3.6rem;--md-scroll-offset:0rem;scroll-margin-top:calc(var(--md-scroll-margin) - 
var(--md-scroll-offset))}@media screen and (min-width:76.25em){.md-header--lifted~.md-container .md-typeset :target{--md-scroll-margin:6rem}}.md-typeset h1:target,.md-typeset h2:target,.md-typeset h3:target{--md-scroll-offset:0.2rem}.md-typeset h4:target{--md-scroll-offset:0.15rem}.md-typeset div.arithmatex{overflow:auto}@media screen and (max-width:44.9375em){.md-typeset div.arithmatex{margin:0 -.8rem}}.md-typeset div.arithmatex>*{margin-left:auto!important;margin-right:auto!important;padding:0 .8rem;touch-action:auto;width:-webkit-min-content;width:min-content}.md-typeset div.arithmatex>* mjx-container{margin:0!important}.md-typeset del.critic{background-color:var(--md-typeset-del-color)}.md-typeset del.critic,.md-typeset ins.critic{-webkit-box-decoration-break:clone;box-decoration-break:clone}.md-typeset ins.critic{background-color:var(--md-typeset-ins-color)}.md-typeset .critic.comment{-webkit-box-decoration-break:clone;box-decoration-break:clone;color:var(--md-code-hl-comment-color)}.md-typeset .critic.comment:before{content:"/* "}.md-typeset .critic.comment:after{content:" */"}.md-typeset .critic.block{box-shadow:none;display:block;margin:1em 0;overflow:auto;padding-left:.8rem;padding-right:.8rem}.md-typeset .critic.block>:first-child{margin-top:.5em}.md-typeset .critic.block>:last-child{margin-bottom:.5em}:root{--md-details-icon:url('data:image/svg+xml;charset=utf-8,')}.md-typeset details{display:flow-root;overflow:visible;padding-top:0}.md-typeset details[open]>summary:after{transform:rotate(90deg)}.md-typeset details:not([open]){box-shadow:none;padding-bottom:0}.md-typeset details:not([open])>summary{border-radius:.1rem}[dir=ltr] .md-typeset summary{padding-right:1.8rem}[dir=rtl] .md-typeset summary{padding-left:1.8rem}[dir=ltr] .md-typeset summary{border-top-left-radius:.1rem}[dir=ltr] .md-typeset summary,[dir=rtl] .md-typeset summary{border-top-right-radius:.1rem}[dir=rtl] .md-typeset summary{border-top-left-radius:.1rem}.md-typeset 
summary{cursor:pointer;display:block;min-height:1rem}.md-typeset summary.focus-visible{outline-color:var(--md-accent-fg-color);outline-offset:.2rem}.md-typeset summary:not(.focus-visible){-webkit-tap-highlight-color:transparent;outline:none}[dir=ltr] .md-typeset summary:after{right:.4rem}[dir=rtl] .md-typeset summary:after{left:.4rem}.md-typeset summary:after{background-color:currentcolor;content:"";height:1rem;-webkit-mask-image:var(--md-details-icon);mask-image:var(--md-details-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;position:absolute;top:.625em;transform:rotate(0deg);transition:transform .25s;width:1rem}[dir=rtl] .md-typeset summary:after{transform:rotate(180deg)}.md-typeset summary::marker{display:none}.md-typeset summary::-webkit-details-marker{display:none}.md-typeset .emojione,.md-typeset .gemoji,.md-typeset .twemoji{display:inline-flex;height:1.125em;vertical-align:text-top}.md-typeset .emojione svg,.md-typeset .gemoji svg,.md-typeset .twemoji svg{fill:currentcolor;max-height:100%;width:1.125em}.highlight .o,.highlight .ow{color:var(--md-code-hl-operator-color)}.highlight .p{color:var(--md-code-hl-punctuation-color)}.highlight .cpf,.highlight .l,.highlight .s,.highlight .s1,.highlight .s2,.highlight .sb,.highlight .sc,.highlight .si,.highlight .ss{color:var(--md-code-hl-string-color)}.highlight .cp,.highlight .se,.highlight .sh,.highlight .sr,.highlight .sx{color:var(--md-code-hl-special-color)}.highlight .il,.highlight .m,.highlight .mb,.highlight .mf,.highlight .mh,.highlight .mi,.highlight .mo{color:var(--md-code-hl-number-color)}.highlight .k,.highlight .kd,.highlight .kn,.highlight .kp,.highlight .kr,.highlight .kt{color:var(--md-code-hl-keyword-color)}.highlight .kc,.highlight .n{color:var(--md-code-hl-name-color)}.highlight .bp,.highlight .nb,.highlight .no{color:var(--md-code-hl-constant-color)}.highlight .nc,.highlight .ne,.highlight 
.nf,.highlight .nn{color:var(--md-code-hl-function-color)}.highlight .nd,.highlight .ni,.highlight .nl,.highlight .nt{color:var(--md-code-hl-keyword-color)}.highlight .c,.highlight .c1,.highlight .ch,.highlight .cm,.highlight .cs,.highlight .sd{color:var(--md-code-hl-comment-color)}.highlight .na,.highlight .nv,.highlight .vc,.highlight .vg,.highlight .vi{color:var(--md-code-hl-variable-color)}.highlight .ge,.highlight .gh,.highlight .go,.highlight .gp,.highlight .gr,.highlight .gs,.highlight .gt,.highlight .gu{color:var(--md-code-hl-generic-color)}.highlight .gd,.highlight .gi{border-radius:.1rem;margin:0 -.125em;padding:0 .125em}.highlight .gd{background-color:var(--md-typeset-del-color)}.highlight .gi{background-color:var(--md-typeset-ins-color)}.highlight .hll{background-color:var(--md-code-hl-color--light);box-shadow:2px 0 0 0 var(--md-code-hl-color) inset;display:block;margin:0 -1.1764705882em;padding:0 1.1764705882em}.highlight span.filename{background-color:var(--md-code-bg-color);border-bottom:.05rem solid var(--md-default-fg-color--lightest);border-top-left-radius:.1rem;border-top-right-radius:.1rem;display:flow-root;font-size:.85em;font-weight:700;margin-top:1em;padding:.6617647059em 1.1764705882em;position:relative}.highlight span.filename+pre{margin-top:0}.highlight span.filename+pre>code{border-top-left-radius:0;border-top-right-radius:0}.highlight [data-linenos]:before{background-color:var(--md-code-bg-color);box-shadow:-.05rem 0 var(--md-default-fg-color--lightest) inset;color:var(--md-default-fg-color--light);content:attr(data-linenos);float:left;left:-1.1764705882em;margin-left:-1.1764705882em;margin-right:1.1764705882em;padding-left:1.1764705882em;position:sticky;-webkit-user-select:none;user-select:none;z-index:3}.highlight code a[id]{position:absolute;visibility:hidden}.highlight code[data-md-copying] .hll{display:contents}.highlight code[data-md-copying] .md-annotation{display:none}.highlighttable{display:flow-root}.highlighttable 
tbody,.highlighttable td{display:block;padding:0}.highlighttable tr{display:flex}.highlighttable pre{margin:0}.highlighttable th.filename{flex-grow:1;padding:0;text-align:left}.highlighttable th.filename span.filename{margin-top:0}.highlighttable .linenos{background-color:var(--md-code-bg-color);border-bottom-left-radius:.1rem;border-top-left-radius:.1rem;font-size:.85em;padding:.7720588235em 0 .7720588235em 1.1764705882em;-webkit-user-select:none;user-select:none}.highlighttable .linenodiv{box-shadow:-.05rem 0 var(--md-default-fg-color--lightest) inset;padding-right:.5882352941em}.highlighttable .linenodiv pre{color:var(--md-default-fg-color--light);text-align:right}.highlighttable .code{flex:1;min-width:0}.linenodiv a{color:inherit}.md-typeset .highlighttable{direction:ltr;margin:1em 0}.md-typeset .highlighttable>tbody>tr>.code>div>pre>code{border-bottom-left-radius:0;border-top-left-radius:0}.md-typeset .highlight+.result{border:.05rem solid var(--md-code-bg-color);border-bottom-left-radius:.1rem;border-bottom-right-radius:.1rem;border-top-width:.1rem;margin-top:-1.125em;overflow:visible;padding:0 1em}.md-typeset .highlight+.result:after{clear:both;content:"";display:block}@media screen and (max-width:44.9375em){.md-content__inner>.highlight{margin:1em -.8rem}.md-content__inner>.highlight>.filename,.md-content__inner>.highlight>.highlighttable>tbody>tr>.code>div>pre>code,.md-content__inner>.highlight>.highlighttable>tbody>tr>.filename span.filename,.md-content__inner>.highlight>.highlighttable>tbody>tr>.linenos,.md-content__inner>.highlight>pre>code{border-radius:0}.md-content__inner>.highlight+.result{border-left-width:0;border-radius:0;border-right-width:0;margin-left:-.8rem;margin-right:-.8rem}}.md-typeset .keys kbd:after,.md-typeset .keys kbd:before{-moz-osx-font-smoothing:initial;-webkit-font-smoothing:initial;color:inherit;margin:0;position:relative}.md-typeset .keys span{color:var(--md-default-fg-color--light);padding:0 .2em}.md-typeset .keys 
.key-alt:before,.md-typeset .keys .key-left-alt:before,.md-typeset .keys .key-right-alt:before{content:"⎇";padding-right:.4em}.md-typeset .keys .key-command:before,.md-typeset .keys .key-left-command:before,.md-typeset .keys .key-right-command:before{content:"⌘";padding-right:.4em}.md-typeset .keys .key-control:before,.md-typeset .keys .key-left-control:before,.md-typeset .keys .key-right-control:before{content:"⌃";padding-right:.4em}.md-typeset .keys .key-left-meta:before,.md-typeset .keys .key-meta:before,.md-typeset .keys .key-right-meta:before{content:"◆";padding-right:.4em}.md-typeset .keys .key-left-option:before,.md-typeset .keys .key-option:before,.md-typeset .keys .key-right-option:before{content:"⌥";padding-right:.4em}.md-typeset .keys .key-left-shift:before,.md-typeset .keys .key-right-shift:before,.md-typeset .keys .key-shift:before{content:"⇧";padding-right:.4em}.md-typeset .keys .key-left-super:before,.md-typeset .keys .key-right-super:before,.md-typeset .keys .key-super:before{content:"❖";padding-right:.4em}.md-typeset .keys .key-left-windows:before,.md-typeset .keys .key-right-windows:before,.md-typeset .keys .key-windows:before{content:"⊞";padding-right:.4em}.md-typeset .keys .key-arrow-down:before{content:"↓";padding-right:.4em}.md-typeset .keys .key-arrow-left:before{content:"←";padding-right:.4em}.md-typeset .keys .key-arrow-right:before{content:"→";padding-right:.4em}.md-typeset .keys .key-arrow-up:before{content:"↑";padding-right:.4em}.md-typeset .keys .key-backspace:before{content:"⌫";padding-right:.4em}.md-typeset .keys .key-backtab:before{content:"⇤";padding-right:.4em}.md-typeset .keys .key-caps-lock:before{content:"⇪";padding-right:.4em}.md-typeset .keys .key-clear:before{content:"⌧";padding-right:.4em}.md-typeset .keys .key-context-menu:before{content:"☰";padding-right:.4em}.md-typeset .keys .key-delete:before{content:"⌦";padding-right:.4em}.md-typeset .keys .key-eject:before{content:"⏏";padding-right:.4em}.md-typeset .keys 
.key-end:before{content:"⤓";padding-right:.4em}.md-typeset .keys .key-escape:before{content:"⎋";padding-right:.4em}.md-typeset .keys .key-home:before{content:"⤒";padding-right:.4em}.md-typeset .keys .key-insert:before{content:"⎀";padding-right:.4em}.md-typeset .keys .key-page-down:before{content:"⇟";padding-right:.4em}.md-typeset .keys .key-page-up:before{content:"⇞";padding-right:.4em}.md-typeset .keys .key-print-screen:before{content:"⎙";padding-right:.4em}.md-typeset .keys .key-tab:after{content:"⇥";padding-left:.4em}.md-typeset .keys .key-num-enter:after{content:"⌤";padding-left:.4em}.md-typeset .keys .key-enter:after{content:"⏎";padding-left:.4em}:root{--md-tabbed-icon--prev:url('data:image/svg+xml;charset=utf-8,');--md-tabbed-icon--next:url('data:image/svg+xml;charset=utf-8,')}.md-typeset .tabbed-set{border-radius:.1rem;display:flex;flex-flow:column wrap;margin:1em 0;position:relative}.md-typeset .tabbed-set>input{height:0;opacity:0;position:absolute;width:0}.md-typeset .tabbed-set>input:target{--md-scroll-offset:0.625em}.md-typeset .tabbed-labels{-ms-overflow-style:none;box-shadow:0 -.05rem var(--md-default-fg-color--lightest) inset;display:flex;max-width:100%;overflow:auto;scrollbar-width:none}@media print{.md-typeset .tabbed-labels{display:contents}}@media screen{.js .md-typeset .tabbed-labels{position:relative}.js .md-typeset .tabbed-labels:before{background:var(--md-accent-fg-color);bottom:0;content:"";display:block;height:2px;left:0;position:absolute;transform:translateX(var(--md-indicator-x));transition:width 225ms,transform .25s;transition-timing-function:cubic-bezier(.4,0,.2,1);width:var(--md-indicator-width)}}.md-typeset .tabbed-labels::-webkit-scrollbar{display:none}.md-typeset .tabbed-labels>label{border-bottom:.1rem solid #0000;border-radius:.1rem .1rem 0 0;color:var(--md-default-fg-color--light);cursor:pointer;flex-shrink:0;font-size:.64rem;font-weight:700;padding:.78125em 1.25em .625em;scroll-margin-inline-start:1rem;transition:background-color 
.25s,color .25s;white-space:nowrap;width:auto}@media print{.md-typeset .tabbed-labels>label:first-child{order:1}.md-typeset .tabbed-labels>label:nth-child(2){order:2}.md-typeset .tabbed-labels>label:nth-child(3){order:3}.md-typeset .tabbed-labels>label:nth-child(4){order:4}.md-typeset .tabbed-labels>label:nth-child(5){order:5}.md-typeset .tabbed-labels>label:nth-child(6){order:6}.md-typeset .tabbed-labels>label:nth-child(7){order:7}.md-typeset .tabbed-labels>label:nth-child(8){order:8}.md-typeset .tabbed-labels>label:nth-child(9){order:9}.md-typeset .tabbed-labels>label:nth-child(10){order:10}.md-typeset .tabbed-labels>label:nth-child(11){order:11}.md-typeset .tabbed-labels>label:nth-child(12){order:12}.md-typeset .tabbed-labels>label:nth-child(13){order:13}.md-typeset .tabbed-labels>label:nth-child(14){order:14}.md-typeset .tabbed-labels>label:nth-child(15){order:15}.md-typeset .tabbed-labels>label:nth-child(16){order:16}.md-typeset .tabbed-labels>label:nth-child(17){order:17}.md-typeset .tabbed-labels>label:nth-child(18){order:18}.md-typeset .tabbed-labels>label:nth-child(19){order:19}.md-typeset .tabbed-labels>label:nth-child(20){order:20}}.md-typeset .tabbed-labels>label:hover{color:var(--md-accent-fg-color)}.md-typeset .tabbed-content{width:100%}@media print{.md-typeset .tabbed-content{display:contents}}.md-typeset .tabbed-block{display:none}@media print{.md-typeset .tabbed-block{display:block}.md-typeset .tabbed-block:first-child{order:1}.md-typeset .tabbed-block:nth-child(2){order:2}.md-typeset .tabbed-block:nth-child(3){order:3}.md-typeset .tabbed-block:nth-child(4){order:4}.md-typeset .tabbed-block:nth-child(5){order:5}.md-typeset .tabbed-block:nth-child(6){order:6}.md-typeset .tabbed-block:nth-child(7){order:7}.md-typeset .tabbed-block:nth-child(8){order:8}.md-typeset .tabbed-block:nth-child(9){order:9}.md-typeset .tabbed-block:nth-child(10){order:10}.md-typeset .tabbed-block:nth-child(11){order:11}.md-typeset 
.tabbed-block:nth-child(12){order:12}.md-typeset .tabbed-block:nth-child(13){order:13}.md-typeset .tabbed-block:nth-child(14){order:14}.md-typeset .tabbed-block:nth-child(15){order:15}.md-typeset .tabbed-block:nth-child(16){order:16}.md-typeset .tabbed-block:nth-child(17){order:17}.md-typeset .tabbed-block:nth-child(18){order:18}.md-typeset .tabbed-block:nth-child(19){order:19}.md-typeset .tabbed-block:nth-child(20){order:20}}.md-typeset .tabbed-block>.highlight:first-child>pre,.md-typeset .tabbed-block>pre:first-child{margin:0}.md-typeset .tabbed-block>.highlight:first-child>pre>code,.md-typeset .tabbed-block>pre:first-child>code{border-top-left-radius:0;border-top-right-radius:0}.md-typeset .tabbed-block>.highlight:first-child>.filename{border-top-left-radius:0;border-top-right-radius:0;margin:0}.md-typeset .tabbed-block>.highlight:first-child>.highlighttable{margin:0}.md-typeset .tabbed-block>.highlight:first-child>.highlighttable>tbody>tr>.filename span.filename,.md-typeset .tabbed-block>.highlight:first-child>.highlighttable>tbody>tr>.linenos{border-top-left-radius:0;border-top-right-radius:0;margin:0}.md-typeset .tabbed-block>.highlight:first-child>.highlighttable>tbody>tr>.code>div>pre>code{border-top-left-radius:0;border-top-right-radius:0}.md-typeset .tabbed-block>.highlight:first-child+.result{margin-top:-.125em}.md-typeset .tabbed-block>.tabbed-set{margin:0}.md-typeset .tabbed-button{align-self:center;border-radius:100%;color:var(--md-default-fg-color--light);cursor:pointer;display:block;height:.9rem;margin-top:.1rem;pointer-events:auto;transition:background-color .25s;width:.9rem}.md-typeset .tabbed-button:hover{background-color:var(--md-accent-fg-color--transparent);color:var(--md-accent-fg-color)}.md-typeset 
.tabbed-button:after{background-color:currentcolor;content:"";display:block;height:100%;-webkit-mask-image:var(--md-tabbed-icon--prev);mask-image:var(--md-tabbed-icon--prev);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background-color .25s,transform .25s;width:100%}.md-typeset .tabbed-control{background:linear-gradient(to right,var(--md-default-bg-color) 60%,#0000);display:flex;height:1.9rem;justify-content:start;pointer-events:none;position:absolute;transition:opacity 125ms;width:1.2rem}[dir=rtl] .md-typeset .tabbed-control{transform:rotate(180deg)}.md-typeset .tabbed-control[hidden]{opacity:0}.md-typeset .tabbed-control--next{background:linear-gradient(to left,var(--md-default-bg-color) 60%,#0000);justify-content:end;right:0}.md-typeset .tabbed-control--next .tabbed-button:after{-webkit-mask-image:var(--md-tabbed-icon--next);mask-image:var(--md-tabbed-icon--next)}@media screen and (max-width:44.9375em){[dir=ltr] .md-content__inner>.tabbed-set .tabbed-labels{padding-left:.8rem}[dir=rtl] .md-content__inner>.tabbed-set .tabbed-labels{padding-right:.8rem}.md-content__inner>.tabbed-set .tabbed-labels{margin:0 -.8rem;max-width:100vw;scroll-padding-inline-start:.8rem}[dir=ltr] .md-content__inner>.tabbed-set .tabbed-labels:after{padding-right:.8rem}[dir=rtl] .md-content__inner>.tabbed-set .tabbed-labels:after{padding-left:.8rem}.md-content__inner>.tabbed-set .tabbed-labels:after{content:""}[dir=ltr] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--prev{padding-left:.8rem}[dir=rtl] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--prev{padding-right:.8rem}[dir=ltr] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--prev{margin-left:-.8rem}[dir=rtl] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--prev{margin-right:-.8rem}.md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--prev{width:2rem}[dir=ltr] 
.md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--next{padding-right:.8rem}[dir=rtl] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--next{padding-left:.8rem}[dir=ltr] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--next{margin-right:-.8rem}[dir=rtl] .md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--next{margin-left:-.8rem}.md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--next{width:2rem}}@media screen{.md-typeset .tabbed-set>input:first-child:checked~.tabbed-labels>:first-child,.md-typeset .tabbed-set>input:nth-child(10):checked~.tabbed-labels>:nth-child(10),.md-typeset .tabbed-set>input:nth-child(11):checked~.tabbed-labels>:nth-child(11),.md-typeset .tabbed-set>input:nth-child(12):checked~.tabbed-labels>:nth-child(12),.md-typeset .tabbed-set>input:nth-child(13):checked~.tabbed-labels>:nth-child(13),.md-typeset .tabbed-set>input:nth-child(14):checked~.tabbed-labels>:nth-child(14),.md-typeset .tabbed-set>input:nth-child(15):checked~.tabbed-labels>:nth-child(15),.md-typeset .tabbed-set>input:nth-child(16):checked~.tabbed-labels>:nth-child(16),.md-typeset .tabbed-set>input:nth-child(17):checked~.tabbed-labels>:nth-child(17),.md-typeset .tabbed-set>input:nth-child(18):checked~.tabbed-labels>:nth-child(18),.md-typeset .tabbed-set>input:nth-child(19):checked~.tabbed-labels>:nth-child(19),.md-typeset .tabbed-set>input:nth-child(2):checked~.tabbed-labels>:nth-child(2),.md-typeset .tabbed-set>input:nth-child(20):checked~.tabbed-labels>:nth-child(20),.md-typeset .tabbed-set>input:nth-child(3):checked~.tabbed-labels>:nth-child(3),.md-typeset .tabbed-set>input:nth-child(4):checked~.tabbed-labels>:nth-child(4),.md-typeset .tabbed-set>input:nth-child(5):checked~.tabbed-labels>:nth-child(5),.md-typeset .tabbed-set>input:nth-child(6):checked~.tabbed-labels>:nth-child(6),.md-typeset .tabbed-set>input:nth-child(7):checked~.tabbed-labels>:nth-child(7),.md-typeset 
.tabbed-set>input:nth-child(8):checked~.tabbed-labels>:nth-child(8),.md-typeset .tabbed-set>input:nth-child(9):checked~.tabbed-labels>:nth-child(9){color:var(--md-accent-fg-color)}.md-typeset .no-js .tabbed-set>input:first-child:checked~.tabbed-labels>:first-child,.md-typeset .no-js .tabbed-set>input:nth-child(10):checked~.tabbed-labels>:nth-child(10),.md-typeset .no-js .tabbed-set>input:nth-child(11):checked~.tabbed-labels>:nth-child(11),.md-typeset .no-js .tabbed-set>input:nth-child(12):checked~.tabbed-labels>:nth-child(12),.md-typeset .no-js .tabbed-set>input:nth-child(13):checked~.tabbed-labels>:nth-child(13),.md-typeset .no-js .tabbed-set>input:nth-child(14):checked~.tabbed-labels>:nth-child(14),.md-typeset .no-js .tabbed-set>input:nth-child(15):checked~.tabbed-labels>:nth-child(15),.md-typeset .no-js .tabbed-set>input:nth-child(16):checked~.tabbed-labels>:nth-child(16),.md-typeset .no-js .tabbed-set>input:nth-child(17):checked~.tabbed-labels>:nth-child(17),.md-typeset .no-js .tabbed-set>input:nth-child(18):checked~.tabbed-labels>:nth-child(18),.md-typeset .no-js .tabbed-set>input:nth-child(19):checked~.tabbed-labels>:nth-child(19),.md-typeset .no-js .tabbed-set>input:nth-child(2):checked~.tabbed-labels>:nth-child(2),.md-typeset .no-js .tabbed-set>input:nth-child(20):checked~.tabbed-labels>:nth-child(20),.md-typeset .no-js .tabbed-set>input:nth-child(3):checked~.tabbed-labels>:nth-child(3),.md-typeset .no-js .tabbed-set>input:nth-child(4):checked~.tabbed-labels>:nth-child(4),.md-typeset .no-js .tabbed-set>input:nth-child(5):checked~.tabbed-labels>:nth-child(5),.md-typeset .no-js .tabbed-set>input:nth-child(6):checked~.tabbed-labels>:nth-child(6),.md-typeset .no-js .tabbed-set>input:nth-child(7):checked~.tabbed-labels>:nth-child(7),.md-typeset .no-js .tabbed-set>input:nth-child(8):checked~.tabbed-labels>:nth-child(8),.md-typeset .no-js .tabbed-set>input:nth-child(9):checked~.tabbed-labels>:nth-child(9),.no-js .md-typeset 
.tabbed-set>input:first-child:checked~.tabbed-labels>:first-child,.no-js .md-typeset .tabbed-set>input:nth-child(10):checked~.tabbed-labels>:nth-child(10),.no-js .md-typeset .tabbed-set>input:nth-child(11):checked~.tabbed-labels>:nth-child(11),.no-js .md-typeset .tabbed-set>input:nth-child(12):checked~.tabbed-labels>:nth-child(12),.no-js .md-typeset .tabbed-set>input:nth-child(13):checked~.tabbed-labels>:nth-child(13),.no-js .md-typeset .tabbed-set>input:nth-child(14):checked~.tabbed-labels>:nth-child(14),.no-js .md-typeset .tabbed-set>input:nth-child(15):checked~.tabbed-labels>:nth-child(15),.no-js .md-typeset .tabbed-set>input:nth-child(16):checked~.tabbed-labels>:nth-child(16),.no-js .md-typeset .tabbed-set>input:nth-child(17):checked~.tabbed-labels>:nth-child(17),.no-js .md-typeset .tabbed-set>input:nth-child(18):checked~.tabbed-labels>:nth-child(18),.no-js .md-typeset .tabbed-set>input:nth-child(19):checked~.tabbed-labels>:nth-child(19),.no-js .md-typeset .tabbed-set>input:nth-child(2):checked~.tabbed-labels>:nth-child(2),.no-js .md-typeset .tabbed-set>input:nth-child(20):checked~.tabbed-labels>:nth-child(20),.no-js .md-typeset .tabbed-set>input:nth-child(3):checked~.tabbed-labels>:nth-child(3),.no-js .md-typeset .tabbed-set>input:nth-child(4):checked~.tabbed-labels>:nth-child(4),.no-js .md-typeset .tabbed-set>input:nth-child(5):checked~.tabbed-labels>:nth-child(5),.no-js .md-typeset .tabbed-set>input:nth-child(6):checked~.tabbed-labels>:nth-child(6),.no-js .md-typeset .tabbed-set>input:nth-child(7):checked~.tabbed-labels>:nth-child(7),.no-js .md-typeset .tabbed-set>input:nth-child(8):checked~.tabbed-labels>:nth-child(8),.no-js .md-typeset .tabbed-set>input:nth-child(9):checked~.tabbed-labels>:nth-child(9){border-color:var(--md-accent-fg-color)}}.md-typeset .tabbed-set>input:first-child.focus-visible~.tabbed-labels>:first-child,.md-typeset .tabbed-set>input:nth-child(10).focus-visible~.tabbed-labels>:nth-child(10),.md-typeset 
.tabbed-set>input:nth-child(11).focus-visible~.tabbed-labels>:nth-child(11),.md-typeset .tabbed-set>input:nth-child(12).focus-visible~.tabbed-labels>:nth-child(12),.md-typeset .tabbed-set>input:nth-child(13).focus-visible~.tabbed-labels>:nth-child(13),.md-typeset .tabbed-set>input:nth-child(14).focus-visible~.tabbed-labels>:nth-child(14),.md-typeset .tabbed-set>input:nth-child(15).focus-visible~.tabbed-labels>:nth-child(15),.md-typeset .tabbed-set>input:nth-child(16).focus-visible~.tabbed-labels>:nth-child(16),.md-typeset .tabbed-set>input:nth-child(17).focus-visible~.tabbed-labels>:nth-child(17),.md-typeset .tabbed-set>input:nth-child(18).focus-visible~.tabbed-labels>:nth-child(18),.md-typeset .tabbed-set>input:nth-child(19).focus-visible~.tabbed-labels>:nth-child(19),.md-typeset .tabbed-set>input:nth-child(2).focus-visible~.tabbed-labels>:nth-child(2),.md-typeset .tabbed-set>input:nth-child(20).focus-visible~.tabbed-labels>:nth-child(20),.md-typeset .tabbed-set>input:nth-child(3).focus-visible~.tabbed-labels>:nth-child(3),.md-typeset .tabbed-set>input:nth-child(4).focus-visible~.tabbed-labels>:nth-child(4),.md-typeset .tabbed-set>input:nth-child(5).focus-visible~.tabbed-labels>:nth-child(5),.md-typeset .tabbed-set>input:nth-child(6).focus-visible~.tabbed-labels>:nth-child(6),.md-typeset .tabbed-set>input:nth-child(7).focus-visible~.tabbed-labels>:nth-child(7),.md-typeset .tabbed-set>input:nth-child(8).focus-visible~.tabbed-labels>:nth-child(8),.md-typeset .tabbed-set>input:nth-child(9).focus-visible~.tabbed-labels>:nth-child(9){background-color:var(--md-accent-fg-color--transparent)}.md-typeset .tabbed-set>input:first-child:checked~.tabbed-content>:first-child,.md-typeset .tabbed-set>input:nth-child(10):checked~.tabbed-content>:nth-child(10),.md-typeset .tabbed-set>input:nth-child(11):checked~.tabbed-content>:nth-child(11),.md-typeset .tabbed-set>input:nth-child(12):checked~.tabbed-content>:nth-child(12),.md-typeset 
.tabbed-set>input:nth-child(13):checked~.tabbed-content>:nth-child(13),.md-typeset .tabbed-set>input:nth-child(14):checked~.tabbed-content>:nth-child(14),.md-typeset .tabbed-set>input:nth-child(15):checked~.tabbed-content>:nth-child(15),.md-typeset .tabbed-set>input:nth-child(16):checked~.tabbed-content>:nth-child(16),.md-typeset .tabbed-set>input:nth-child(17):checked~.tabbed-content>:nth-child(17),.md-typeset .tabbed-set>input:nth-child(18):checked~.tabbed-content>:nth-child(18),.md-typeset .tabbed-set>input:nth-child(19):checked~.tabbed-content>:nth-child(19),.md-typeset .tabbed-set>input:nth-child(2):checked~.tabbed-content>:nth-child(2),.md-typeset .tabbed-set>input:nth-child(20):checked~.tabbed-content>:nth-child(20),.md-typeset .tabbed-set>input:nth-child(3):checked~.tabbed-content>:nth-child(3),.md-typeset .tabbed-set>input:nth-child(4):checked~.tabbed-content>:nth-child(4),.md-typeset .tabbed-set>input:nth-child(5):checked~.tabbed-content>:nth-child(5),.md-typeset .tabbed-set>input:nth-child(6):checked~.tabbed-content>:nth-child(6),.md-typeset .tabbed-set>input:nth-child(7):checked~.tabbed-content>:nth-child(7),.md-typeset .tabbed-set>input:nth-child(8):checked~.tabbed-content>:nth-child(8),.md-typeset .tabbed-set>input:nth-child(9):checked~.tabbed-content>:nth-child(9){display:block}:root{--md-tasklist-icon:url('data:image/svg+xml;charset=utf-8,');--md-tasklist-icon--checked:url('data:image/svg+xml;charset=utf-8,')}.md-typeset .task-list-item{list-style-type:none;position:relative}[dir=ltr] .md-typeset .task-list-item [type=checkbox]{left:-2em}[dir=rtl] .md-typeset .task-list-item [type=checkbox]{right:-2em}.md-typeset .task-list-item [type=checkbox]{position:absolute;top:.45em}.md-typeset .task-list-control [type=checkbox]{opacity:0;z-index:-1}[dir=ltr] .md-typeset .task-list-indicator:before{left:-1.5em}[dir=rtl] .md-typeset .task-list-indicator:before{right:-1.5em}.md-typeset 
.task-list-indicator:before{background-color:var(--md-default-fg-color--lightest);content:"";height:1.25em;-webkit-mask-image:var(--md-tasklist-icon);mask-image:var(--md-tasklist-icon);-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;position:absolute;top:.15em;width:1.25em}.md-typeset [type=checkbox]:checked+.task-list-indicator:before{background-color:#00e676;-webkit-mask-image:var(--md-tasklist-icon--checked);mask-image:var(--md-tasklist-icon--checked)}:root>*{--md-mermaid-font-family:var(--md-text-font-family),sans-serif;--md-mermaid-edge-color:var(--md-code-fg-color);--md-mermaid-node-bg-color:var(--md-accent-fg-color--transparent);--md-mermaid-node-fg-color:var(--md-accent-fg-color);--md-mermaid-label-bg-color:var(--md-default-bg-color);--md-mermaid-label-fg-color:var(--md-code-fg-color);--md-mermaid-sequence-actor-bg-color:var(--md-mermaid-label-bg-color);--md-mermaid-sequence-actor-fg-color:var(--md-mermaid-label-fg-color);--md-mermaid-sequence-actor-border-color:var(--md-mermaid-node-fg-color);--md-mermaid-sequence-actor-line-color:var(--md-default-fg-color--lighter);--md-mermaid-sequence-actorman-bg-color:var(--md-mermaid-label-bg-color);--md-mermaid-sequence-actorman-line-color:var(--md-mermaid-node-fg-color);--md-mermaid-sequence-box-bg-color:var(--md-mermaid-node-bg-color);--md-mermaid-sequence-box-fg-color:var(--md-mermaid-edge-color);--md-mermaid-sequence-label-bg-color:var(--md-mermaid-node-bg-color);--md-mermaid-sequence-label-fg-color:var(--md-mermaid-node-fg-color);--md-mermaid-sequence-loop-bg-color:var(--md-mermaid-node-bg-color);--md-mermaid-sequence-loop-fg-color:var(--md-mermaid-edge-color);--md-mermaid-sequence-loop-border-color:var(--md-mermaid-node-fg-color);--md-mermaid-sequence-message-fg-color:var(--md-mermaid-edge-color);--md-mermaid-sequence-message-line-color:var(--md-mermaid-edge-color);--md-mermaid-sequence-note-bg-color:var(--md-mermai
d-label-bg-color);--md-mermaid-sequence-note-fg-color:var(--md-mermaid-edge-color);--md-mermaid-sequence-note-border-color:var(--md-mermaid-label-fg-color);--md-mermaid-sequence-number-bg-color:var(--md-mermaid-node-fg-color);--md-mermaid-sequence-number-fg-color:var(--md-accent-bg-color)}.mermaid{line-height:normal;margin:1em 0}@media screen and (min-width:45em){[dir=ltr] .md-typeset .inline{float:left}[dir=rtl] .md-typeset .inline{float:right}[dir=ltr] .md-typeset .inline{margin-right:.8rem}[dir=rtl] .md-typeset .inline{margin-left:.8rem}.md-typeset .inline{margin-bottom:.8rem;margin-top:0;width:11.7rem}[dir=ltr] .md-typeset .inline.end{float:right}[dir=rtl] .md-typeset .inline.end{float:left}[dir=ltr] .md-typeset .inline.end{margin-left:.8rem;margin-right:0}[dir=rtl] .md-typeset .inline.end{margin-left:0;margin-right:.8rem}} \ No newline at end of file diff --git a/devel/assets/stylesheets/main.30068a00.min.css.map b/devel/assets/stylesheets/main.30068a00.min.css.map new file mode 100644 index 000000000..23cd93fc3 --- /dev/null +++ b/devel/assets/stylesheets/main.30068a00.min.css.map @@ -0,0 +1 @@ 
+{"version":3,"sources":["src/assets/stylesheets/main/components/_meta.scss","../../../src/assets/stylesheets/main.scss","src/assets/stylesheets/main/_resets.scss","src/assets/stylesheets/main/_colors.scss","src/assets/stylesheets/main/_icons.scss","src/assets/stylesheets/main/_typeset.scss","src/assets/stylesheets/utilities/_break.scss","src/assets/stylesheets/main/components/_author.scss","src/assets/stylesheets/main/components/_banner.scss","src/assets/stylesheets/main/components/_base.scss","src/assets/stylesheets/main/components/_clipboard.scss","src/assets/stylesheets/main/components/_consent.scss","src/assets/stylesheets/main/components/_content.scss","src/assets/stylesheets/main/components/_dialog.scss","src/assets/stylesheets/main/components/_feedback.scss","src/assets/stylesheets/main/components/_footer.scss","src/assets/stylesheets/main/components/_form.scss","src/assets/stylesheets/main/components/_header.scss","node_modules/material-design-color/material-color.scss","src/assets/stylesheets/main/components/_nav.scss","src/assets/stylesheets/main/components/_pagination.scss","src/assets/stylesheets/main/components/_post.scss","src/assets/stylesheets/main/components/_search.scss","src/assets/stylesheets/main/components/_select.scss","src/assets/stylesheets/main/components/_sidebar.scss","src/assets/stylesheets/main/components/_source.scss","src/assets/stylesheets/main/components/_status.scss","src/assets/stylesheets/main/components/_tabs.scss","src/assets/stylesheets/main/components/_tag.scss","src/assets/stylesheets/main/components/_tooltip.scss","src/assets/stylesheets/main/components/_top.scss","src/assets/stylesheets/main/components/_version.scss","src/assets/stylesheets/main/extensions/markdown/_admonition.scss","src/assets/stylesheets/main/extensions/markdown/_footnotes.scss","src/assets/stylesheets/main/extensions/markdown/_toc.scss","src/assets/stylesheets/main/extensions/pymdownx/_arithmatex.scss","src/assets/stylesheets/main/extensions/pymdownx/_
critic.scss","src/assets/stylesheets/main/extensions/pymdownx/_details.scss","src/assets/stylesheets/main/extensions/pymdownx/_emoji.scss","src/assets/stylesheets/main/extensions/pymdownx/_highlight.scss","src/assets/stylesheets/main/extensions/pymdownx/_keys.scss","src/assets/stylesheets/main/extensions/pymdownx/_tabbed.scss","src/assets/stylesheets/main/extensions/pymdownx/_tasklist.scss","src/assets/stylesheets/main/integrations/_mermaid.scss","src/assets/stylesheets/main/_modifiers.scss"],"names":[],"mappings":"AA0CE,gBC8xCF,CC5yCA,KAEE,6BAAA,CAAA,0BAAA,CAAA,qBAAA,CADA,qBDzBF,CC8BA,iBAGE,kBD3BF,CC8BE,gCANF,iBAOI,yBDzBF,CACF,CC6BA,KACE,QD1BF,CC8BA,qBAIE,uCD3BF,CC+BA,EACE,aAAA,CACA,oBD5BF,CCgCA,GAME,QAAA,CALA,kBAAA,CACA,aAAA,CACA,aAAA,CAEA,gBAAA,CADA,SD3BF,CCiCA,MACE,aD9BF,CCkCA,QAEE,eD/BF,CCmCA,IACE,iBDhCF,CCoCA,MAEE,uBAAA,CADA,gBDhCF,CCqCA,MAEE,eAAA,CACA,kBDlCF,CCsCA,OAKE,gBAAA,CACA,QAAA,CAHA,mBAAA,CACA,iBAAA,CAFA,QAAA,CADA,SD9BF,CCuCA,MACE,QAAA,CACA,YDpCF,CErDA,MAIE,6BAAA,CACA,oCAAA,CACA,mCAAA,CACA,0BAAA,CACA,sCAAA,CAGA,4BAAA,CACA,2CAAA,CACA,yBAAA,CACA,qCFmDF,CE7CA,+BAIE,kBF6CF,CE1CE,oHAEE,YF4CJ,CEnCA,qCAGE,+BAAA,CACA,sCAAA,CACA,wCAAA,CACA,yCAAA,CACA,0BAAA,CACA,sCAAA,CACA,wCAAA,CACA,yCAAA,CAGA,0BAAA,CACA,0BAAA,CAGA,0BAAA,CACA,mCAAA,CACA,iCAAA,CACA,kCAAA,CACA,mCAAA,CACA,mCAAA,CACA,kCAAA,CACA,iCAAA,CACA,+CAAA,CACA,6DAAA,CACA,gEAAA,CACA,4DAAA,CACA,4DAAA,CACA,6DAAA,CAGA,6CAAA,CAGA,+CAAA,CAGA,iCAAA,CAGA,gCAAA,CACA,gCAAA,CAGA,8BAAA,CACA,kCAAA,CACA,qCAAA,CAGA,kCAAA,CACA,gDAAA,CAGA,mDAAA,CACA,mDAAA,CAGA,+BAAA,CACA,0BAAA,CAGA,yBAAA,CACA,qCAAA,CACA,uCAAA,CACA,8BAAA,CACA,oCAAA,CAGA,8DAAA,CAKA,8DAAA,CAKA,0DFUF,CG7HE,aAIE,iBAAA,CAHA,aAAA,CAEA,aAAA,CADA,YHkIJ,CIvIA,KACE,kCAAA,CACA,iCAAA,CAGA,uGAAA,CAKA,mFJwIF,CIlIA,iBAIE,mCAAA,CACA,6BAAA,CAFA,sCJuIF,CIjIA,aAIE,4BAAA,CADA,sCJqIF,CI5HA,MACE,0NAAA,CACA,mNAAA,CACA,oNJ+HF,CIxHA,YAGE,gCAAA,CAAA,kBAAA,CAFA,eAAA,CACA,eJ4HF,CIvHE,aAPF,YAQI,gBJ0HF,CACF,CIvHE,uGAME,iBAAA,CAAA,cJyHJ,CIrHE,eAKE,uCAAA,CAHA,aAAA,CAEA,eAAA,CAHA,iBJ4HJ,CInHE
,8BAPE,eAAA,CAGA,qBJ8HJ,CI1HE,eAEE,kBAAA,CAEA,eAAA,CAHA,oBJyHJ,CIjHE,eAEE,gBAAA,CACA,eAAA,CAEA,qBAAA,CADA,eAAA,CAHA,mBJuHJ,CI/GE,kBACE,eJiHJ,CI7GE,eAEE,eAAA,CACA,qBAAA,CAFA,YJiHJ,CI3GE,8BAKE,uCAAA,CAFA,cAAA,CACA,eAAA,CAEA,qBAAA,CAJA,eJiHJ,CIzGE,eACE,wBJ2GJ,CIvGE,eAGE,+DAAA,CAFA,iBAAA,CACA,cJ0GJ,CIrGE,cACE,+BAAA,CACA,qBJuGJ,CIpGI,mCAEE,sBJqGN,CIjGI,wCACE,+BJmGN,CIhGM,kDACE,uDJkGR,CI7FI,mBACE,kBAAA,CACA,iCJ+FN,CI3FI,4BACE,uCAAA,CACA,oBJ6FN,CIxFE,iDAIE,6BAAA,CACA,aAAA,CAFA,2BJ4FJ,CIvFI,aARF,iDASI,oBJ4FJ,CACF,CIxFE,iBAIE,wCAAA,CACA,mBAAA,CACA,kCAAA,CAAA,0BAAA,CAJA,eAAA,CADA,uBAAA,CAEA,qBJ6FJ,CIvFI,qCAEE,uCAAA,CADA,YJ0FN,CIpFE,gBAEE,iBAAA,CACA,eAAA,CAFA,iBJwFJ,CInFI,qBASE,kCAAA,CAAA,0BAAA,CADA,eAAA,CAPA,aAAA,CAEA,QAAA,CAIA,uCAAA,CAHA,aAAA,CAFA,oCAAA,CASA,yDAAA,CADA,oBAAA,CAJA,iBAAA,CADA,iBJ2FN,CIlFM,2BACE,+CJoFR,CIhFM,wCAEE,YAAA,CADA,WJmFR,CI9EM,8CACE,oDJgFR,CI7EQ,oDACE,0CJ+EV,CIxEE,gBAOE,4CAAA,CACA,mBAAA,CACA,mKACE,CANF,gCAAA,CAHA,oBAAA,CAEA,eAAA,CADA,uBAAA,CAIA,uBAAA,CADA,qBJ8EJ,CInEE,iBAGE,6CAAA,CACA,kCAAA,CAAA,0BAAA,CAHA,aAAA,CACA,qBJuEJ,CIjEE,iBAGE,6DAAA,CADA,WAAA,CADA,oBJqEJ,CIhEI,oBAGE,wEAQE,2CAAA,CACA,mBAAA,CACA,8BAAA,CAJA,gCAAA,CACA,mBAAA,CAFA,eAAA,CAHA,UAAA,CAEA,cAAA,CADA,mBAAA,CAFA,iBAAA,CACA,WJwEN,CACF,CI3DE,kBACE,WJ6DJ,CIzDE,oDAEE,qBJ2DJ,CI7DE,oDAEE,sBJ2DJ,CIvDE,iCACE,kBJ4DJ,CI7DE,iCACE,mBJ4DJ,CI7DE,iCAIE,2DJyDJ,CI7DE,iCAIE,4DJyDJ,CI7DE,uBAGE,uCAAA,CADA,aAAA,CAAA,cJ2DJ,CIrDE,eACE,oBJuDJ,CInDE,kDAGE,kBJqDJ,CIxDE,kDAGE,mBJqDJ,CIxDE,8BAEE,SJsDJ,CIlDI,0DACE,iBJqDN,CIjDI,oCACE,2BJoDN,CIjDM,0CACE,2BJoDR,CI/CI,wDACE,kBJmDN,CIpDI,wDACE,mBJmDN,CIpDI,oCAEE,kBJkDN,CI/CM,kGAEE,aJmDR,CI/CM,0DACE,eJkDR,CI9CM,4HAEE,kBJiDR,CInDM,4HAEE,mBJiDR,CInDM,oFACE,kBAAA,CAAA,eJkDR,CI3CE,yBAEE,mBJ6CJ,CI/CE,yBAEE,oBJ6CJ,CI/CE,eACE,mBAAA,CAAA,cJ8CJ,CIzCE,kDAIE,WAAA,CADA,cJ4CJ,CIpCI,4BAEE,oBJsCN,CIlCI,6BAEE,oBJoCN,CIhCI,kCACE,YJkCN,CI7BE,mBACE,iBAAA,CAGA,eAAA,CADA,cAAA,CAEA,iBAAA,CAHA,yBAAA,CAAA,sBAAA,CAAA,iBJkCJ,CI5BI,uBACE,aJ8BN,CIzBE,uBAGE,iBAAA,CADA,eAAA,CADA,eJ6BJ,CIvBE,mBACE,cJyBJ,C
IrBE,+BAME,2CAAA,CACA,iDAAA,CACA,mBAAA,CAPA,oBAAA,CAGA,gBAAA,CAFA,cAAA,CACA,aAAA,CAEA,iBJ0BJ,CIpBI,aAXF,+BAYI,aJuBJ,CACF,CIlBI,iCACE,gBJoBN,CIbM,8FACE,YJeR,CIXM,4FACE,eJaR,CIRI,8FACE,eJUN,CIPM,kHACE,gBJSR,CIJI,kCAGE,eAAA,CAFA,cAAA,CACA,sBAAA,CAEA,kBJMN,CIFI,kCAGE,qDAAA,CAFA,sBAAA,CACA,kBJKN,CIAI,wCACE,iCJEN,CICM,8CACE,qDAAA,CACA,sDJCR,CIII,iCACE,iBJFN,CIOE,wCACE,cJLJ,CIQI,wDAIE,gBJAN,CIJI,wDAIE,iBJAN,CIJI,8CAME,UAAA,CALA,oBAAA,CAEA,YAAA,CAKA,oDAAA,CAAA,4CAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAHA,iCAAA,CAFA,0BAAA,CAHA,WJEN,CIUI,oDACE,oDJRN,CIYI,mEACE,kDAAA,CACA,yDAAA,CAAA,iDJVN,CIcI,oEACE,kDAAA,CACA,0DAAA,CAAA,kDJZN,CIiBE,wBACE,iBAAA,CACA,eAAA,CACA,iBJfJ,CImBE,mBACE,oBAAA,CAEA,kBAAA,CADA,eJhBJ,CIoBI,aANF,mBAOI,aJjBJ,CACF,CIoBI,8BACE,aAAA,CAEA,QAAA,CACA,eAAA,CAFA,UJhBN,CK/VI,wCD8XF,uBACE,iBJ3BF,CI8BE,4BACE,eJ5BJ,CACF,CM9hBE,uBAEE,aAAA,CACA,aAAA,CAEA,aAAA,CACA,eAAA,CALA,iBAAA,CAMA,sCACE,CAJF,YNmiBJ,CM3hBI,2BAEE,kBAAA,CADA,aN8hBN,CMzhBI,6BAME,+CAAA,CAFA,yCAAA,CAHA,eAAA,CACA,eAAA,CACA,kBAAA,CAEA,iBN4hBN,CMvhBI,6BAEE,aAAA,CADA,YN0hBN,CMphBE,wBACE,kBNshBJ,CMnhBI,4BACE,mCAAA,CACA,uBNqhBN,CMjhBI,4DAEE,oBAAA,CADA,SNohBN,CMhhBM,oEACE,mBNkhBR,COxkBA,WAGE,0CAAA,CADA,+BAAA,CADA,aP6kBF,COxkBE,aANF,WAOI,YP2kBF,CACF,COxkBE,oBAEE,2CAAA,CADA,gCP2kBJ,COtkBE,kBAGE,eAAA,CADA,iBAAA,CADA,eP0kBJ,COpkBE,6BACE,WPykBJ,CO1kBE,6BACE,UPykBJ,CO1kBE,mBAEE,aAAA,CACA,cAAA,CACA,uBPskBJ,COnkBI,0BACE,YPqkBN,COjkBI,yBACE,UPmkBN,CQxmBA,KASE,cAAA,CARA,WAAA,CACA,iBR4mBF,CKxcI,oCGtKJ,KAaI,gBRqmBF,CACF,CK7cI,oCGtKJ,KAkBI,cRqmBF,CACF,CQhmBA,KASE,2CAAA,CAPA,YAAA,CACA,qBAAA,CAKA,eAAA,CAHA,eAAA,CAJA,iBAAA,CAGA,URsmBF,CQ9lBE,aAZF,KAaI,aRimBF,CACF,CK9cI,wCGhJF,yBAII,cR8lBJ,CACF,CQrlBA,SAEE,gBAAA,CAAA,iBAAA,CADA,eRylBF,CQplBA,cACE,YAAA,CACA,qBAAA,CACA,WRulBF,CQplBE,aANF,cAOI,aRulBF,CACF,CQnlBA,SACE,WRslBF,CQnlBE,gBACE,YAAA,CACA,WAAA,CACA,iBRqlBJ,CQhlBA,aACE,eAAA,CACA,sBRmlBF,CQ1kBA,WACE,YR6kBF,CQxkBA,WAGE,QAAA,CACA,SAAA,CAHA,iBAAA,CACA,OR6kBF,CQxkBE,uCACE,aR0kBJ,CQtkBE,+BAEE,uCAAA,CADA,kBRykBJ,CQnkBA,SASE
,2CAAA,CACA,mBAAA,CAFA,gCAAA,CADA,gBAAA,CADA,YAAA,CAMA,SAAA,CADA,uCAAA,CANA,mBAAA,CAJA,cAAA,CAYA,2BAAA,CATA,UR6kBF,CQjkBE,eAEE,SAAA,CAIA,uBAAA,CAHA,oEACE,CAHF,URskBJ,CQxjBA,MACE,WR2jBF,CSptBA,MACE,+PTstBF,CShtBA,cASE,mBAAA,CAFA,0CAAA,CACA,cAAA,CAFA,YAAA,CAIA,uCAAA,CACA,oBAAA,CAVA,iBAAA,CAEA,UAAA,CADA,QAAA,CAUA,qBAAA,CAPA,WAAA,CADA,ST2tBF,CShtBE,aAfF,cAgBI,YTmtBF,CACF,CShtBE,kCAEE,uCAAA,CADA,YTmtBJ,CS9sBE,qBACE,uCTgtBJ,CS5sBE,wCACE,+BT8sBJ,CSzsBE,oBAME,6BAAA,CADA,UAAA,CAJA,aAAA,CAEA,cAAA,CACA,aAAA,CAGA,2CAAA,CAAA,mCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CARA,aTmtBJ,CSvsBE,sBACE,cTysBJ,CStsBI,2BACE,2CTwsBN,CSlsBI,kEAEE,uDAAA,CADA,+BTqsBN,CU3wBA,mBACE,GACE,SAAA,CACA,0BV8wBF,CU3wBA,GACE,SAAA,CACA,uBV6wBF,CACF,CUzwBA,mBACE,GACE,SV2wBF,CUxwBA,GACE,SV0wBF,CACF,CU/vBE,qBASE,2BAAA,CADA,mCAAA,CAAA,2BAAA,CAFA,0BAAA,CADA,WAAA,CAEA,SAAA,CANA,cAAA,CACA,KAAA,CAEA,UAAA,CADA,SVuwBJ,CU7vBE,mBAcE,mDAAA,CANA,2CAAA,CACA,QAAA,CACA,mBAAA,CARA,QAAA,CASA,kDACE,CAPF,eAAA,CAEA,aAAA,CADA,SAAA,CALA,cAAA,CAGA,UAAA,CADA,SVwwBJ,CUzvBE,kBACE,aV2vBJ,CUvvBE,sBACE,YAAA,CACA,YVyvBJ,CUtvBI,oCACE,aVwvBN,CUnvBE,sBACE,mBVqvBJ,CUlvBI,6CACE,cVovBN,CK9oBI,wCKvGA,6CAKI,aAAA,CAEA,gBAAA,CACA,iBAAA,CAFA,UVsvBN,CACF,CU/uBE,kBACE,cVivBJ,CWl1BA,YACE,WAAA,CAIA,WXk1BF,CW/0BE,mBAEE,qBAAA,CADA,iBXk1BJ,CKrrBI,sCMtJE,4EACE,kBX80BN,CW10BI,0JACE,mBX40BN,CW70BI,8EACE,kBX40BN,CACF,CWv0BI,0BAGE,UAAA,CAFA,aAAA,CACA,YX00BN,CWr0BI,+BACE,eXu0BN,CWj0BE,8BACE,WXs0BJ,CWv0BE,8BACE,UXs0BJ,CWv0BE,8BAIE,iBXm0BJ,CWv0BE,8BAIE,kBXm0BJ,CWv0BE,oBAGE,cAAA,CADA,SXq0BJ,CWh0BI,aAPF,oBAQI,YXm0BJ,CACF,CWh0BI,gCACE,yCXk0BN,CW9zBI,wBACE,cAAA,CACA,kBXg0BN,CW7zBM,kCACE,oBX+zBR,CYh4BA,qBAeE,WZi4BF,CYh5BA,qBAeE,UZi4BF,CYh5BA,WAOE,2CAAA,CACA,mBAAA,CANA,YAAA,CAOA,8BAAA,CALA,iBAAA,CAMA,SAAA,CALA,mBAAA,CACA,mBAAA,CALA,cAAA,CAaA,0BAAA,CAHA,wCACE,CATF,SZ64BF,CY93BE,aAlBF,WAmBI,YZi4BF,CACF,CY93BE,mBAEE,SAAA,CADA,mBAAA,CAKA,uBAAA,CAHA,kEZi4BJ,CY13BE,kBAEE,gCAAA,CADA,eZ63BJ,Ca/5BA,aACE,gBAAA,CACA,iBbk6BF,Ca/5BE,sBAGE,WAAA,CADA,QAAA,
CADA,Sbm6BJ,Ca75BE,oBAEE,eAAA,CADA,ebg6BJ,Ca35BE,oBACE,iBb65BJ,Caz5BE,mBAIE,sBAAA,CAFA,YAAA,CACA,cAAA,CAEA,sBAAA,CAJA,iBb+5BJ,Cax5BI,iDACE,yCb05BN,Cat5BI,6BACE,iBbw5BN,Can5BE,mBAGE,uCAAA,CACA,cAAA,CAHA,aAAA,CACA,cAAA,CAGA,sBbq5BJ,Cal5BI,gDACE,+Bbo5BN,Cah5BI,4BACE,0CAAA,CACA,mBbk5BN,Ca74BE,mBAEE,SAAA,CADA,iBAAA,CAKA,2BAAA,CAHA,8Dbg5BJ,Ca14BI,qBAEE,aAAA,CADA,eb64BN,Cax4BI,6BACE,SAAA,CACA,uBb04BN,Ccz9BA,WAEE,0CAAA,CADA,+Bd69BF,Ccz9BE,aALF,WAMI,Yd49BF,CACF,Ccz9BE,kBACE,6BAAA,CAEA,aAAA,CADA,ad49BJ,Ccx9BI,gCACE,Yd09BN,Ccr9BE,iBAOE,eAAA,CANA,YAAA,CAKA,cAAA,CAGA,mBAAA,CAAA,eAAA,CADA,cAAA,CAGA,uCAAA,CADA,eAAA,CAEA,uBdm9BJ,Cch9BI,8CACE,Udk9BN,Cc98BI,+BACE,oBdg9BN,CKl0BI,wCSvIE,uBACE,ad48BN,Ccz8BO,yCACC,Yd28BR,CACF,Cct8BI,iCACE,gBdy8BN,Cc18BI,iCACE,iBdy8BN,Cc18BI,uBAEE,gBdw8BN,Ccr8BM,iCACE,edu8BR,Ccj8BE,kBACE,WAAA,CAIA,eAAA,CADA,mBAAA,CAFA,6BAAA,CACA,cAAA,CAGA,kBdm8BJ,Cc/7BE,mBAEE,YAAA,CADA,adk8BJ,Cc77BE,sBACE,gBAAA,CACA,Ud+7BJ,Cc17BA,gBACE,gDd67BF,Cc17BE,uBACE,YAAA,CACA,cAAA,CACA,6BAAA,CACA,ad47BJ,Ccx7BE,kCACE,sCd07BJ,Ccv7BI,gFACE,+Bdy7BN,Ccj7BA,cAKE,wCAAA,CADA,gBAAA,CADA,iBAAA,CADA,eAAA,CADA,Udw7BF,CK54BI,mCS7CJ,cASI,Udo7BF,CACF,Cch7BE,yBACE,sCdk7BJ,Cc36BA,WACE,mBAAA,CACA,SAAA,CAEA,cAAA,CADA,qBd+6BF,CK35BI,mCSvBJ,WAQI,ed86BF,CACF,Cc36BE,iBACE,oBAAA,CAEA,aAAA,CACA,iBAAA,CAFA,Yd+6BJ,Cc16BI,wBACE,ed46BN,Ccx6BI,qBAGE,iBAAA,CAFA,gBAAA,CACA,mBd26BN,CejlCE,uBAME,kBAAA,CACA,mBAAA,CAHA,gCAAA,CACA,cAAA,CAJA,oBAAA,CAEA,eAAA,CADA,kBAAA,CAMA,gEfolCJ,Ce9kCI,gCAEE,2CAAA,CACA,uCAAA,CAFA,gCfklCN,Ce5kCI,0DAEE,0CAAA,CACA,sCAAA,CAFA,+BfglCN,CezkCE,gCAKE,4Bf8kCJ,CenlCE,gEAME,6Bf6kCJ,CenlCE,gCAME,4Bf6kCJ,CenlCE,sBAIE,6DAAA,CAGA,8BAAA,CAJA,eAAA,CAFA,aAAA,CACA,eAAA,CAMA,sCf2kCJ,CetkCI,wDACE,6CAAA,CACA,8BfwkCN,CepkCI,+BACE,UfskCN,CgBznCA,WAOE,2CAAA,CAGA,8CACE,CALF,gCAAA,CADA,aAAA,CAHA,MAAA,CADA,eAAA,CACA,OAAA,CACA,KAAA,CACA,ShBgoCF,CgBrnCE,aAfF,WAgBI,YhBwnCF,CACF,CgBrnCE,mBAIE,2BAAA,CAHA,iEhBwnCJ,CgBjnCE,mBACE,kDACE,CAEF,kEhBinCJ,CgB3mCE,kBAEE,kBAAA,CADA,YAAA,CAEA,ehB6mCJ,CgBzmCE,mBAKE,kBAAA,CAEA,cA
AA,CAHA,YAAA,CAIA,uCAAA,CALA,aAAA,CAFA,iBAAA,CAQA,uBAAA,CAHA,qBAAA,CAJA,ShBknCJ,CgBxmCI,yBACE,UhB0mCN,CgBtmCI,iCACE,oBhBwmCN,CgBpmCI,uCAEE,uCAAA,CADA,YhBumCN,CgBlmCI,2BAEE,YAAA,CADA,ahBqmCN,CKv/BI,wCW/GA,2BAMI,YhBomCN,CACF,CgBjmCM,8DAIE,iBAAA,CAHA,aAAA,CAEA,aAAA,CADA,UhBqmCR,CKrhCI,mCWzEA,iCAII,YhB8lCN,CACF,CgB3lCM,wCACE,YhB6lCR,CgBzlCM,+CACE,oBhB2lCR,CKhiCI,sCWtDA,iCAII,YhBslCN,CACF,CgBjlCE,kBAEE,YAAA,CACA,cAAA,CAFA,iBAAA,CAIA,8DACE,CAFF,kBhBolCJ,CgB9kCI,oCAGE,SAAA,CADA,mBAAA,CAKA,6BAAA,CAHA,8DACE,CAJF,UhBolCN,CgB3kCM,8CACE,8BhB6kCR,CgBxkCI,8BACE,ehB0kCN,CgBrkCE,4BAGE,gBhB0kCJ,CgB7kCE,4BAGE,iBhB0kCJ,CgB7kCE,4BAIE,kBhBykCJ,CgB7kCE,4BAIE,iBhBykCJ,CgB7kCE,kBACE,WAAA,CAIA,eAAA,CAHA,aAAA,CAIA,kBhBukCJ,CgBpkCI,4CAGE,SAAA,CADA,mBAAA,CAKA,8BAAA,CAHA,8DACE,CAJF,UhB0kCN,CgBjkCM,sDACE,6BhBmkCR,CgB/jCM,8DAGE,SAAA,CADA,mBAAA,CAKA,uBAAA,CAHA,8DACE,CAJF,ShBqkCR,CgB1jCI,uCAGE,WAAA,CAFA,iBAAA,CACA,UhB6jCN,CgBvjCE,mBACE,YAAA,CACA,aAAA,CACA,cAAA,CAEA,+CACE,CAFF,kBhB0jCJ,CgBpjCI,8DACE,WAAA,CACA,SAAA,CACA,oChBsjCN,CgB7iCI,yBACE,QhB+iCN,CgB1iCE,mBACE,YhB4iCJ,CKzmCI,mCW4DF,6BAQI,gBhB4iCJ,CgBpjCA,6BAQI,iBhB4iCJ,CgBpjCA,mBAKI,aAAA,CAEA,iBAAA,CADA,ahB8iCJ,CACF,CKjnCI,sCW4DF,6BAaI,kBhB4iCJ,CgBzjCA,6BAaI,mBhB4iCJ,CACF,CD5xCA,SAGE,uCAAA,CAFA,eAAA,CACA,eCgyCF,CD5xCE,eACE,mBAAA,CACA,cAAA,CAGA,eAAA,CADA,QAAA,CADA,SCgyCJ,CD1xCE,sCAEE,WAAA,CADA,iBAAA,CAAA,kBC6xCJ,CDxxCE,eACE,+BC0xCJ,CDvxCI,0CACE,+BCyxCN,CDnxCA,UAKE,wBkBaa,ClBZb,oBAAA,CAFA,UAAA,CAHA,oBAAA,CAEA,eAAA,CADA,0BAAA,CAAA,2BC0xCF,CkB5zCA,MACE,0MAAA,CACA,gMAAA,CACA,yNlB+zCF,CkBzzCA,QACE,eAAA,CACA,elB4zCF,CkBzzCE,eAKE,uCAAA,CAJA,aAAA,CAGA,eAAA,CADA,eAAA,CADA,eAAA,CAIA,sBlB2zCJ,CkBxzCI,+BACE,YlB0zCN,CkBvzCM,mCAEE,WAAA,CADA,UlB0zCR,CkBlzCQ,sFAME,iBAAA,CALA,aAAA,CAGA,aAAA,CADA,cAAA,CAEA,kBAAA,CAHA,UlBwzCV,CkB7yCE,cAGE,eAAA,CADA,QAAA,CADA,SlBizCJ,CkB3yCE,cACE,elB6yCJ,CkB1yCI,sCACE,elB4yCN,CkB7yCI,sCACE,clB4yCN,CkBvyCE,cAEE,sBAAA,CADA,YAAA,CAEA,iBAAA,CAEA,uBAAA,CADA,sBlB0yCJ,CkBtyCI,sBACE,uClBwyCN,CkBjyCM,6EAEE,+BlBmyCR,CkB9xCI,2BAIE,iBlB6xCN,CkBz
xCI,kCACE,gBlB2xCN,CkBvxCI,kBAGE,iBAAA,CAFA,aAAA,CACA,YlB0xCN,CkBtxCM,8BACE,iBlBwxCR,CkBzxCM,8BACE,kBlBwxCR,CkBnxCI,wFACE,+BAAA,CACA,clBqxCN,CkBjxCI,4BACE,uCAAA,CACA,oBlBmxCN,CkB/wCI,0CACE,YlBixCN,CkB9wCM,yDAKE,6BAAA,CAJA,aAAA,CAEA,WAAA,CACA,qCAAA,CAAA,6BAAA,CAFA,UlBmxCR,CkB5wCM,kDACE,YlB8wCR,CkBxwCE,iCACE,YlB0wCJ,CkBvwCI,6CACE,WlBywCN,CkBpwCE,cACE,alBswCJ,CkBlwCE,gBACE,YlBowCJ,CK7uCI,wCahBA,0CASE,2CAAA,CAHA,YAAA,CACA,qBAAA,CACA,WAAA,CALA,MAAA,CADA,iBAAA,CACA,OAAA,CACA,KAAA,CACA,SlBmwCJ,CkBxvCI,+DACE,eAAA,CACA,elB0vCN,CkBtvCI,gCAQE,qDAAA,CAHA,uCAAA,CAEA,cAAA,CALA,aAAA,CAEA,kBAAA,CADA,wBAAA,CAFA,iBAAA,CAKA,kBlB0vCN,CkBrvCM,wDAGE,UlB2vCR,CkB9vCM,wDAGE,WlB2vCR,CkB9vCM,8CAIE,aAAA,CAEA,aAAA,CACA,YAAA,CANA,iBAAA,CACA,SAAA,CAGA,YlByvCR,CkBpvCQ,oDAKE,6BAAA,CADA,UAAA,CAHA,aAAA,CAEA,WAAA,CAGA,2CAAA,CAAA,mCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAPA,UlB6vCV,CkBjvCM,8CAGE,2CAAA,CACA,gEACE,CAJF,eAAA,CAKA,4BAAA,CAJA,kBlBsvCR,CkB/uCQ,2DACE,YlBivCV,CkB5uCM,8CAGE,2CAAA,CADA,gCAAA,CADA,elBgvCR,CkB1uCM,yCAIE,aAAA,CAFA,UAAA,CAIA,YAAA,CADA,aAAA,CAJA,iBAAA,CACA,WAAA,CACA,SlB+uCR,CkBvuCI,+BACE,MlByuCN,CkBruCI,+BAEE,4DAAA,CADA,SlBwuCN,CkBpuCM,qDACE,+BlBsuCR,CkBnuCQ,sHACE,+BlBquCV,CkB/tCI,+BAEE,YAAA,CADA,mBlBkuCN,CkB9tCM,mCACE,elBguCR,CkB5tCM,6CACE,SlB8tCR,CkB1tCM,uDAGE,mBlB6tCR,CkBhuCM,uDAGE,kBlB6tCR,CkBhuCM,6CAIE,gBAAA,CAFA,aAAA,CADA,YlB+tCR,CkBztCQ,mDAKE,6BAAA,CADA,UAAA,CAHA,aAAA,CAEA,WAAA,CAGA,2CAAA,CAAA,mCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAPA,UlBkuCV,CkBltCM,+CACE,mBlBotCR,CkB5sCM,4CAEE,wBAAA,CADA,elB+sCR,CkB3sCQ,oEACE,mBlB6sCV,CkB9sCQ,oEACE,oBlB6sCV,CkBzsCQ,4EACE,iBlB2sCV,CkB5sCQ,4EACE,kBlB2sCV,CkBvsCQ,oFACE,mBlBysCV,CkB1sCQ,oFACE,oBlBysCV,CkBrsCQ,4FACE,mBlBusCV,CkBxsCQ,4FACE,oBlBusCV,CkBhsCE,mBACE,wBlBksCJ,CkB9rCE,wBACE,YAAA,CACA,SAAA,CAIA,0BAAA,CAHA,oElBisCJ,CkB3rCI,kCACE,2BlB6rCN,CkBxrCE,gCACE,SAAA,CAIA,uBAAA,CAHA,qElB2rCJ,CkBrrCI,8CAEE,kCAAA,CAAA,0BlBsrCN,CACF,CKj4CI,wCamNA,0CACE,YlBirCJ,CkB9qCI,yDACE,UlBgrCN,CkB5
qCI,wDACE,YlB8qCN,CkB1qCI,kDACE,YlB4qCN,CkBvqCE,gBAIE,iDAAA,CADA,gCAAA,CAFA,aAAA,CACA,elB2qCJ,CACF,CK97CM,6Da4RF,6CACE,YlBqqCJ,CkBlqCI,4DACE,UlBoqCN,CkBhqCI,2DACE,YlBkqCN,CkB9pCI,qDACE,YlBgqCN,CACF,CKt7CI,mCa8RA,kCAME,qCAAA,CACA,qDAAA,CANA,eAAA,CACA,KAAA,CAGA,SlB2pCJ,CkBtpCI,6CACE,uBlBwpCN,CkBppCI,gDACE,YlBspCN,CACF,CKr8CI,sCa7JJ,QAkdI,oDlBopCF,CkBjpCE,gCAME,qCAAA,CACA,qDAAA,CANA,eAAA,CACA,KAAA,CAGA,SlBmpCJ,CkB9oCI,8CACE,uBlBgpCN,CkBtoCE,sEACE,YlB2oCJ,CkBvoCE,sEACE,alByoCJ,CkBroCE,6CACE,YlBuoCJ,CkBnoCE,uBACE,aAAA,CACA,elBqoCJ,CkBloCI,kCACE,elBooCN,CkBhoCI,qCACE,elBkoCN,CkB/nCM,0CACE,uClBioCR,CkB7nCM,6DACE,mBlB+nCR,CkB3nCM,mDACE,YlB6nCR,CkBxnCI,+BACE,alB0nCN,CkBvnCM,2DACE,SlBynCR,CkBnnCE,cAGE,kBAAA,CADA,YAAA,CAEA,gCAAA,CAHA,WlBwnCJ,CkBlnCI,oBACE,uDlBonCN,CkBhnCI,oBAME,6BAAA,CACA,kBAAA,CAFA,UAAA,CAJA,oBAAA,CAEA,WAAA,CAMA,2CAAA,CAAA,mCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAJA,yBAAA,CAJA,qBAAA,CAFA,UlB4nCN,CkB/mCM,8BACE,wBlBinCR,CkB7mCM,sKAEE,uBlB8mCR,CkB/lCI,+HACE,YlBqmCN,CkBlmCM,oDACE,aAAA,CACA,SlBomCR,CkBjmCQ,kEAOE,qCAAA,CACA,qDAAA,CAFA,eAAA,CADA,YAAA,CADA,eAAA,CAHA,eAAA,CACA,KAAA,CACA,SlBwmCV,CkBhmCU,0FACE,mBlBkmCZ,CkB9lCU,gFACE,YlBgmCZ,CkB1lCM,kDACE,uClB4lCR,CkBtlCI,2CACE,alBwlCN,CkBrlCM,iFACE,mBlBulCR,CkBxlCM,iFACE,kBlBulCR,CkB9kCI,mFACE,elBglCN,CkB7kCM,iGACE,SlB+kCR,CkB1kCI,qFAGE,mDlB4kCN,CkB/kCI,qFAGE,oDlB4kCN,CkB/kCI,2EACE,aAAA,CACA,oBlB6kCN,CkBzkCM,0FACE,YlB2kCR,CACF,CmB9uDA,eAKE,eAAA,CACA,eAAA,CAJA,SnBqvDF,CmB9uDE,gCANA,kBAAA,CAFA,YAAA,CAGA,sBnB4vDF,CmBvvDE,iBAOE,mBAAA,CAFA,aAAA,CADA,gBAAA,CAEA,iBnBivDJ,CmB5uDE,wBAEE,qDAAA,CADA,uCnB+uDJ,CmB1uDE,qBACE,6CnB4uDJ,CmBvuDI,sDAEE,uDAAA,CADA,+BnB0uDN,CmBtuDM,8DACE,+BnBwuDR,CmBnuDI,mCACE,uCAAA,CACA,oBnBquDN,CmBjuDI,yBAKE,iBAAA,CADA,yCAAA,CAHA,aAAA,CAEA,eAAA,CADA,YnBsuDN,CoBtxDE,eAGE,+DAAA,CADA,oBAAA,CADA,qBpB2xDJ,CKtmDI,wCetLF,eAOI,YpByxDJ,CACF,CoBnxDM,6BACE,oBpBqxDR,CoB/wDE,kBACE,YAAA,CACA,qBAAA,CACA,SAAA,CACA,cpBixDJ,CoB1wDI,0BACE,sBpB4wDN,CoBzwDM,gEACE,+BpB2wDR,CoBrwDE,kBACE,oBpBuwD
J,CoBpwDI,mCAGE,kBAAA,CAFA,YAAA,CACA,SAAA,CAEA,iBpBswDN,CoBlwDI,oCAIE,kBAAA,CAHA,mBAAA,CACA,kBAAA,CACA,SAAA,CAGA,QAAA,CADA,iBpBqwDN,CoBhwDI,0DACE,kBpBkwDN,CoBnwDI,0DACE,iBpBkwDN,CoB9vDI,iDACE,uBAAA,CAEA,YpB+vDN,CoB1vDE,uEAEE,YpB4vDJ,CoBrvDA,YAGE,kBAAA,CAFA,YAAA,CAIA,eAAA,CAHA,SAAA,CAIA,eAAA,CAFA,UpB0vDF,CoBrvDE,yBACE,WpBuvDJ,CoBhvDA,kBACE,YpBmvDF,CKtqDI,wCe9EJ,kBAKI,wBpBmvDF,CACF,CoBhvDE,qCACE,WpBkvDJ,CKjsDI,sCelDF,+CAKI,kBpBkvDJ,CoBvvDA,+CAKI,mBpBkvDJ,CACF,CKnrDI,wCe1DJ,6BAII,SpB8uDF,CACF,CqBl3DA,MACE,igBrBq3DF,CqB/2DA,WACE,iBrBk3DF,CKptDI,mCgB/JJ,WAKI,erBk3DF,CACF,CqB/2DE,kBACE,YrBi3DJ,CqB72DE,oBAEE,SAAA,CADA,SrBg3DJ,CK7sDI,wCgBpKF,8BAkBI,YrB62DJ,CqB/3DA,8BAkBI,arB62DJ,CqB/3DA,oBAYI,2CAAA,CACA,kBAAA,CAJA,WAAA,CACA,eAAA,CACA,mBAAA,CALA,iBAAA,CACA,SAAA,CAUA,uBAAA,CAHA,4CACE,CAPF,UrBu3DJ,CqB12DI,+DACE,SAAA,CACA,oCrB42DN,CACF,CKnvDI,mCgBjJF,8BAyCI,MrBs2DJ,CqB/4DA,8BAyCI,OrBs2DJ,CqB/4DA,oBAoCI,0BAAA,CADA,cAAA,CADA,QAAA,CAHA,cAAA,CACA,KAAA,CAKA,sDACE,CALF,OrB82DJ,CqBn2DI,+DAME,YAAA,CACA,SAAA,CACA,4CACE,CARF,UrBw2DN,CACF,CKlvDI,wCgBxGA,+DAII,mBrB01DN,CACF,CKhyDM,6DgB/DF,+DASI,mBrB01DN,CACF,CKryDM,6DgB/DF,+DAcI,mBrB01DN,CACF,CqBr1DE,kBAEE,kCAAA,CAAA,0BrBs1DJ,CKpwDI,wCgBpFF,4BAmBI,MrBk1DJ,CqBr2DA,4BAmBI,OrBk1DJ,CqBr2DA,kBAUI,QAAA,CAEA,SAAA,CADA,eAAA,CALA,cAAA,CACA,KAAA,CAWA,wBAAA,CALA,qGACE,CALF,OAAA,CADA,SrB61DJ,CqB/0DI,4BACE,yBrBi1DN,CqB70DI,6DAEE,WAAA,CACA,SAAA,CAMA,uBAAA,CALA,sGACE,CAJF,UrBm1DN,CACF,CK/yDI,mCgBjEF,4BA2CI,WrB60DJ,CqBx3DA,4BA2CI,UrB60DJ,CqBx3DA,kBA6CI,eAAA,CAHA,iBAAA,CAIA,8CAAA,CAFA,arB40DJ,CACF,CK90DM,6DgBOF,6DAII,arBu0DN,CACF,CK7zDI,sCgBfA,6DASI,arBu0DN,CACF,CqBl0DE,iBAIE,2CAAA,CACA,0BAAA,CAFA,aAAA,CAFA,iBAAA,CAKA,2CACE,CALF,SrBw0DJ,CK10DI,mCgBAF,iBAaI,0BAAA,CACA,mBAAA,CAFA,arBo0DJ,CqB/zDI,uBACE,0BrBi0DN,CACF,CqB7zDI,4DAEE,2CAAA,CACA,6BAAA,CACA,8BAAA,CAHA,gCrBk0DN,CqB1zDE,4BAKE,mBAAA,CAAA,oBrB+zDJ,CqBp0DE,4BAKE,mBAAA,CAAA,oBrB+zDJ,CqBp0DE,kBAQE,gBAAA,CAFA,eAAA,CAFA,WAAA,CAHA,iBAAA,CAMA,sBAAA,CAJA,UAAA,CADA,SrBk0DJ,CqBzzDI,+BACE,qBrB2zDN,CqBvzDI,kEAEE,u
CrBwzDN,CqBpzDI,6BACE,YrBszDN,CK11DI,wCgBaF,kBA8BI,eAAA,CADA,aAAA,CADA,UrBuzDJ,CACF,CKp3DI,mCgBgCF,4BAmCI,mBrBuzDJ,CqB11DA,4BAmCI,oBrBuzDJ,CqB11DA,kBAqCI,aAAA,CADA,erBszDJ,CqBlzDI,+BACE,uCrBozDN,CqBhzDI,mCACE,gCrBkzDN,CqB9yDI,6DACE,kBrBgzDN,CqB7yDM,8EACE,uCrB+yDR,CqB3yDM,0EACE,WrB6yDR,CACF,CqBvyDE,iBAIE,cAAA,CAHA,oBAAA,CAEA,aAAA,CAEA,kCACE,CAJF,YrB4yDJ,CqBpyDI,uBACE,UrBsyDN,CqBlyDI,yCAGE,UrBqyDN,CqBxyDI,yCAGE,WrBqyDN,CqBxyDI,+BACE,iBAAA,CACA,SAAA,CAEA,SrBoyDN,CqBjyDM,6CACE,oBrBmyDR,CK14DI,wCgB+FA,yCAcI,UrBkyDN,CqBhzDE,yCAcI,WrBkyDN,CqBhzDE,+BAaI,SrBmyDN,CqB/xDM,+CACE,YrBiyDR,CACF,CKt6DI,mCgBkHA,+BAwBI,mBrBgyDN,CqB7xDM,8CACE,YrB+xDR,CACF,CqBzxDE,8BAGE,WrB6xDJ,CqBhyDE,8BAGE,UrB6xDJ,CqBhyDE,oBAKE,mBAAA,CAJA,iBAAA,CACA,SAAA,CAEA,SrB4xDJ,CKl6DI,wCgBkIF,8BAUI,WrB2xDJ,CqBryDA,8BAUI,UrB2xDJ,CqBryDA,oBASI,SrB4xDJ,CACF,CqBxxDI,uCACE,iBrB8xDN,CqB/xDI,uCACE,kBrB8xDN,CqB/xDI,6BAEE,uCAAA,CACA,SAAA,CAIA,oBAAA,CAHA,+DrB2xDN,CqBrxDM,iDAEE,uCAAA,CADA,YrBwxDR,CqBnxDM,gGAGE,SAAA,CADA,mBAAA,CAEA,kBrBoxDR,CqBjxDQ,sGACE,UrBmxDV,CqB5wDE,8BAOE,mBAAA,CAAA,oBrBmxDJ,CqB1xDE,8BAOE,mBAAA,CAAA,oBrBmxDJ,CqB1xDE,oBAIE,kBAAA,CAKA,yCAAA,CANA,YAAA,CAKA,eAAA,CAFA,WAAA,CAKA,SAAA,CAVA,iBAAA,CACA,KAAA,CAUA,uBAAA,CAFA,kBAAA,CALA,UrBqxDJ,CK59DI,mCgBkMF,8BAgBI,mBrB+wDJ,CqB/xDA,8BAgBI,oBrB+wDJ,CqB/xDA,oBAiBI,erB8wDJ,CACF,CqB3wDI,+DACE,SAAA,CACA,0BrB6wDN,CqBxwDE,6BAKE,+BrB2wDJ,CqBhxDE,0DAME,gCrB0wDJ,CqBhxDE,6BAME,+BrB0wDJ,CqBhxDE,mBAIE,eAAA,CAHA,iBAAA,CAEA,UAAA,CADA,SrB8wDJ,CK39DI,wCgB2MF,mBAWI,QAAA,CADA,UrB2wDJ,CACF,CKp/DI,mCgB8NF,mBAiBI,SAAA,CADA,UAAA,CAEA,sBrB0wDJ,CqBvwDI,8DACE,8BAAA,CACA,SrBywDN,CACF,CqBpwDE,uBASE,kCAAA,CAAA,0BAAA,CAFA,2CAAA,CANA,WAAA,CACA,eAAA,CAIA,kBrBqwDJ,CqB/vDI,iEAZF,uBAaI,uBrBkwDJ,CACF,CKjiEM,6DgBiRJ,uBAkBI,arBkwDJ,CACF,CKhhEI,sCgB2PF,uBAuBI,arBkwDJ,CACF,CKrhEI,mCgB2PF,uBA4BI,YAAA,CAEA,yDAAA,CADA,oBrBmwDJ,CqB/vDI,kEACE,erBiwDN,CqB7vDI,6BACE,+CrB+vDN,CqB3vDI,0CAEE,YAAA,CADA,WrB8vDN,CqBzvDI,gDACE,oDrB2vDN,CqBxvDM,sDACE,0CrB0vDR,CACF,CqBnvDA,kBACE,gCAAA,CACA,qBrBsvDF,CqBnvDE,wBAKE,qDAAA
,CADA,uCAAA,CAFA,gBAAA,CACA,kBAAA,CAFA,eAAA,CAKA,uBrBqvDJ,CKzjEI,mCgB8TF,kCAUI,mBrBqvDJ,CqB/vDA,kCAUI,oBrBqvDJ,CACF,CqBjvDE,wBAGE,eAAA,CADA,QAAA,CADA,SAAA,CAIA,wBAAA,CAAA,gBrBkvDJ,CqB9uDE,wBACE,yDrBgvDJ,CqB7uDI,oCACE,erB+uDN,CqB1uDE,wBACE,aAAA,CACA,YAAA,CAEA,uBAAA,CADA,gCrB6uDJ,CqBzuDI,4DACE,uDrB2uDN,CqBvuDI,gDACE,mBrByuDN,CqBpuDE,gCAKE,cAAA,CADA,aAAA,CAEA,YAAA,CALA,eAAA,CAMA,uBAAA,CALA,KAAA,CACA,SrB0uDJ,CqBnuDI,wCACE,YrBquDN,CqBhuDI,wDACE,YrBkuDN,CqB9tDI,oCAGE,+BAAA,CADA,gBAAA,CADA,mBAAA,CAGA,2CrBguDN,CK3mEI,mCgBuYA,8CAUI,mBrB8tDN,CqBxuDE,8CAUI,oBrB8tDN,CACF,CqB1tDI,oFAEE,uDAAA,CADA,+BrB6tDN,CqBvtDE,sCACE,2CrBytDJ,CqBptDE,2BAGE,eAAA,CADA,eAAA,CADA,iBrBwtDJ,CK5nEI,mCgBmaF,qCAOI,mBrBstDJ,CqB7tDA,qCAOI,oBrBstDJ,CACF,CqBltDE,kCAEE,MrBwtDJ,CqB1tDE,kCAEE,OrBwtDJ,CqB1tDE,wBAME,uCAAA,CAFA,aAAA,CACA,YAAA,CAJA,iBAAA,CAEA,YrButDJ,CKtnEI,wCgB4ZF,wBAUI,YrBotDJ,CACF,CqBjtDI,8BAKE,6BAAA,CADA,UAAA,CAHA,oBAAA,CAEA,WAAA,CAGA,+CAAA,CAAA,uCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAPA,UrB0tDN,CqBhtDM,wCACE,oBrBktDR,CqB5sDE,8BAGE,uCAAA,CAFA,gBAAA,CACA,erB+sDJ,CqB3sDI,iCAKE,gCAAA,CAHA,eAAA,CACA,eAAA,CACA,eAAA,CAHA,erBitDN,CqB1sDM,sCACE,oBrB4sDR,CqBvsDI,iCAKE,gCAAA,CAHA,gBAAA,CACA,eAAA,CACA,eAAA,CAHA,arB6sDN,CqBtsDM,sCACE,oBrBwsDR,CqBlsDE,yBAKE,gCAAA,CAJA,aAAA,CAEA,gBAAA,CACA,iBAAA,CAFA,arBusDJ,CqBhsDE,uBAGE,wBAAA,CAFA,+BAAA,CACA,yBrBmsDJ,CsBv2EA,WACE,iBAAA,CACA,StB02EF,CsBv2EE,kBAOE,2CAAA,CACA,mBAAA,CACA,8BAAA,CAHA,gCAAA,CAHA,QAAA,CAEA,gBAAA,CADA,YAAA,CAMA,SAAA,CATA,iBAAA,CACA,sBAAA,CAaA,mCAAA,CAJA,oEtB02EJ,CsBn2EI,6EACE,gBAAA,CACA,SAAA,CAKA,+BAAA,CAJA,8EtBs2EN,CsB91EI,wBAWE,+BAAA,CAAA,8CAAA,CAFA,6BAAA,CAAA,8BAAA,CACA,YAAA,CAFA,UAAA,CAHA,QAAA,CAFA,QAAA,CAIA,kBAAA,CADA,iBAAA,CALA,iBAAA,CACA,KAAA,CAEA,OtBu2EN,CsB31EE,iBAOE,mBAAA,CAFA,eAAA,CACA,oBAAA,CAHA,QAAA,CAFA,kBAAA,CAGA,aAAA,CAFA,StBk2EJ,CsBz1EE,iBACE,kBtB21EJ,CsBv1EE,2BAGE,kBAAA,CAAA,oBtB61EJ,CsBh2EE,2BAGE,mBAAA,CAAA,mBtB61EJ,CsBh2EE,iBAIE,cAAA,CAHA,aAAA,CAIA,YAAA,CAIA,uBAAA,CAHA,2CACE,CALF,UtB81EJ,CsBp1
EI,8CACE,+BtBs1EN,CsBl1EI,uBACE,qDtBo1EN,CuBx6EA,YAIE,qBAAA,CADA,aAAA,CAGA,gBAAA,CALA,eAAA,CACA,UAAA,CAGA,avB46EF,CuBx6EE,aATF,YAUI,YvB26EF,CACF,CK7vEI,wCkB3KF,+BAeI,avBs6EJ,CuBr7EA,+BAeI,cvBs6EJ,CuBr7EA,qBAUI,2CAAA,CAHA,aAAA,CAEA,WAAA,CALA,cAAA,CACA,KAAA,CASA,uBAAA,CAHA,iEACE,CAJF,aAAA,CAFA,SvB+6EJ,CuBn6EI,mEACE,8BAAA,CACA,6BvBq6EN,CuBl6EM,6EACE,8BvBo6ER,CuB/5EI,6CAEE,QAAA,CAAA,MAAA,CACA,QAAA,CAEA,eAAA,CAJA,iBAAA,CACA,OAAA,CAEA,qBAAA,CAFA,KvBo6EN,CACF,CK5yEI,sCkBtKJ,YAuDI,QvB+5EF,CuB55EE,mBACE,WvB85EJ,CuB15EE,6CACE,UvB45EJ,CACF,CuBx5EE,uBACE,YAAA,CACA,OvB05EJ,CK3zEI,mCkBjGF,uBAMI,QvB05EJ,CuBv5EI,8BACE,WvBy5EN,CuBr5EI,qCACE,avBu5EN,CuBn5EI,+CACE,kBvBq5EN,CACF,CuBh5EE,wBAUE,uBAAA,CANA,kCAAA,CAAA,0BAAA,CAHA,cAAA,CACA,eAAA,CASA,yDAAA,CAFA,oBvB+4EJ,CuB14EI,2CAEE,YAAA,CADA,WvB64EN,CuBx4EI,mEACE,+CvB04EN,CuBv4EM,qHACE,oDvBy4ER,CuBt4EQ,iIACE,0CvBw4EV,CuBz3EE,wCAGE,wBACE,qBvBy3EJ,CuBr3EE,6BACE,kCvBu3EJ,CuBx3EE,6BACE,iCvBu3EJ,CACF,CKn1EI,wCkB5BF,YAME,0BAAA,CADA,QAAA,CAEA,SAAA,CANA,cAAA,CACA,KAAA,CAMA,sDACE,CALF,OAAA,CADA,SvBw3EF,CuB72EE,4CAEE,WAAA,CACA,SAAA,CACA,4CACE,CAJF,UvBk3EJ,CACF,CwB/hFA,iBACE,GACE,QxBiiFF,CwB9hFA,GACE,axBgiFF,CACF,CwB5hFA,gBACE,GACE,SAAA,CACA,0BxB8hFF,CwB3hFA,IACE,SxB6hFF,CwB1hFA,GACE,SAAA,CACA,uBxB4hFF,CACF,CwBphFA,MACE,+eAAA,CACA,ygBAAA,CACA,mmBAAA,CACA,sfxBshFF,CwBhhFA,WAOE,kCAAA,CAAA,0BAAA,CANA,aAAA,CACA,gBAAA,CACA,eAAA,CAEA,uCAAA,CAGA,uBAAA,CAJA,kBxBshFF,CwB/gFE,iBACE,UxBihFJ,CwB7gFE,iBACE,oBAAA,CAEA,aAAA,CACA,qBAAA,CAFA,UxBihFJ,CwB5gFI,+BACE,iBxB+gFN,CwBhhFI,+BACE,kBxB+gFN,CwBhhFI,qBAEE,gBxB8gFN,CwB1gFI,kDACE,iBxB6gFN,CwB9gFI,kDACE,kBxB6gFN,CwB9gFI,kDAEE,iBxB4gFN,CwB9gFI,kDAEE,kBxB4gFN,CwBvgFE,iCAGE,iBxB4gFJ,CwB/gFE,iCAGE,kBxB4gFJ,CwB/gFE,uBACE,oBAAA,CACA,6BAAA,CAEA,eAAA,CACA,sBAAA,CACA,qBxBygFJ,CwBrgFE,kBACE,YAAA,CAMA,gBAAA,CALA,SAAA,CAMA,oBAAA,CAHA,gBAAA,CAIA,WAAA,CAHA,eAAA,CAFA,SAAA,CADA,UxB6gFJ,CwBpgFI,iDACE,4BxBsgFN,CwBjgFE,iBACE,eAAA,CACA,sBxBmgFJ,CwBhgFI,gDACE,2BxBkgFN,CwB9/EI,kCAIE,kBxBsgFN,CwB1gFI,kCAIE,iBxBsgFN,CwB1gFI,wBAOE,6BAAA,CADA
,UAAA,CALA,oBAAA,CAEA,YAAA,CAKA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CALA,uBAAA,CAHA,WxBwgFN,CwB5/EI,iCACE,axB8/EN,CwB1/EI,iCACE,gDAAA,CAAA,wCxB4/EN,CwBx/EI,+BACE,8CAAA,CAAA,sCxB0/EN,CwBt/EI,+BACE,8CAAA,CAAA,sCxBw/EN,CwBp/EI,sCACE,qDAAA,CAAA,6CxBs/EN,CyB7oFA,MACE,mSAAA,CACA,oVAAA,CACA,mOAAA,CACA,qZzBgpFF,CyB1oFA,WACE,iBzB6oFF,CyB1oFE,iBAME,kDAAA,CADA,UAAA,CAJA,oBAAA,CAEA,cAAA,CAIA,mCAAA,CAAA,2BAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CANA,0BAAA,CAFA,azBopFJ,CyBxoFE,uBACE,6BzB0oFJ,CyBtoFE,sBACE,wCAAA,CAAA,gCzBwoFJ,CyBpoFE,6BACE,+CAAA,CAAA,uCzBsoFJ,CyBloFE,4BACE,8CAAA,CAAA,sCzBooFJ,C0BhrFA,SASE,2CAAA,CADA,gCAAA,CAJA,aAAA,CAGA,eAAA,CADA,aAAA,CADA,UAAA,CAFA,S1BurFF,C0B9qFE,aAZF,SAaI,Y1BirFF,CACF,CKtgFI,wCqBzLJ,SAkBI,Y1BirFF,CACF,C0B9qFE,iBACE,mB1BgrFJ,C0B5qFE,yBAIE,iB1BmrFJ,C0BvrFE,yBAIE,kB1BmrFJ,C0BvrFE,eAQE,eAAA,CAPA,YAAA,CAMA,eAAA,CAJA,QAAA,CAEA,aAAA,CAHA,SAAA,CAWA,oBAAA,CAPA,kB1BirFJ,C0BvqFI,kCACE,Y1ByqFN,C0BpqFE,eACE,aAAA,CACA,kBAAA,CAAA,mB1BsqFJ,C0BnqFI,sCACE,aAAA,CACA,S1BqqFN,C0B/pFE,eAOE,kCAAA,CAAA,0BAAA,CANA,YAAA,CAEA,eAAA,CADA,gBAAA,CAMA,UAAA,CAJA,uCAAA,CACA,oBAAA,CAIA,8D1BgqFJ,C0B3pFI,0CACE,aAAA,CACA,S1B6pFN,C0BzpFI,6BAEE,kB1B4pFN,C0B9pFI,6BAEE,iB1B4pFN,C0B9pFI,mBAGE,iBAAA,CAFA,Y1B6pFN,C0BtpFM,2CACE,qB1BwpFR,C0BzpFM,2CACE,qB1B2pFR,C0B5pFM,2CACE,qB1B8pFR,C0B/pFM,2CACE,qB1BiqFR,C0BlqFM,2CACE,oB1BoqFR,C0BrqFM,2CACE,qB1BuqFR,C0BxqFM,2CACE,qB1B0qFR,C0B3qFM,2CACE,qB1B6qFR,C0B9qFM,4CACE,qB1BgrFR,C0BjrFM,4CACE,oB1BmrFR,C0BprFM,4CACE,qB1BsrFR,C0BvrFM,4CACE,qB1ByrFR,C0B1rFM,4CACE,qB1B4rFR,C0B7rFM,4CACE,qB1B+rFR,C0BhsFM,4CACE,oB1BksFR,C0B5rFI,gCACE,SAAA,CAIA,yBAAA,CAHA,wC1B+rFN,C2BlyFA,MACE,wS3BqyFF,C2B5xFE,qBAEE,mBAAA,CADA,kB3BgyFJ,C2B3xFE,8BAGE,iB3BoyFJ,C2BvyFE,8BAGE,gB3BoyFJ,C2BvyFE,oBASE,+CAAA,CACA,oBAAA,CATA,oBAAA,CAIA,gBAAA,CACA,eAAA,CAEA,qBAAA,CADA,eAAA,CAHA,kBAAA,CAFA,uB3BqyFJ,C2B1xFI,0BAGE,uCAAA,CAFA,aAAA,CACA,YAAA,CAEA,6C3B4xFN,C2BvxFM,gEAEE,0CAAA,CADA,+B3B0xFR,C2BpxFI,yBACE,uB3BsxFN,C2B9wFI,gCAOE,oDAAA,CADA
,UAAA,CALA,oBAAA,CAEA,YAAA,CACA,iBAAA,CAKA,qCAAA,CAAA,6BAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAJA,iCAAA,CAHA,0BAAA,CAHA,W3B0xFN,C2B5wFI,wFACE,0C3B8wFN,C4Br1FA,iBACE,GACE,oB5Bw1FF,C4Br1FA,IACE,kB5Bu1FF,C4Bp1FA,GACE,oB5Bs1FF,CACF,C4B90FA,MACE,0NAAA,CACA,uPAAA,CACA,wB5Bg1FF,C4B10FA,YA6BE,kCAAA,CAAA,0BAAA,CAVA,2CAAA,CACA,mBAAA,CACA,8BAAA,CAHA,gCAAA,CADA,sCAAA,CAdA,+IACE,CAYF,8BAAA,CAMA,SAAA,CArBA,iBAAA,CACA,uBAAA,CAyBA,4BAAA,CAJA,uDACE,CATF,6BAAA,CADA,S5B80FF,C4B5zFE,oBAEE,SAAA,CAKA,uBAAA,CAJA,2EACE,CAHF,S5Bi0FJ,C4BvzFE,8CACE,sC5ByzFJ,C4BrzFE,mBAEE,gBAAA,CADA,a5BwzFJ,C4BpzFI,2CACE,Y5BszFN,C4BlzFI,0CACE,e5BozFN,C4B5yFA,eACE,eAAA,CAGA,YAAA,CADA,0BAAA,CADA,kB5BizFF,C4B5yFE,yBACE,a5B8yFJ,C4B1yFE,oBACE,sCAAA,CACA,iB5B4yFJ,C4BxyFE,6BACE,oBAAA,CAGA,gB5BwyFJ,C4BpyFE,sBAoBE,mBAAA,CAdA,cAAA,CAHA,oBAAA,CACA,gBAAA,CAAA,iBAAA,CAIA,YAAA,CAWA,eAAA,CAlBA,iBAAA,CAMA,wBAAA,CAAA,gBAAA,CAFA,uBAAA,CAHA,S5B8yFJ,C4BpyFI,qCACE,uB5BsyFN,C4B5xFI,cAvBF,sBAwBI,W5B+xFJ,C4B5xFI,wCACE,2B5B8xFN,C4B1xFI,6BAOE,qCAAA,CACA,+CAAA,CAAA,uC5B+xFN,C4BrxFI,yDAZE,UAAA,CADA,YAAA,CAIA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAVA,iBAAA,CACA,SAAA,CAEA,WAAA,CADA,U5BmzFN,C4BpyFI,4BAOE,oDAAA,CAMA,4CAAA,CAAA,oCAAA,CADA,uBAAA,CAJA,+C5B4xFN,C4BjxFM,gDACE,uB5BmxFR,C4B/wFM,mFACE,0C5BixFR,CACF,C4B5wFI,0CAGE,2BAAA,CADA,uBAAA,CADA,S5BgxFN,C4B1wFI,8CACE,oB5B4wFN,C4BzwFM,aAJF,8CASI,8CAAA,CACA,iBAAA,CAHA,gCAAA,CADA,eAAA,CADA,cAAA,CAGA,kB5B8wFN,C4BzwFM,oDACE,mC5B2wFR,CACF,C4B/vFE,gCAEE,iBAAA,CADA,e5BmwFJ,C4B/vFI,mCACE,iB5BiwFN,C4B9vFM,oDAGE,a5B4wFR,C4B/wFM,oDAGE,c5B4wFR,C4B/wFM,0CAcE,8CAAA,CACA,iBAAA,CALA,gCAAA,CAEA,oBAAA,CACA,qBAAA,CANA,iBAAA,CACA,eAAA,CAHA,UAAA,CAIA,gBAAA,CALA,aAAA,CAEA,cAAA,CALA,iBAAA,CAUA,iBAAA,CATA,S5B6wFR,C6BpgGA,kBAME,e7BghGF,C6BthGA,kBAME,gB7BghGF,C6BthGA,QAUE,2CAAA,CACA,oBAAA,CAEA,8BAAA,CALA,uCAAA,CACA,cAAA,CALA,aAAA,CAGA,eAAA,CAKA,YAAA,CAPA,mBAAA,CAJA,cAAA,CACA,UAAA,CAiBA,yBAAA,CALA,mGACE,CAZF,S7BmhGF,C6BhgGE,aAtBF,QAuBI,Y7BmgGF,CACF,C6BhgGE,kBACE,wB7
BkgGJ,C6B9/FE,gBAEE,SAAA,CADA,mBAAA,CAGA,+BAAA,CADA,uB7BigGJ,C6B7/FI,0BACE,8B7B+/FN,C6B1/FE,4BAEE,0CAAA,CADA,+B7B6/FJ,C6Bx/FE,YACE,oBAAA,CACA,oB7B0/FJ,C8B/iGA,oBACE,GACE,mB9BkjGF,CACF,C8B1iGA,MACE,wf9B4iGF,C8BtiGA,YACE,aAAA,CAEA,eAAA,CADA,a9B0iGF,C8BtiGE,+BAOE,kBAAA,CAAA,kB9BuiGJ,C8B9iGE,+BAOE,iBAAA,CAAA,mB9BuiGJ,C8B9iGE,qBAQE,aAAA,CACA,cAAA,CACA,YAAA,CATA,iBAAA,CAKA,U9BwiGJ,C8BjiGI,qCAIE,iB9ByiGN,C8B7iGI,qCAIE,kB9ByiGN,C8B7iGI,2BAME,6BAAA,CADA,UAAA,CAJA,oBAAA,CAEA,YAAA,CAIA,yCAAA,CAAA,iCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CARA,W9B2iGN,C8B9hGE,kBAUE,2CAAA,CACA,mBAAA,CACA,8BAAA,CAJA,gCAAA,CACA,oBAAA,CAHA,kBAAA,CAFA,YAAA,CASA,SAAA,CANA,aAAA,CAFA,SAAA,CAJA,iBAAA,CAgBA,4BAAA,CAfA,UAAA,CAYA,+CACE,CAZF,S9B4iGJ,C8B3hGI,+EACE,gBAAA,CACA,SAAA,CACA,sC9B6hGN,C8BvhGI,qCAEE,oCACE,gC9BwhGN,C8BphGI,2CACE,c9BshGN,CACF,C8BjhGE,kBACE,kB9BmhGJ,C8B/gGE,4BAGE,kBAAA,CAAA,oB9BshGJ,C8BzhGE,4BAGE,mBAAA,CAAA,mB9BshGJ,C8BzhGE,kBAKE,cAAA,CAJA,aAAA,CAKA,YAAA,CAIA,uBAAA,CAHA,2CACE,CAJF,kBAAA,CAFA,U9BuhGJ,C8B5gGI,gDACE,+B9B8gGN,C8B1gGI,wBACE,qD9B4gGN,C+B5mGA,MAEI,uWAAA,CAAA,8WAAA,CAAA,sPAAA,CAAA,8xBAAA,CAAA,0MAAA,CAAA,gbAAA,CAAA,gMAAA,CAAA,iQAAA,CAAA,0VAAA,CAAA,6aAAA,CAAA,8SAAA,CAAA,gM/BqoGJ,C+BznGE,4CAME,8CAAA,CACA,2BAAA,CACA,mBAAA,CACA,8BAAA,CAJA,mCAAA,CAJA,iBAAA,CAGA,gBAAA,CADA,iBAAA,CADA,eAAA,CASA,uBAAA,CADA,2B/B6nGJ,C+BznGI,aAdF,4CAeI,e/B4nGJ,CACF,C+BznGI,sEACE,gC/B2nGN,C+BtnGI,gDACE,qB/BwnGN,C+BpnGI,gIAEE,iBAAA,CADA,c/BunGN,C+BlnGI,4FACE,iB/BonGN,C+BhnGI,kFACE,e/BknGN,C+B9mGI,0FACE,Y/BgnGN,C+B5mGI,8EACE,mB/B8mGN,C+BzmGE,sEAGE,iBAAA,CAAA,mB/BmnGJ,C+BtnGE,sEAGE,kBAAA,CAAA,kB/BmnGJ,C+BtnGE,sEASE,uB/B6mGJ,C+BtnGE,sEASE,wB/B6mGJ,C+BtnGE,sEAUE,4B/B4mGJ,C+BtnGE,4IAWE,6B/B2mGJ,C+BtnGE,sEAWE,4B/B2mGJ,C+BtnGE,kDAOE,0BAAA,CACA,WAAA,CAFA,eAAA,CADA,eAAA,CAHA,oBAAA,CAAA,iBAAA,CADA,iB/BqnGJ,C+BxmGI,kFACE,e/B0mGN,C+BtmGI,oFAOE,U/B4mGN,C+BnnGI,oFAOE,W/B4mGN,C+BnnGI,gEAME,wBdkIU,CcnIV,UAAA,CADA,WAAA,CAIA,kDAAA,CAAA,0CAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA
,CAAA,iBAAA,CAVA,iBAAA,CACA,UAAA,CACA,U/BgnGN,C+BpmGI,4DACE,4D/BsmGN,C+BxlGE,sDACE,oB/B2lGJ,C+BxlGI,gFACE,gC/B0lGN,C+BrlGE,8DACE,0B/BwlGJ,C+BrlGI,4EACE,wBAlBG,CAmBH,kDAAA,CAAA,0C/BulGN,C+BnlGI,0EACE,a/BqlGN,C+B1mGE,8DACE,oB/B6mGJ,C+B1mGI,wFACE,gC/B4mGN,C+BvmGE,sEACE,0B/B0mGJ,C+BvmGI,oFACE,wBAlBG,CAmBH,sDAAA,CAAA,8C/BymGN,C+BrmGI,kFACE,a/BumGN,C+B5nGE,sDACE,oB/B+nGJ,C+B5nGI,gFACE,gC/B8nGN,C+BznGE,8DACE,0B/B4nGJ,C+BznGI,4EACE,wBAlBG,CAmBH,kDAAA,CAAA,0C/B2nGN,C+BvnGI,0EACE,a/BynGN,C+B9oGE,oDACE,oB/BipGJ,C+B9oGI,8EACE,gC/BgpGN,C+B3oGE,4DACE,0B/B8oGJ,C+B3oGI,0EACE,wBAlBG,CAmBH,iDAAA,CAAA,yC/B6oGN,C+BzoGI,wEACE,a/B2oGN,C+BhqGE,4DACE,oB/BmqGJ,C+BhqGI,sFACE,gC/BkqGN,C+B7pGE,oEACE,0B/BgqGJ,C+B7pGI,kFACE,wBAlBG,CAmBH,qDAAA,CAAA,6C/B+pGN,C+B3pGI,gFACE,a/B6pGN,C+BlrGE,8DACE,oB/BqrGJ,C+BlrGI,wFACE,gC/BorGN,C+B/qGE,sEACE,0B/BkrGJ,C+B/qGI,oFACE,wBAlBG,CAmBH,sDAAA,CAAA,8C/BirGN,C+B7qGI,kFACE,a/B+qGN,C+BpsGE,4DACE,oB/BusGJ,C+BpsGI,sFACE,gC/BssGN,C+BjsGE,oEACE,0B/BosGJ,C+BjsGI,kFACE,wBAlBG,CAmBH,qDAAA,CAAA,6C/BmsGN,C+B/rGI,gFACE,a/BisGN,C+BttGE,4DACE,oB/BytGJ,C+BttGI,sFACE,gC/BwtGN,C+BntGE,oEACE,0B/BstGJ,C+BntGI,kFACE,wBAlBG,CAmBH,qDAAA,CAAA,6C/BqtGN,C+BjtGI,gFACE,a/BmtGN,C+BxuGE,0DACE,oB/B2uGJ,C+BxuGI,oFACE,gC/B0uGN,C+BruGE,kEACE,0B/BwuGJ,C+BruGI,gFACE,wBAlBG,CAmBH,oDAAA,CAAA,4C/BuuGN,C+BnuGI,8EACE,a/BquGN,C+B1vGE,oDACE,oB/B6vGJ,C+B1vGI,8EACE,gC/B4vGN,C+BvvGE,4DACE,0B/B0vGJ,C+BvvGI,0EACE,wBAlBG,CAmBH,iDAAA,CAAA,yC/ByvGN,C+BrvGI,wEACE,a/BuvGN,C+B5wGE,4DACE,oB/B+wGJ,C+B5wGI,sFACE,gC/B8wGN,C+BzwGE,oEACE,0B/B4wGJ,C+BzwGI,kFACE,wBAlBG,CAmBH,qDAAA,CAAA,6C/B2wGN,C+BvwGI,gFACE,a/BywGN,C+B9xGE,wDACE,oB/BiyGJ,C+B9xGI,kFACE,gC/BgyGN,C+B3xGE,gEACE,0B/B8xGJ,C+B3xGI,8EACE,wBAlBG,CAmBH,mDAAA,CAAA,2C/B6xGN,C+BzxGI,4EACE,a/B2xGN,CgC/7GA,MACE,wMhCk8GF,CgCz7GE,sBAEE,uCAAA,CADA,gBhC67GJ,CgCz7GI,mCACE,ahC27GN,CgC57GI,mCACE,chC27GN,CgCv7GM,4BACE,sBhCy7GR,CgCt7GQ,mCACE,gChCw7GV,CgCp7GQ,2DACE,SAAA,CAEA,uBAAA,CADA,ehCu7GV,CgCl7GQ,yGACE,SAAA,CACA,uBhCo7GV,CgCh7GQ,yCACE,YhCk7GV,CgC36GE,0BACE,eAAA,CACA,ehC66GJ
,CgC16GI,+BACE,oBhC46GN,CgCv6GE,gDACE,YhCy6GJ,CgCr6GE,8BAIE,+BAAA,CAHA,oBAAA,CAEA,WAAA,CAGA,SAAA,CAKA,4BAAA,CAJA,4DACE,CAHF,0BhCy6GJ,CgCh6GI,aAdF,8BAeI,+BAAA,CACA,SAAA,CACA,uBhCm6GJ,CACF,CgCh6GI,wCACE,6BhCk6GN,CgC95GI,oCACE,+BhCg6GN,CgC55GI,qCAKE,6BAAA,CADA,UAAA,CAHA,oBAAA,CAEA,YAAA,CAGA,2CAAA,CAAA,mCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAPA,WhCq6GN,CgCx5GQ,mDACE,oBhC05GV,CiCxgHE,kCAEE,iBjC8gHJ,CiChhHE,kCAEE,kBjC8gHJ,CiChhHE,wBAGE,yCAAA,CAFA,oBAAA,CAGA,SAAA,CACA,mCjC2gHJ,CiCtgHI,aAVF,wBAWI,YjCygHJ,CACF,CiCrgHE,6FAEE,SAAA,CACA,mCjCugHJ,CiCjgHE,4FAEE,+BjCmgHJ,CiC//GE,oBACE,yBAAA,CACA,uBAAA,CAGA,yEjC+/GJ,CKh4GI,sC4BrHE,qDACE,uBjCw/GN,CACF,CiCn/GE,kEACE,yBjCq/GJ,CiCj/GE,sBACE,0BjCm/GJ,CkC9iHE,2BACE,alCijHJ,CK53GI,wC6BtLF,2BAKI,elCijHJ,CACF,CkC9iHI,6BAGE,0BAAA,CAAA,2BAAA,CADA,eAAA,CAEA,iBAAA,CAHA,yBAAA,CAAA,iBlCmjHN,CkC7iHM,2CACE,kBlC+iHR,CmChkHE,uBACE,4CnCokHJ,CmC/jHE,8CAJE,kCAAA,CAAA,0BnCukHJ,CmCnkHE,uBACE,4CnCkkHJ,CmC7jHE,4BAEE,kCAAA,CAAA,0BAAA,CADA,qCnCgkHJ,CmC5jHI,mCACE,anC8jHN,CmC1jHI,kCACE,anC4jHN,CmCvjHE,0BAKE,eAAA,CAJA,aAAA,CAEA,YAAA,CACA,aAAA,CAFA,kBAAA,CAAA,mBnC4jHJ,CmCtjHI,uCACE,enCwjHN,CmCpjHI,sCACE,kBnCsjHN,CoCnmHA,MACE,8LpCsmHF,CoC7lHE,oBAGE,iBAAA,CAEA,gBAAA,CADA,apC+lHJ,CoC3lHI,wCACE,uBpC6lHN,CoCzlHI,gCAEE,eAAA,CADA,gBpC4lHN,CoCrlHM,wCACE,mBpCulHR,CoCjlHE,8BAKE,oBpColHJ,CoCzlHE,8BAKE,mBpColHJ,CoCzlHE,8BAOE,4BpCklHJ,CoCzlHE,4DAQE,6BpCilHJ,CoCzlHE,8BAQE,4BpCilHJ,CoCzlHE,oBAME,cAAA,CAHA,aAAA,CACA,epCqlHJ,CoC9kHI,kCACE,uCAAA,CACA,oBpCglHN,CoC5kHI,wCAEE,uCAAA,CADA,YpC+kHN,CoC1kHI,oCASE,WpCglHN,CoCzlHI,oCASE,UpCglHN,CoCzlHI,0BAME,6BAAA,CADA,UAAA,CADA,WAAA,CAMA,yCAAA,CAAA,iCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAZA,iBAAA,CACA,UAAA,CAMA,sBAAA,CADA,yBAAA,CAJA,UpCslHN,CoCzkHM,oCACE,wBpC2kHR,CoCtkHI,4BACE,YpCwkHN,CoCnkHI,4CACE,YpCqkHN,CqC5pHE,+DACE,mBAAA,CACA,cAAA,CACA,uBrC+pHJ,CqC5pHI,2EAGE,iBAAA,CADA,eAAA,CADA,arCgqHN,CsCtqHE,6BACE,sCtCyqHJ,CsCtqHE,cACE,yCtCwqHJ,CsC5pHE,sIACE,oCtC8pHJ,CsCtpHE,2EACE,
qCtCwpHJ,CsC9oHE,wGACE,oCtCgpHJ,CsCvoHE,yFACE,qCtCyoHJ,CsCpoHE,6BACE,kCtCsoHJ,CsChoHE,6CACE,sCtCkoHJ,CsC3nHE,4DACE,sCtC6nHJ,CsCtnHE,4DACE,qCtCwnHJ,CsC/mHE,yFACE,qCtCinHJ,CsCzmHE,2EACE,sCtC2mHJ,CsChmHE,wHACE,qCtCkmHJ,CsC7lHE,8BAGE,mBAAA,CADA,gBAAA,CADA,gBtCimHJ,CsC5lHE,eACE,4CtC8lHJ,CsC3lHE,eACE,4CtC6lHJ,CsCzlHE,gBAIE,+CAAA,CACA,kDAAA,CAJA,aAAA,CAEA,wBAAA,CADA,wBtC8lHJ,CsCvlHE,yBAOE,wCAAA,CACA,+DAAA,CACA,4BAAA,CACA,6BAAA,CARA,iBAAA,CAGA,eAAA,CACA,eAAA,CAFA,cAAA,CADA,oCAAA,CAFA,iBtCkmHJ,CsCtlHI,6BACE,YtCwlHN,CsCrlHM,kCACE,wBAAA,CACA,yBtCulHR,CsCjlHE,iCAaE,wCAAA,CACA,+DAAA,CAJA,uCAAA,CACA,0BAAA,CALA,UAAA,CAJA,oBAAA,CAOA,2BAAA,CADA,2BAAA,CADA,2BAAA,CANA,eAAA,CAWA,wBAAA,CAAA,gBAAA,CAPA,StC0lHJ,CsCxkHE,sBACE,iBAAA,CACA,iBtC0kHJ,CsClkHI,sCACE,gBtCokHN,CsChkHI,gDACE,YtCkkHN,CsCxjHA,gBACE,iBtC2jHF,CsCvjHE,yCACE,aAAA,CACA,StCyjHJ,CsCpjHE,mBACE,YtCsjHJ,CsCjjHE,oBACE,QtCmjHJ,CsC/iHE,4BACE,WAAA,CACA,SAAA,CACA,etCijHJ,CsC9iHI,0CACE,YtCgjHN,CsC1iHE,yBAKE,wCAAA,CAEA,+BAAA,CADA,4BAAA,CAHA,eAAA,CADA,oDAAA,CAEA,wBAAA,CAAA,gBtC+iHJ,CsCxiHE,2BAEE,+DAAA,CADA,2BtC2iHJ,CsCviHI,+BACE,uCAAA,CACA,gBtCyiHN,CsCpiHE,sBACE,MAAA,CACA,WtCsiHJ,CsCjiHA,aACE,atCoiHF,CsC1hHE,4BAEE,aAAA,CADA,YtC8hHJ,CsC1hHI,wDAEE,2BAAA,CADA,wBtC6hHN,CsCvhHE,+BAKE,2CAAA,CAEA,+BAAA,CADA,gCAAA,CADA,sBAAA,CAHA,mBAAA,CACA,gBAAA,CAFA,atC+hHJ,CsCthHI,qCAEE,UAAA,CACA,UAAA,CAFA,atC0hHN,CK5pHI,wCiCiJF,8BACE,iBtC+gHF,CsCrgHE,wSAGE,etC2gHJ,CsCvgHE,sCAEE,mBAAA,CACA,eAAA,CADA,oBAAA,CADA,kBAAA,CAAA,mBtC2gHJ,CACF,CuCn2HI,yDAIE,+BAAA,CACA,8BAAA,CAFA,aAAA,CADA,QAAA,CADA,iBvCy2HN,CuCj2HI,uBAEE,uCAAA,CADA,cvCo2HN,CuC/yHM,iHAEE,WAlDkB,CAiDlB,kBvC0zHR,CuC3zHM,6HAEE,WAlDkB,CAiDlB,kBvCs0HR,CuCv0HM,6HAEE,WAlDkB,CAiDlB,kBvCk1HR,CuCn1HM,oHAEE,WAlDkB,CAiDlB,kBvC81HR,CuC/1HM,0HAEE,WAlDkB,CAiDlB,kBvC02HR,CuC32HM,uHAEE,WAlDkB,CAiDlB,kBvCs3HR,CuCv3HM,uHAEE,WAlDkB,CAiDlB,kBvCk4HR,CuCn4HM,6HAEE,WAlDkB,CAiDlB,kBvC84HR,CuC/4HM,yCAEE,WAlDkB,CAiDlB,kBvCk5HR,CuCn5HM,yCAEE,WAlDkB,CAiDlB,kBvCs5HR,CuCv5HM,0CAEE,WAlDkB,CAiDlB,kBvC05HR,CuC35HM,uCAEE,WAlDkB,CAiDlB
,kBvC85HR,CuC/5HM,wCAEE,WAlDkB,CAiDlB,kBvCk6HR,CuCn6HM,sCAEE,WAlDkB,CAiDlB,kBvCs6HR,CuCv6HM,wCAEE,WAlDkB,CAiDlB,kBvC06HR,CuC36HM,oCAEE,WAlDkB,CAiDlB,kBvC86HR,CuC/6HM,2CAEE,WAlDkB,CAiDlB,kBvCk7HR,CuCn7HM,qCAEE,WAlDkB,CAiDlB,kBvCs7HR,CuCv7HM,oCAEE,WAlDkB,CAiDlB,kBvC07HR,CuC37HM,kCAEE,WAlDkB,CAiDlB,kBvC87HR,CuC/7HM,qCAEE,WAlDkB,CAiDlB,kBvCk8HR,CuCn8HM,mCAEE,WAlDkB,CAiDlB,kBvCs8HR,CuCv8HM,qCAEE,WAlDkB,CAiDlB,kBvC08HR,CuC38HM,wCAEE,WAlDkB,CAiDlB,kBvC88HR,CuC/8HM,sCAEE,WAlDkB,CAiDlB,kBvCk9HR,CuCn9HM,2CAEE,WAlDkB,CAiDlB,kBvCs9HR,CuC38HM,iCAEE,WAPkB,CAMlB,iBvC88HR,CuC/8HM,uCAEE,WAPkB,CAMlB,iBvCk9HR,CuCn9HM,mCAEE,WAPkB,CAMlB,iBvCs9HR,CwCxiIA,MACE,qMAAA,CACA,mMxC2iIF,CwCliIE,wBAKE,mBAAA,CAHA,YAAA,CACA,qBAAA,CACA,YAAA,CAHA,iBxCyiIJ,CwC/hII,8BAGE,QAAA,CACA,SAAA,CAHA,iBAAA,CACA,OxCmiIN,CwC9hIM,qCACE,0BxCgiIR,CwCjgIE,2BAKE,uBAAA,CADA,+DAAA,CAHA,YAAA,CACA,cAAA,CACA,aAAA,CAGA,oBxCmgIJ,CwChgII,aATF,2BAUI,gBxCmgIJ,CACF,CwChgII,cAGE,+BACE,iBxCggIN,CwC7/HM,sCAQE,oCAAA,CANA,QAAA,CAKA,UAAA,CAHA,aAAA,CAEA,UAAA,CAHA,MAAA,CAFA,iBAAA,CAYA,2CAAA,CAJA,qCACE,CAEF,kDAAA,CAPA,+BxCqgIR,CACF,CwCx/HI,8CACE,YxC0/HN,CwCt/HI,iCASE,+BAAA,CACA,6BAAA,CAJA,uCAAA,CAEA,cAAA,CAPA,aAAA,CAGA,gBAAA,CACA,eAAA,CAFA,8BAAA,CAWA,+BAAA,CAHA,2CACE,CALF,kBAAA,CALA,UxCkgIN,CwCn/HM,aAII,6CACE,OxCk/HV,CwCn/HQ,8CACE,OxCq/HV,CwCt/HQ,8CACE,OxCw/HV,CwCz/HQ,8CACE,OxC2/HV,CwC5/HQ,8CACE,OxC8/HV,CwC//HQ,8CACE,OxCigIV,CwClgIQ,8CACE,OxCogIV,CwCrgIQ,8CACE,OxCugIV,CwCxgIQ,8CACE,OxC0gIV,CwC3gIQ,+CACE,QxC6gIV,CwC9gIQ,+CACE,QxCghIV,CwCjhIQ,+CACE,QxCmhIV,CwCphIQ,+CACE,QxCshIV,CwCvhIQ,+CACE,QxCyhIV,CwC1hIQ,+CACE,QxC4hIV,CwC7hIQ,+CACE,QxC+hIV,CwChiIQ,+CACE,QxCkiIV,CwCniIQ,+CACE,QxCqiIV,CwCtiIQ,+CACE,QxCwiIV,CwCziIQ,+CACE,QxC2iIV,CACF,CwCtiIM,uCACE,+BxCwiIR,CwCliIE,4BACE,UxCoiIJ,CwCjiII,aAJF,4BAKI,gBxCoiIJ,CACF,CwChiIE,0BACE,YxCkiIJ,CwC/hII,aAJF,0BAKI,axCkiIJ,CwC9hIM,sCACE,OxCgiIR,CwCjiIM,uCACE,OxCmiIR,CwCpiIM,uCACE,OxCsiIR,CwCviIM,uCACE,OxCyiIR,CwC1iIM,uCACE,OxC4iIR,CwC7iIM,uCACE,OxC+iIR,CwChjIM,uCACE,OxCkjIR,CwCnjIM,uCACE,OxCqjIR,CwCtjIM,u
CACE,OxCwjIR,CwCzjIM,wCACE,QxC2jIR,CwC5jIM,wCACE,QxC8jIR,CwC/jIM,wCACE,QxCikIR,CwClkIM,wCACE,QxCokIR,CwCrkIM,wCACE,QxCukIR,CwCxkIM,wCACE,QxC0kIR,CwC3kIM,wCACE,QxC6kIR,CwC9kIM,wCACE,QxCglIR,CwCjlIM,wCACE,QxCmlIR,CwCplIM,wCACE,QxCslIR,CwCvlIM,wCACE,QxCylIR,CACF,CwCnlII,+FAEE,QxCqlIN,CwCllIM,yGACE,wBAAA,CACA,yBxCqlIR,CwC5kIM,2DAEE,wBAAA,CACA,yBAAA,CAFA,QxCglIR,CwCzkIM,iEACE,QxC2kIR,CwCxkIQ,qLAGE,wBAAA,CACA,yBAAA,CAFA,QxC4kIV,CwCtkIQ,6FACE,wBAAA,CACA,yBxCwkIV,CwCnkIM,yDACE,kBxCqkIR,CwChkII,sCACE,QxCkkIN,CwC7jIE,2BAEE,iBAAA,CAOA,kBAAA,CAHA,uCAAA,CAEA,cAAA,CAPA,aAAA,CAGA,YAAA,CACA,gBAAA,CAEA,mBAAA,CAGA,gCAAA,CAPA,WxCskIJ,CwC5jII,iCAEE,uDAAA,CADA,+BxC+jIN,CwC1jII,iCAKE,6BAAA,CADA,UAAA,CAHA,aAAA,CAEA,WAAA,CAMA,8CAAA,CAAA,sCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CANA,+CACE,CALF,UxCokIN,CwCrjIE,4BAOE,yEACE,CANF,YAAA,CAGA,aAAA,CAFA,qBAAA,CAGA,mBAAA,CALA,iBAAA,CAYA,wBAAA,CATA,YxC2jIJ,CwC/iII,sCACE,wBxCijIN,CwC7iII,oCACE,SxC+iIN,CwC3iII,kCAGE,wEACE,CAFF,mBAAA,CADA,OxC+iIN,CwCriIM,uDACE,8CAAA,CAAA,sCxCuiIR,CKvpII,wCmC8HF,wDAEE,kBxC+hIF,CwCjiIA,wDAEE,mBxC+hIF,CwCjiIA,8CAGE,eAAA,CAFA,eAAA,CAGA,iCxC6hIF,CwCzhIE,8DACE,mBxC4hIJ,CwC7hIE,8DACE,kBxC4hIJ,CwC7hIE,oDAEE,UxC2hIJ,CwCvhIE,8EAEE,kBxC0hIJ,CwC5hIE,8EAEE,mBxC0hIJ,CwC5hIE,8EAGE,kBxCyhIJ,CwC5hIE,8EAGE,mBxCyhIJ,CwC5hIE,oEACE,UxC2hIJ,CwCrhIE,8EAEE,mBxCwhIJ,CwC1hIE,8EAEE,kBxCwhIJ,CwC1hIE,8EAGE,mBxCuhIJ,CwC1hIE,8EAGE,kBxCuhIJ,CwC1hIE,oEACE,UxCyhIJ,CACF,CwC3gIE,cAHF,olDAII,+BxC8gIF,CwC3gIE,g8GACE,sCxC6gIJ,CACF,CwCxgIA,4sDACE,uDxC2gIF,CwCvgIA,wmDACE,axC0gIF,CyCv3IA,MACE,8WAAA,CAEA,uXzC23IF,CyCj3IE,4BAEE,oBAAA,CADA,iBzCq3IJ,CyCh3II,sDAGE,SzCk3IN,CyCr3II,sDAGE,UzCk3IN,CyCr3II,4CACE,iBAAA,CACA,SzCm3IN,CyC72IE,+CAEE,SAAA,CADA,UzCg3IJ,CyC32IE,kDAOE,WzCi3IJ,CyCx3IE,kDAOE,YzCi3IJ,CyCx3IE,wCAME,qDAAA,CADA,UAAA,CADA,aAAA,CAIA,0CAAA,CAAA,kCAAA,CACA,4BAAA,CAAA,oBAAA,CACA,6BAAA,CAAA,qBAAA,CACA,yBAAA,CAAA,iBAAA,CAVA,iBAAA,CACA,SAAA,CACA,YzCq3IJ,CyCz2IE,gEACE,wBxByWa,CwBxWb,mDAAA,CAAA,2CzC22IJ,C0C75IA,QACE,8DAAA,CAGA,+CAA
A,CACA,iEAAA,CACA,oDAAA,CACA,sDAAA,CACA,mDAAA,CAGA,qEAAA,CACA,qEAAA,CACA,wEAAA,CACA,0EAAA,CACA,wEAAA,CACA,yEAAA,CACA,kEAAA,CACA,+DAAA,CACA,oEAAA,CACA,oEAAA,CACA,mEAAA,CACA,gEAAA,CACA,uEAAA,CACA,mEAAA,CACA,qEAAA,CACA,oEAAA,CACA,gEAAA,CACA,wEAAA,CACA,qEAAA,CACA,+D1C45IF,C0Ct5IA,SAEE,kBAAA,CADA,Y1C05IF,CKzxII,mCsChKA,8BACE,U3Ci8IJ,C2Cl8IE,8BACE,W3Ci8IJ,C2Cl8IE,8BAGE,kB3C+7IJ,C2Cl8IE,8BAGE,iB3C+7IJ,C2Cl8IE,oBAKE,mBAAA,CADA,YAAA,CAFA,a3Cg8IJ,C2C17II,kCACE,W3C67IN,C2C97II,kCACE,U3C67IN,C2C97II,kCAEE,iBAAA,CAAA,c3C47IN,C2C97II,kCAEE,aAAA,CAAA,kB3C47IN,CACF","file":"main.css"} \ No newline at end of file diff --git a/devel/css/extra.css b/devel/css/extra.css index 4716fee11..6f3a15839 100644 --- a/devel/css/extra.css +++ b/devel/css/extra.css @@ -49,7 +49,12 @@ a.autorefs-external:hover::after { } .md-typeset h2 { - font-size: 1.7em; + font-size: 1.3em; + font-weight: 300; +} + +.md-typeset h3 { + font-size: 1.1em; font-weight: 300; } @@ -77,12 +82,6 @@ a.autorefs-external:hover::after { user-select: none; } -/* Nicer style of headers in generated API */ -h2 code { - font-size: large!important; - background-color: inherit!important; -} - /* Remove cell input and output prompt */ .jp-InputArea-prompt, .jp-OutputArea-prompt { display: none !important; diff --git a/devel/examples/data_oob/index.html b/devel/examples/data_oob/index.html index be680cd8a..bb3edbc33 100644 --- a/devel/examples/data_oob/index.html +++ b/devel/examples/data_oob/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -696,8 +696,15 @@
  • - - Variance + + Computing the OOB values + + +
  • + +
  • + + Point removal experiments
  • @@ -2494,8 +2501,15 @@
  • - - Variance + + Computing the OOB values + + +
  • + +
  • + + Point removal experiments
  • @@ -2586,32 +2600,6 @@

    Setup& If you are reading this in the documentation, some boilerplate (including most plotting code) has been omitted for convenience. - - - - -
    -
    -
    -
    -
    -
    -   age  fnlwgt  education-num  capital-gain  capital-loss  hours-per-week  \
    -0   39   77516             13          2174             0              40   
    -1   50   83311             13             0             0              13   
    -2   38  215646              9             0             0              40   
    -3   53  234721              7             0             0              40   
    -4   28  338409             13             0             0              40   
    -
    -  income  
    -0  <=50K  
    -1  <=50K  
    -2  <=50K  
    -3  <=50K  
    -4  <=50K  
    -
    -
    -
    @@ -2619,38 +2607,156 @@

    Setup&
    -
    oob_values = compute_data_oob(utility, n_est=1000, max_samples=0.95)
    +
    from pydvl.utils import Dataset, Scorer, Seed, Utility, ensure_seed_sequence
    +from pydvl.value import ValuationResult, compute_data_oob
    +
    +RANDOM_SEED = 42
     
    +
    +
    +
    +
    +
    +

    We will work with the adult classification dataset from the UCI repository. The objective is to predict whether a person earns more than 50k a year based on a set of features such as age, education, occupation, etc.

    +

    With a helper function we download the data and obtain the following pandas dataframe, where the categorical features have been removed:

    +
    -
    -No description has been provided for this image +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    agefnlwgteducation-numcapital-gaincapital-losshours-per-weekincome
    03977516132174040<=50K
    15083311130013<=50K
    23821564690040<=50K
    35323472170040<=50K
    428338409130040<=50K
    -
    -
    -
    -
    -
    -No description has been provided for this image +
    +
    +
    +
    +

    Computing the OOB values

    +

    The main idea of Data-OOB is to take an existing classifier or regression model and compute a per-sample out-of-bag performance estimate via bagging.

    +

    For this example, we use a simple KNN classifier with \(k=5\) neighbours on the data and compute the data-oob values with two choices for the number of estimators in the bagging. For that we construct a Utility object using the Scorer class to specify the metric to use for the evaluation. Note how we pass a random seed to Dataset.from_arrays in order to ensure that we always get the same split when running this notebook multiple times. This will be particularly important when running the standard point removal experiments later.

    +

    We then use the compute_data_oob function to compute the data-oob values.

    +
    +
    + +
    data = Dataset.from_arrays(
    +    X=data_adult.drop(columns=["income"]).values,
    +    y=data_adult.loc[:, "income"].cat.codes.values,
    +    random_state=RANDOM_SEED,
    +)
    +
    +model = KNeighborsClassifier(n_neighbors=5)
    +
    +utility = Utility(model, data, Scorer("accuracy", default=0.0))
    +
    + +
    +
    +
    +
    + +
    n_estimators = [100, 500]
    +oob_values = [
    +    compute_data_oob(utility, n_est=n_est, max_samples=0.95, seed=RANDOM_SEED)
    +    for n_est in n_estimators
    +]
    +
    +
    -

    Variance

    -

    The variance it the weak learner variance. It is computed with Welford's online algorithm.

    +

    The two results are stored in an array of ValuationResult objects. Here's their distribution. The left-hand side depicts value as it increases with rank and a 99% t-confidence interval. The right-hand side shows the histogram of values.

    +

    Observe how adding estimators reduces the variance of the values, but doesn't change their distribution much.

    @@ -2659,7 +2765,7 @@

    Variance
    -No description has been provided for this image +No description has been provided for this image

    @@ -2668,8 +2774,9 @@

    Variance
    -

    Point removal experiments

    -

    The standard procedure for the evaluation of data valuation schemes is the point removal experiment. The objective is to measure the evolution of performance when the best/worst points are removed from the training set.

    +

    Point removal experiments

    +

    The standard procedure for the evaluation of data valuation schemes is the point removal experiment. The objective is to measure the evolution of performance when the best/worst points are removed from the training set. This can be done with the function compute_removal_score, which takes precomputed values and computes the performance of the model as points are removed.

    +

    In order to test the true performance of DataOOB, we repeat the whole task of computing the values and the point removal experiment multiple times, including the splitting of the dataset into training and valuation sets. It is important to remember to pass random state adequately for full reproducibility.

    @@ -2678,7 +2785,7 @@

    Point removal experiments
    -No description has been provided for this image +No description has been provided for this image

    @@ -2690,11 +2797,11 @@

    Point removal experiments2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/examples/influence_imagenet/index.html b/devel/examples/influence_imagenet/index.html index 7331f9c8f..9943890fc 100644 --- a/devel/examples/influence_imagenet/index.html +++ b/devel/examples/influence_imagenet/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3492,11 +3492,11 @@

    Regularizing the Hessian Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/examples/influence_synthetic/index.html b/devel/examples/influence_synthetic/index.html index 8f89176fe..c2de14c42 100644 --- a/devel/examples/influence_synthetic/index.html +++ b/devel/examples/influence_synthetic/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3311,11 +3311,11 @@

    Appendix: Calculating the de Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/examples/influence_wine/index.html b/devel/examples/influence_wine/index.html index ee7ab35af..cf9ebb002 100644 --- a/devel/examples/influence_wine/index.html +++ b/devel/examples/influence_wine/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3194,11 +3194,11 @@

    Speeding up influences for big mo Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/examples/least_core_basic/index.html b/devel/examples/least_core_basic/index.html index 538dca207..72d1c2f93 100644 --- a/devel/examples/least_core_basic/index.html +++ b/devel/examples/least_core_basic/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3078,11 +3078,11 @@

    Remove Worst Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/examples/shapley_basic_spotify/index.html b/devel/examples/shapley_basic_spotify/index.html index d02269767..457d5f587 100644 --- a/devel/examples/shapley_basic_spotify/index.html +++ b/devel/examples/shapley_basic_spotify/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3161,11 +3161,11 @@

    Evaluation on anomalous data2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/examples/shapley_knn_flowers/index.html b/devel/examples/shapley_knn_flowers/index.html index 871f3aefc..4d3ff4f9b 100644 --- a/devel/examples/shapley_knn_flowers/index.html +++ b/devel/examples/shapley_knn_flowers/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2819,11 +2819,11 @@

    Corrupting labels2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/examples/shapley_utility_learning/index.html b/devel/examples/shapley_utility_learning/index.html index feedc4a1c..3cb5b4c4e 100644 --- a/devel/examples/shapley_utility_learning/index.html +++ b/devel/examples/shapley_utility_learning/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3032,11 +3032,11 @@

    Evaluation on anomalous data2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/getting-started/first-steps/index.html b/devel/getting-started/first-steps/index.html index b98b0dfa7..78bf10432 100644 --- a/devel/getting-started/first-steps/index.html +++ b/devel/getting-started/first-steps/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2588,11 +2588,11 @@

    Parallelization2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/getting-started/installation/index.html b/devel/getting-started/installation/index.html index 98b2b0e76..6a83244c4 100644 --- a/devel/getting-started/installation/index.html +++ b/devel/getting-started/installation/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2575,11 +2575,11 @@

    Setting up the cache2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/index.html b/devel/index.html index 3730fe53f..b42933c30 100644 --- a/devel/index.html +++ b/devel/index.html @@ -16,7 +16,7 @@ - + @@ -24,7 +24,7 @@ - + @@ -2466,11 +2466,11 @@

    The python library for data valua Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20
    diff --git a/devel/influence/index.html b/devel/influence/index.html index c634af32f..60c79d28a 100644 --- a/devel/influence/index.html +++ b/devel/influence/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -3125,11 +3125,11 @@

    Arnoldi solver2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/objects.inv b/devel/objects.inv index 6ddc62c0d..53dc04421 100644 Binary files a/devel/objects.inv and b/devel/objects.inv differ diff --git a/devel/search/search_index.json b/devel/search/search_index.json index 99e9df878..01327d935 100644 --- a/devel/search/search_index.json +++ b/devel/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"The python library for data valuation","text":"

    pyDVL collects algorithms for data valuation and influence function computation. It runs most of them in parallel either locally or in a cluster and supports distributed caching of results.

    If you're a first time user of pyDVL, we recommend you to go through the Getting Started and Installing pyDVL guides.

    Installation

    Steps to install and requirements

    Data valuation

    Basics of data valuation and description of the main algorithms

    Influence Function

    An introduction to the influence function and its computation with pyDVL

    Browse the API

    Full documentation of the API

    "},{"location":"CHANGELOG/","title":"Changelog","text":""},{"location":"CHANGELOG/#unreleased","title":"Unreleased","text":"
    • Implementation of Data-OOB by @BastienZim PR #426
    • Refactoring of parallel module. Old imports will stop working in v0.9.0 PR #421
    "},{"location":"CHANGELOG/#070-documentation-and-if-overhaul-new-methods-and-bug-fixes","title":"0.7.0 - \ud83d\udcda\ud83c\udd95 Documentation and IF overhaul, new methods and bug fixes \ud83d\udca5\ud83d\udc1e","text":"

    This is our first \u03b2 release! We have worked hard to deliver improvements across the board, with a focus on documentation and usability. We have also reworked the internals of the influence module, improved parallelism and handling of randomness.

    "},{"location":"CHANGELOG/#added","title":"Added","text":"
    • Implemented solving the Hessian equation via spectral low-rank approximation PR #365
    • Enabled parallel computation for Leave-One-Out values PR #406
    • Added more abbreviations to documentation PR #415
    • Added seed to functions from pydvl.utils.numeric, pydvl.value.shapley and pydvl.value.semivalues. Introduced new type Seed and conversion function ensure_seed_sequence. PR #396
    • Added batch_size parameter to compute_banzhaf_semivalues, compute_beta_shapley_semivalues, compute_shapley_semivalues and compute_generic_semivalues. PR #428
    "},{"location":"CHANGELOG/#changed","title":"Changed","text":"
    • Replaced sphinx with mkdocs for documentation. Major overhaul of documentation PR #352
    • Made ray an optional dependency, relying on joblib as default parallel backend PR #408
    • Decoupled ray.init from ParallelConfig PR #373
    • Breaking Changes
    • Signature change: return information about Hessian inversion from compute_influence_factors PR #375
    • Major changes to IF interface and functionality. Foundation for a framework abstraction for IF computation. PR #278 PR #394
    • Renamed semivalues to compute_generic_semivalues PR #413
    • New joblib backend as default instead of ray. Simplify MapReduceJob. PR #355
    • Bump torch dependency for influence package to 2.0 PR #365
    "},{"location":"CHANGELOG/#fixed","title":"Fixed","text":"
    • Fixes to parallel computation of generic semi-values: properly handle all samplers and stopping criteria, irrespective of parallel backend. PR #372
    • Optimises memory usage in IF calculation PR #375
    • Fix adding valuation results with overlapping indices and different lengths PR #370
    • Fixed bugs in conjugate gradient and linear_solve PR #358
    • Fix installation of dev requirements for Python3.10 PR #382
    • Improvements to IF documentation PR #371
    "},{"location":"CHANGELOG/#061-bug-fixes-and-small-improvements","title":"0.6.1 - \ud83c\udfd7 Bug fixes and small improvements","text":"
    • Fix parsing keyword arguments of compute_semivalues dispatch function PR #333
    • Create new RayExecutor class based on the concurrent.futures API, use the new class to fix an issue with Truncated Monte Carlo Shapley (TMCS) starting too many processes and dying, plus other small changes PR #329
    • Fix creation of GroupedDataset objects using the from_arrays and from_sklearn class methods PR #324
    • Fix release job not triggering on CI when a new tag is pushed PR #331
    • Added alias ApproShapley from Castro et al. 2009 for permutation Shapley PR #332
    "},{"location":"CHANGELOG/#060-new-algorithms-cleanup-and-bug-fixes","title":"0.6.0 - \ud83c\udd95 New algorithms, cleanup and bug fixes \ud83c\udfd7","text":"
    • Fixes in ValuationResult: bugs around data names, semantics of empty(), new method zeros() and normalised random values PR #327
    • New method: Implements generalised semi-values for data valuation, including Data Banzhaf and Beta Shapley, with configurable sampling strategies PR #319
    • Adds kwargs parameter to from_array and from_sklearn Dataset and GroupedDataset class methods PR #316
    • PEP-561 conformance: added py.typed PR #307
    • Removed default non-negativity constraint on least core subsidy and added instead a non_negative_subsidy boolean flag. Renamed options to solver_options and pass it as dict. Change default least-core solver to SCS with 10000 max_iters. PR #304
    • Cleanup: removed unnecessary decorator @unpackable PR #233
    • Stopping criteria: fixed problem with StandardError and enable proper composition of index convergence statuses. Fixed a bug with n_jobs in truncated_montecarlo_shapley. PR #300 and PR #305
    • Shuffling code around to allow for simpler user imports, some cleanup and documentation fixes. PR #284
    • Bug fix: Warn instead of raising an error when n_iterations is less than the size of the dataset in Monte Carlo Least Core PR #281
    "},{"location":"CHANGELOG/#050-fixes-nicer-interfaces-and-more-breaking-changes","title":"0.5.0 - \ud83d\udca5 Fixes, nicer interfaces and... more breaking changes \ud83d\ude12","text":"
    • Fixed parallel and antithetic Owen sampling for Shapley values. Simplified and extended tests. PR #267
    • Added Scorer class for a cleaner interface. Fixed minor bugs around Group-Testing Shapley, added more tests and switched to cvxpy for the solver. PR #264
    • Generalised stopping criteria for valuation algorithms. Improved classes ValuationResult and Status with more operations. Some minor issues fixed. PR #252
    • Fixed a bug whereby compute_shapley_values would only spawn one process when using n_jobs=-1 and Monte Carlo methods. PR #270
    • Bugfix in RayParallelBackend: wrong semantics for kwargs. PR #268
    • Splitting of problem preparation and solution in Least-Core computation. Umbrella function for LC methods. PR #257
    • Operations on ValuationResult and Status and some cleanup PR #248
    • Bug fix and minor improvements: Fixes bug in TMCS with remote Ray cluster, raises an error for dummy sequential parallel backend with TMCS, clones model inside Utility before fitting by default, with flag clone_before_fit to disable it, catches all warnings in Utility when show_warnings is False. Adds Miner and Gloves toy games utilities PR #247
    "},{"location":"CHANGELOG/#040-new-algorithms-and-more-breaking-changes","title":"0.4.0 - \ud83c\udfed\ud83d\udca5 New algorithms and more breaking changes","text":"
    • GH action to mark issues as stale PR #201
    • Disabled caching of Utility values as well as repeated evaluations by default PR #211
    • Test and officially support Python version 3.9 and 3.10 PR #208
    • Breaking change: Introduces a class ValuationResult to gather and inspect results from all valuation algorithms PR #214
    • Fixes bug in Influence calculation with multidimensional input and adds new example notebook PR #195
    • Breaking change: Passes the input to MapReduceJob at initialization, removes chunkify_inputs argument from MapReduceJob, removes n_runs argument from MapReduceJob, calls the parallel backend's put() method for each generated chunk in _chunkify(), renames ParallelConfig's num_workers attribute to n_local_workers, fixes a bug in MapReduceJob's chunkification when n_runs >= n_jobs, and defines a sequential parallel backend to run all jobs in the current thread PR #232
    • New method: Implements exact and monte carlo Least Core for data valuation, adds from_arrays() class method to the Dataset and GroupedDataset classes, adds extra_values argument to ValuationResult, adds compute_removal_score() and compute_random_removal_score() helper functions PR #237
    • New method: Group Testing Shapley for valuation, from Jia et al. 2019 PR #240
    • Fixes bug in ray initialization in RayParallelBackend class PR #239
    • Implements \"Egalitarian Least Core\", adds cvxpy as a dependency and uses it instead of scipy as optimizer PR #243
    "},{"location":"CHANGELOG/#030-breaking-changes","title":"0.3.0 - \ud83d\udca5 Breaking changes","text":"
    • Simplified and fixed powerset sampling and testing PR #181
    • Simplified and fixed publishing to PyPI from CI PR #183
    • Fixed bug in release script and updated contributing docs. PR #184
    • Added Pull Request template PR #185
    • Modified Pull Request template to automatically link PR to issue PR ##186
    • First implementation of Owen Sampling, squashed scores, better testing PR #194
    • Improved documentation on caching, Shapley, caveats of values, bibtex PR #194
    • Breaking change: Rearranging of modules to accommodate for new methods PR #194
    "},{"location":"CHANGELOG/#020-better-docs","title":"0.2.0 - \ud83d\udcda Better docs","text":"

    Mostly API documentation and notebooks, plus some bugfixes.

    "},{"location":"CHANGELOG/#added_1","title":"Added","text":"

    In PR #161: - Support for $$ math in sphinx docs. - Usage of sphinx extension for external links (introducing new directives like :gh:, :issue: and :tfl: to construct standardised links to external resources). - Only update auto-generated documentation files if there are changes. Some minor additions to update_docs.py. - Parallelization of exact combinatorial Shapley. - Integrated KNN shapley into the main interface compute_shapley_values.

    "},{"location":"CHANGELOG/#changed_1","title":"Changed","text":"

    In PR #161: - Improved main docs and Shapley notebooks. Added or fixed many docstrings, readme and documentation for contributors. Typos, grammar and style in code, documentation and notebooks. - Internal renaming and rearranging in the parallelization and caching modules.

    "},{"location":"CHANGELOG/#fixed_1","title":"Fixed","text":"
    • Bug in random matrix generation PR #161.
    • Bugs in MapReduceJob's _chunkify and _backpressure methods PR #176.
    "},{"location":"CHANGELOG/#010-first-release","title":"0.1.0 - \ud83c\udf89 first release","text":"

    This is the very first release of pyDVL.

    It contains:

    • Data Valuation Methods:

    • Leave-One-Out

    • Influence Functions
    • Shapley:
      • Exact Permutation and Combinatorial
      • Montecarlo Permutation and Combinatorial
      • Truncated Montecarlo Permutation
    • Caching of results with Memcached
    • Parallelization of computations with Ray
    • Documentation
    • Notebooks containing examples of different use cases
    "},{"location":"api/pydvl/","title":"API","text":""},{"location":"api/pydvl/#pydvl--the-python-data-valuation-library-api","title":"The Python Data Valuation Library API","text":"

    This is the API documentation for the Python Data Valuation Library (PyDVL). Use the table of contents to access the documentation for each module.

    The two main modules you will want to look at are value and influence.

    "},{"location":"api/pydvl/influence/","title":"Influence","text":"

    This package contains algorithms for the computation of the influence function.

    Warning: Much of the code in this package is experimental or untested and is subject to modification. In particular, the package structure and basic API will probably change.

    "},{"location":"api/pydvl/influence/general/","title":"General","text":"

    This module contains influence calculation functions for general models, as introduced in (Koh and Liang, 2017)1.

    "},{"location":"api/pydvl/influence/general/#pydvl.influence.general--references","title":"References","text":"
    1. Koh, P.W., Liang, P., 2017. Understanding Black-box Predictions via Influence Functions. In: Proceedings of the 34th International Conference on Machine Learning, pp. 1885\u20131894. PMLR.\u00a0\u21a9

    "},{"location":"api/pydvl/influence/general/#pydvl.influence.general.InfluenceType","title":"InfluenceType","text":"

    Bases: str, Enum

    Enum representation for the types of influence.

    ATTRIBUTE DESCRIPTION Up

    Up-weighting a training point, see section 2.1 of (Koh and Liang, 2017)1

    Perturbation

    Perturb a training point, see section 2.2 of (Koh and Liang, 2017)1

    "},{"location":"api/pydvl/influence/general/#pydvl.influence.general.compute_influence_factors","title":"compute_influence_factors(model, training_data, test_data, inversion_method, *, hessian_perturbation=0.0, progress=False, **kwargs)","text":"

    Calculates influence factors of a model for training and test data.

    Given a test point \\(z_{test} = (x_{test}, y_{test})\\), a loss \\(L(z_{test}, \\theta)\\) (\\(\\theta\\) being the parameters of the model) and the Hessian of the model \\(H_{\\theta}\\), influence factors are defined as:

    \\[ s_{test} = H_{\\theta}^{-1} \\operatorname{grad}_{\\theta} L(z_{test}, \\theta). \\]

    They are used for efficient influence calculation. This method first (implicitly) calculates the Hessian and then (explicitly) finds the influence factors for the model using the given inversion method. The parameter hessian_perturbation is used to regularize the inversion of the Hessian. For more info, refer to (Koh and Liang, 2017)1, paragraph 3.

    PARAMETER DESCRIPTION model

    A model wrapped in the TwiceDifferentiable interface.

    TYPE: TwiceDifferentiable

    training_data

    DataLoader containing the training data.

    TYPE: DataLoaderType

    test_data

    DataLoader containing the test data.

    TYPE: DataLoaderType

    inversion_method

    Name of method for computing inverse hessian vector products.

    TYPE: InversionMethod

    hessian_perturbation

    Regularization of the hessian.

    TYPE: float DEFAULT: 0.0

    progress

    If True, display progress bars.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION array

    An array of size (N, D) containing the influence factors for each dimension (D) and test sample (N).

    TYPE: InverseHvpResult

    Source code in src/pydvl/influence/general.py
    def compute_influence_factors(\nmodel: TwiceDifferentiable,\ntraining_data: DataLoaderType,\ntest_data: DataLoaderType,\ninversion_method: InversionMethod,\n*,\nhessian_perturbation: float = 0.0,\nprogress: bool = False,\n**kwargs: Any,\n) -> InverseHvpResult:\nr\"\"\"\n    Calculates influence factors of a model for training and test data.\n    Given a test point \\(z_{test} = (x_{test}, y_{test})\\), a loss \\(L(z_{test}, \\theta)\\)\n    (\\(\\theta\\) being the parameters of the model) and the Hessian of the model \\(H_{\\theta}\\),\n    influence factors are defined as:\n    \\[\n    s_{test} = H_{\\theta}^{-1} \\operatorname{grad}_{\\theta} L(z_{test}, \\theta).\n    \\]\n    They are used for efficient influence calculation. This method first (implicitly) calculates\n    the Hessian and then (explicitly) finds the influence factors for the model using the given\n    inversion method. The parameter `hessian_perturbation` is used to regularize the inversion of\n    the Hessian. 
For more info, refer to (Koh and Liang, 2017)<sup><a href=\"#koh_liang_2017\">1</a></sup>, paragraph 3.\n    Args:\n        model: A model wrapped in the TwiceDifferentiable interface.\n        training_data: DataLoader containing the training data.\n        test_data: DataLoader containing the test data.\n        inversion_method: Name of method for computing inverse hessian vector products.\n        hessian_perturbation: Regularization of the hessian.\n        progress: If True, display progress bars.\n    Returns:\n        array: An array of size (N, D) containing the influence factors for each dimension (D) and test sample (N).\n    \"\"\"\ntensor_util: Type[TensorUtilities] = TensorUtilities.from_twice_differentiable(\nmodel\n)\nstack = tensor_util.stack\nunsqueeze = tensor_util.unsqueeze\ncat_gen = tensor_util.cat_gen\ncat = tensor_util.cat\ndef test_grads() -> Generator[TensorType, None, None]:\nfor x_test, y_test in maybe_progress(\ntest_data, progress, desc=\"Batch Test Gradients\"\n):\nyield stack(\n[\nmodel.grad(inpt, target)\nfor inpt, target in zip(unsqueeze(x_test, 1), y_test)\n]\n)  # type:ignore\ntry:\n# if provided input_data implements __len__, pre-allocate the result tensor to reduce memory consumption\nresulting_shape = (len(test_data), model.num_params)  # type:ignore\nrhs = cat_gen(\ntest_grads(), resulting_shape, model  # type:ignore\n)  # type:ignore\nexcept Exception as e:\nlogger.warning(\nf\"Failed to pre-allocate result tensor: {e}\\n\"\nf\"Evaluate all resulting tensor and concatenate\"\n)\nrhs = cat(list(test_grads()))\nreturn solve_hvp(\ninversion_method,\nmodel,\ntraining_data,\nrhs,\nhessian_perturbation=hessian_perturbation,\n**kwargs,\n)\n
    "},{"location":"api/pydvl/influence/general/#pydvl.influence.general.compute_influences_up","title":"compute_influences_up(model, input_data, influence_factors, *, progress=False)","text":"

    Given the model, the training points, and the influence factors, this function calculates the influences using the up-weighting method.

    The procedure involves two main steps: 1. Calculating the gradients of the model with respect to each training sample (\\(\\operatorname{grad}_{\\theta} L\\), where \\(L\\) is the loss of a single point and \\(\\theta\\) are the parameters of the model). 2. Multiplying each gradient with the influence factors.

    For a detailed description of the methodology, see section 2.1 of (Koh and Liang, 2017)1.

    PARAMETER DESCRIPTION model

    A model that implements the TwiceDifferentiable interface.

    TYPE: TwiceDifferentiable

    input_data

    DataLoader containing the samples for which the influence will be calculated.

    TYPE: DataLoaderType

    influence_factors

    Array containing pre-computed influence factors.

    TYPE: TensorType

    progress

    If set to True, progress bars will be displayed during computation.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION TensorType

    An array of shape [NxM], where N is the number of influence factors, and M is the number of input samples.

    Source code in src/pydvl/influence/general.py
    def compute_influences_up(\nmodel: TwiceDifferentiable,\ninput_data: DataLoaderType,\ninfluence_factors: TensorType,\n*,\nprogress: bool = False,\n) -> TensorType:\nr\"\"\"\n    Given the model, the training points, and the influence factors, this function calculates the\n    influences using the up-weighting method.\n    The procedure involves two main steps:\n    1. Calculating the gradients of the model with respect to each training sample\n       (\\(\\operatorname{grad}_{\\theta} L\\), where \\(L\\) is the loss of a single point and \\(\\theta\\) are the\n       parameters of the model).\n    2. Multiplying each gradient with the influence factors.\n    For a detailed description of the methodology, see section 2.1 of (Koh and Liang, 2017)<sup><a href=\"#koh_liang_2017\">1</a></sup>.\n    Args:\n        model: A model that implements the TwiceDifferentiable interface.\n        input_data: DataLoader containing the samples for which the influence will be calculated.\n        influence_factors: Array containing pre-computed influence factors.\n        progress: If set to True, progress bars will be displayed during computation.\n    Returns:\n        An array of shape [NxM], where N is the number of influence factors, and M is the number of input samples.\n    \"\"\"\ntensor_util: Type[TensorUtilities] = TensorUtilities.from_twice_differentiable(\nmodel\n)\nstack = tensor_util.stack\nunsqueeze = tensor_util.unsqueeze\ncat_gen = tensor_util.cat_gen\ncat = tensor_util.cat\neinsum = tensor_util.einsum\ndef train_grads() -> Generator[TensorType, None, None]:\nfor x, y in maybe_progress(\ninput_data, progress, desc=\"Batch Split Input Gradients\"\n):\nyield stack(\n[model.grad(inpt, target) for inpt, target in zip(unsqueeze(x, 1), y)]\n)  # type:ignore\ntry:\n# if provided input_data implements __len__, pre-allocate the result tensor to reduce memory consumption\nresulting_shape = (len(input_data), model.num_params)  # type:ignore\ntrain_grad_tensor = 
cat_gen(\ntrain_grads(), resulting_shape, model  # type:ignore\n)  # type:ignore\nexcept Exception as e:\nlogger.warning(\nf\"Failed to pre-allocate result tensor: {e}\\n\"\nf\"Evaluate all resulting tensor and concatenate\"\n)\ntrain_grad_tensor = cat([x for x in train_grads()])  # type:ignore\nreturn einsum(\"ta,va->tv\", influence_factors, train_grad_tensor)  # type:ignore\n
    "},{"location":"api/pydvl/influence/general/#pydvl.influence.general.compute_influences_pert","title":"compute_influences_pert(model, input_data, influence_factors, *, progress=False)","text":"

    Calculates the influence values based on the influence factors and training samples using the perturbation method.

    The process involves two main steps: 1. Calculating the gradient of the model with respect to each training sample (\\(\\operatorname{grad}_{\\theta} L\\), where \\(L\\) is the loss of the model for a single data point and \\(\\theta\\) are the parameters of the model). 2. Using the method TwiceDifferentiable.mvp to efficiently compute the product of the influence factors and \\(\\operatorname{grad}_x \\operatorname{grad}_{\\theta} L\\).

    For a detailed methodology, see section 2.2 of (Koh and Liang, 2017)1.

    PARAMETER DESCRIPTION model

    A model that implements the TwiceDifferentiable interface.

    TYPE: TwiceDifferentiable

    input_data

    DataLoader containing the samples for which the influence will be calculated.

    TYPE: DataLoaderType

    influence_factors

    Array containing pre-computed influence factors.

    TYPE: TensorType

    progress

    If set to True, progress bars will be displayed during computation.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION TensorType

    A 3D array with shape [NxMxP], where N is the number of influence factors, M is the number of input samples, and P is the number of features.

    Source code in src/pydvl/influence/general.py
    def compute_influences_pert(\nmodel: TwiceDifferentiable,\ninput_data: DataLoaderType,\ninfluence_factors: TensorType,\n*,\nprogress: bool = False,\n) -> TensorType:\nr\"\"\"\n    Calculates the influence values based on the influence factors and training samples using the perturbation method.\n    The process involves two main steps:\n    1. Calculating the gradient of the model with respect to each training sample\n       (\\(\\operatorname{grad}_{\\theta} L\\), where \\(L\\) is the loss of the model for a single data point and \\(\\theta\\)\n       are the parameters of the model).\n    2. Using the method [TwiceDifferentiable.mvp][pydvl.influence.twice_differentiable.TwiceDifferentiable.mvp]\n       to efficiently compute the product of the\n       influence factors and \\(\\operatorname{grad}_x \\operatorname{grad}_{\\theta} L\\).\n    For a detailed methodology, see section 2.2 of (Koh and Liang, 2017)<sup><a href=\"#koh_liang_2017\">1</a></sup>.\n    Args:\n        model: A model that implements the TwiceDifferentiable interface.\n        input_data: DataLoader containing the samples for which the influence will be calculated.\n        influence_factors: Array containing pre-computed influence factors.\n        progress: If set to True, progress bars will be displayed during computation.\n    Returns:\n        A 3D array with shape [NxMxP], where N is the number of influence factors,\n            M is the number of input samples, and P is the number of features.\n    \"\"\"\ntensor_util: Type[TensorUtilities] = TensorUtilities.from_twice_differentiable(\nmodel\n)\nstack = tensor_util.stack\ntu_slice = tensor_util.slice\nreshape = tensor_util.reshape\nget_element = tensor_util.get_element\nshape = tensor_util.shape\nall_pert_influences = []\nfor x, y in maybe_progress(\ninput_data,\nprogress,\ndesc=\"Batch Influence Perturbation\",\n):\nfor i in range(len(x)):\ntensor_x = tu_slice(x, i, i + 1)\ngrad_xy = model.grad(tensor_x, get_element(y, i), 
create_graph=True)\nperturbation_influences = model.mvp(\ngrad_xy,\ninfluence_factors,\nbackprop_on=tensor_x,\n)\nall_pert_influences.append(\nreshape(perturbation_influences, (-1, *shape(get_element(x, i))))\n)\nreturn stack(all_pert_influences, axis=1)  # type:ignore\n
    "},{"location":"api/pydvl/influence/general/#pydvl.influence.general.compute_influences","title":"compute_influences(differentiable_model, training_data, *, test_data=None, input_data=None, inversion_method=InversionMethod.Direct, influence_type=InfluenceType.Up, hessian_regularization=0.0, progress=False, **kwargs)","text":"

    Calculates the influence of each input data point on the specified test points.

    This method operates in two primary stages: 1. Computes the influence factors for all test points concerning the model and its training data. 2. Uses these factors to derive the influences over the complete set of input data.

    The influence calculation relies on the twice-differentiable nature of the provided model.

    PARAMETER DESCRIPTION differentiable_model

    A model bundled with its corresponding loss in the TwiceDifferentiable wrapper.

    TYPE: TwiceDifferentiable

    training_data

    DataLoader instance supplying the training data. This data is pivotal in computing the Hessian matrix for the model's loss.

    TYPE: DataLoaderType

    test_data

    DataLoader instance with the test samples. Defaults to training_data if None.

    TYPE: Optional[DataLoaderType] DEFAULT: None

    input_data

    DataLoader instance holding samples whose influences need to be computed. Defaults to training_data if None.

    TYPE: Optional[DataLoaderType] DEFAULT: None

    inversion_method

    An enumeration value determining the approach for inverting matrices or computing inverse operations, see pydvl.influence.inversion.InversionMethod.

    TYPE: InversionMethod DEFAULT: Direct

    progress

    A boolean indicating whether progress bars should be displayed during computation.

    TYPE: bool DEFAULT: False

    influence_type

    Determines the methodology for computing influences. Valid choices include 'up' (for up-weighting) and 'perturbation'. For an in-depth understanding, see (Koh and Liang, 2017)1.

    TYPE: InfluenceType DEFAULT: Up

    hessian_regularization

    A lambda value used in Hessian regularization. The regularized Hessian, \\( H_{reg} \\), is computed as \\( H + \\lambda \\times I \\), where \\( I \\) is the identity matrix and \\( H \\) is the simple, unmodified Hessian. This regularization is typically utilized for more sophisticated models to ensure that the Hessian remains positive definite.

    TYPE: float DEFAULT: 0.0

    RETURNS DESCRIPTION TensorType

    The shape of this array varies based on the influence_type. If 'up', the shape is [NxM], where N denotes the number of test points and M denotes the number of training points. Conversely, if the influence_type is 'perturbation', the shape is [NxMxP], with P representing the number of input features.

    Source code in src/pydvl/influence/general.py
    def compute_influences(\ndifferentiable_model: TwiceDifferentiable,\ntraining_data: DataLoaderType,\n*,\ntest_data: Optional[DataLoaderType] = None,\ninput_data: Optional[DataLoaderType] = None,\ninversion_method: InversionMethod = InversionMethod.Direct,\ninfluence_type: InfluenceType = InfluenceType.Up,\nhessian_regularization: float = 0.0,\nprogress: bool = False,\n**kwargs: Any,\n) -> TensorType:  # type: ignore # ToDO fix typing\nr\"\"\"\n    Calculates the influence of each input data point on the specified test points.\n    This method operates in two primary stages:\n    1. Computes the influence factors for all test points concerning the model and its training data.\n    2. Uses these factors to derive the influences over the complete set of input data.\n    The influence calculation relies on the twice-differentiable nature of the provided model.\n    Args:\n        differentiable_model: A model bundled with its corresponding loss in the `TwiceDifferentiable` wrapper.\n        training_data: DataLoader instance supplying the training data. This data is pivotal in computing the\n                       Hessian matrix for the model's loss.\n        test_data: DataLoader instance with the test samples. Defaults to `training_data` if None.\n        input_data: DataLoader instance holding samples whose influences need to be computed. 
Defaults to\n                    `training_data` if None.\n        inversion_method: An enumeration value determining the approach for inverting matrices\n            or computing inverse operations, see [.inversion.InversionMethod]\n        progress: A boolean indicating whether progress bars should be displayed during computation.\n        influence_type: Determines the methodology for computing influences.\n            Valid choices include 'up' (for up-weighting) and 'perturbation'.\n            For an in-depth understanding, see (Koh and Liang, 2017)<sup><a href=\"#koh_liang_2017\">1</a></sup>.\n        hessian_regularization: A lambda value used in Hessian regularization. The regularized Hessian, \\( H_{reg} \\),\n            is computed as \\( H + \\lambda \\times I \\), where \\( I \\) is the identity matrix and \\( H \\)\n            is the simple, unmodified Hessian. This regularization is typically utilized for more\n            sophisticated models to ensure that the Hessian remains positive definite.\n    Returns:\n        The shape of this array varies based on the `influence_type`. If 'up', the shape is [NxM], where\n            N denotes the number of test points and M denotes the number of training points. Conversely, if the\n            influence_type is 'perturbation', the shape is [NxMxP], with P representing the number of input features.\n    \"\"\"\nif input_data is None:\ninput_data = deepcopy(training_data)\nif test_data is None:\ntest_data = deepcopy(training_data)\ninfluence_factors, _ = compute_influence_factors(\ndifferentiable_model,\ntraining_data,\ntest_data,\ninversion_method,\nhessian_perturbation=hessian_regularization,\nprogress=progress,\n**kwargs,\n)\nreturn influence_type_registry[influence_type](\ndifferentiable_model,\ninput_data,\ninfluence_factors,\nprogress=progress,\n)\n
    "},{"location":"api/pydvl/influence/inversion/","title":"Inversion","text":"

    Contains methods to invert the hessian vector product.

    "},{"location":"api/pydvl/influence/inversion/#pydvl.influence.inversion.InversionMethod","title":"InversionMethod","text":"

    Bases: str, Enum

    Different inversion method types.

    "},{"location":"api/pydvl/influence/inversion/#pydvl.influence.inversion.InversionRegistry","title":"InversionRegistry","text":"

    A registry to hold inversion methods for different models.

    "},{"location":"api/pydvl/influence/inversion/#pydvl.influence.inversion.InversionRegistry.register","title":"register(model_type, inversion_method, overwrite=False) classmethod","text":"

    Register a function for a specific model type and inversion method.

    The function to be registered must conform to the following signature: (model: TwiceDifferentiable, training_data: DataLoaderType, b: TensorType, hessian_perturbation: float = 0.0, ...).

    PARAMETER DESCRIPTION model_type

    The type of the model the function should be registered for.

    TYPE: Type[TwiceDifferentiable]

    inversion_method

    The inversion method the function should be registered for.

    TYPE: InversionMethod

    overwrite

    If True, allows overwriting of an existing registered function for the same model type and inversion method. If False, logs a warning when attempting to register a function for an already registered model type and inversion method.

    TYPE: bool DEFAULT: False

    RAISES DESCRIPTION TypeError

    If the provided model_type or inversion_method are of the wrong type.

    ValueError

    If the function to be registered does not match the required signature.

    RETURNS DESCRIPTION

    A decorator for registering a function.

    Source code in src/pydvl/influence/inversion.py
    @classmethod\ndef register(\ncls,\nmodel_type: Type[TwiceDifferentiable],\ninversion_method: InversionMethod,\noverwrite: bool = False,\n):\n\"\"\"\n    Register a function for a specific model type and inversion method.\n    The function to be registered must conform to the following signature:\n    `(model: TwiceDifferentiable, training_data: DataLoaderType, b: TensorType,\n    hessian_perturbation: float = 0.0, ...)`.\n    Args:\n        model_type: The type of the model the function should be registered for.\n        inversion_method: The inversion method the function should be\n            registered for.\n        overwrite: If ``True``, allows overwriting of an existing registered\n            function for the same model type and inversion method. If ``False``,\n            logs a warning when attempting to register a function for an already\n            registered model type and inversion method.\n    Raises:\n        TypeError: If the provided model_type or inversion_method are of the wrong type.\n        ValueError: If the function to be registered does not match the required signature.\n    Returns:\n        A decorator for registering a function.\n    \"\"\"\nif not isinstance(model_type, type):\nraise TypeError(\nf\"'model_type' is of type {type(model_type)} but should be a Type[TwiceDifferentiable]\"\n)\nif not isinstance(inversion_method, InversionMethod):\nraise TypeError(\nf\"'inversion_method' must be an 'InversionMethod' \"\nf\"but has type {type(inversion_method)} instead.\"\n)\nkey = (model_type, inversion_method)\ndef decorator(func):\nif not overwrite and key in cls.registry:\nwarnings.warn(\nf\"There is already a function registered for model type {model_type} \"\nf\"and inversion method {inversion_method}. 
\"\nf\"To overwrite the existing function {cls.registry.get(key)} with {func}, set overwrite to True.\"\n)\nsig = inspect.signature(func)\nparams = list(sig.parameters.values())\nexpected_args = [\n(\"model\", model_type),\n(\"training_data\", DataLoaderType.__bound__),\n(\"b\", model_type.tensor_type()),\n(\"hessian_perturbation\", float),\n]\nfor (name, typ), param in zip(expected_args, params):\nif not (\nisinstance(param.annotation, typ)\nor issubclass(param.annotation, typ)\n):\nraise ValueError(\nf'Parameter \"{name}\" must be of type \"{typ.__name__}\"'\n)\n@functools.wraps(func)\ndef wrapper(*args, **kwargs):\nreturn func(*args, **kwargs)\ncls.registry[key] = wrapper\nreturn wrapper\nreturn decorator\n
    "},{"location":"api/pydvl/influence/inversion/#pydvl.influence.inversion.InversionRegistry.call","title":"call(inversion_method, model, training_data, b, hessian_perturbation, **kwargs) classmethod","text":"

    Call a registered function with the provided parameters.

    PARAMETER DESCRIPTION inversion_method

    The inversion method to use.

    TYPE: InversionMethod

    model

    A model wrapped in the TwiceDifferentiable interface.

    TYPE: TwiceDifferentiable

    training_data

    The training data to use.

    TYPE: DataLoaderType

    b

    Array as the right hand side of the equation \\(Ax = b\\).

    TYPE: TensorType

    hessian_perturbation

    Regularization of the hessian.

    kwargs

    Additional keyword arguments to pass to the inversion method.

    DEFAULT: {}

    RETURNS DESCRIPTION InverseHvpResult

    An instance of InverseHvpResult, that contains an array, which solves the inverse problem, i.e. it returns \\(x\\) such that \\(Ax = b\\), and a dictionary containing information about the inversion process.

    Source code in src/pydvl/influence/inversion.py
    @classmethod\ndef call(\ncls,\ninversion_method: InversionMethod,\nmodel: TwiceDifferentiable,\ntraining_data: DataLoaderType,\nb: TensorType,\nhessian_perturbation,\n**kwargs,\n) -> InverseHvpResult:\nr\"\"\"\n    Call a registered function with the provided parameters.\n    Args:\n        inversion_method: The inversion method to use.\n        model: A model wrapped in the TwiceDifferentiable interface.\n        training_data: The training data to use.\n        b: Array as the right hand side of the equation \\(Ax = b\\).\n        hessian_perturbation: Regularization of the hessian.\n        kwargs: Additional keyword arguments to pass to the inversion method.\n    Returns:\n        An instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult],\n            that contains an array, which solves the inverse problem,\n            i.e. it returns \\(x\\) such that \\(Ax = b\\), and a dictionary containing information\n            about the inversion process.\n    \"\"\"\nreturn cls.get(type(model), inversion_method)(\nmodel, training_data, b, hessian_perturbation, **kwargs\n)\n
    "},{"location":"api/pydvl/influence/inversion/#pydvl.influence.inversion.solve_hvp","title":"solve_hvp(inversion_method, model, training_data, b, *, hessian_perturbation=0.0, **kwargs)","text":"

    Finds \\( x \\) such that \\( Ax = b \\), where \\( A \\) is the hessian of the model, and \\( b \\) a vector. Depending on the inversion method, the hessian is either calculated directly and then inverted, or implicitly and then inverted through matrix-vector products. The method also allows adding a small regularization term (hessian_perturbation) to facilitate inversion for models that are not fully trained.

    PARAMETER DESCRIPTION inversion_method

    TYPE: InversionMethod

    model

    A model wrapped in the TwiceDifferentiable interface.

    TYPE: TwiceDifferentiable

    training_data

    TYPE: DataLoaderType

    b

    Array as the right hand side of the equation \\( Ax = b \\)

    TYPE: TensorType

    hessian_perturbation

    regularization of the hessian.

    TYPE: float DEFAULT: 0.0

    kwargs

    kwargs to pass to the inversion method.

    TYPE: Any DEFAULT: {}

    RETURNS DESCRIPTION InverseHvpResult

    Instance of InverseHvpResult, with an array that solves the inverse problem, i.e., it returns \\( x \\) such that \\( Ax = b \\) and a dictionary containing information about the inversion process.

    Source code in src/pydvl/influence/inversion.py
    def solve_hvp(\ninversion_method: InversionMethod,\nmodel: TwiceDifferentiable,\ntraining_data: DataLoaderType,\nb: TensorType,\n*,\nhessian_perturbation: float = 0.0,\n**kwargs: Any,\n) -> InverseHvpResult:\nr\"\"\"\n    Finds \\( x \\) such that \\( Ax = b \\), where \\( A \\) is the hessian of the model,\n    and \\( b \\) a vector. Depending on the inversion method, the hessian is either\n    calculated directly and then inverted, or implicitly and then inverted through\n    matrix vector product. The method also allows to add a small regularization term\n    (hessian_perturbation) to facilitate inversion of non fully trained models.\n    Args:\n        inversion_method:\n        model: A model wrapped in the TwiceDifferentiable interface.\n        training_data:\n        b: Array as the right hand side of the equation \\( Ax = b \\)\n        hessian_perturbation: regularization of the hessian.\n        kwargs: kwargs to pass to the inversion method.\n    Returns:\n        Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult], with\n            an array that solves the inverse problem, i.e., it returns \\( x \\) such that \\( Ax = b \\)\n            and a dictionary containing information about the inversion process.\n    \"\"\"\nreturn InversionRegistry.call(\ninversion_method,\nmodel,\ntraining_data,\nb,\nhessian_perturbation=hessian_perturbation,\n**kwargs,\n)\n
    "},{"location":"api/pydvl/influence/twice_differentiable/","title":"Twice differentiable","text":""},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorType","title":"TensorType = TypeVar('TensorType', bound=Sequence) module-attribute","text":"

    Type variable for tensors, i.e. sequences of numbers

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.ModelType","title":"ModelType = TypeVar('ModelType', bound='TwiceDifferentiable') module-attribute","text":"

    Type variable for twice differentiable models

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.DataLoaderType","title":"DataLoaderType = TypeVar('DataLoaderType', bound=Iterable) module-attribute","text":"

    Type variable for data loaders

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.InverseHvpResult","title":"InverseHvpResult dataclass","text":"

    Bases: Generic[TensorType]

    Container class for results of solving a problem \\(Ax=b\\)

    PARAMETER DESCRIPTION x

    solution of a problem \\(Ax=b\\)

    TYPE: TensorType

    info

    additional information, to couple with the solution itself

    TYPE: Dict[str, Any]

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TwiceDifferentiable","title":"TwiceDifferentiable","text":"

    Bases: ABC, Generic[TensorType]

    Abstract base class for wrappers of differentiable models and losses. Meant to be subclassed for each supported framework. Provides methods to compute gradients and second derivatives of the loss with respect to the model parameters.

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TwiceDifferentiable.num_params","title":"num_params: int abstractmethod property","text":"

    Returns the number of parameters of the model

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TwiceDifferentiable.parameters","title":"parameters: List[TensorType] abstractmethod property","text":"

    Returns all the model parameters that require differentiation

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TwiceDifferentiable.grad","title":"grad(x, y, create_graph=False)","text":"

    Calculates the gradient of the loss with respect to the model parameters.

    PARAMETER DESCRIPTION x

    A matrix representing the features \\(x_i\\).

    TYPE: TensorType

    y

    A matrix representing the target values \\(y_i\\).

    TYPE: TensorType

    create_graph

    Used for further differentiation on input parameters.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION TensorType

    An array with the gradients of the model.

    Source code in src/pydvl/influence/twice_differentiable.py
    def grad(\nself, x: TensorType, y: TensorType, create_graph: bool = False\n) -> TensorType:\nr\"\"\"\n    Calculates gradient of model parameters with respect to the model parameters.\n    Args:\n        x: A matrix representing the features \\(x_i\\).\n        y: A matrix representing the target values \\(y_i\\).\n        create_graph: Used for further differentiation on input parameters.\n    Returns:\n        An array with the gradients of the model.\n    \"\"\"\npass\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TwiceDifferentiable.hessian","title":"hessian(x, y)","text":"

    Calculates the full Hessian of \\(L(f(x),y)\\) with respect to the model parameters given data \\(x\\) and \\(y\\).

    PARAMETER DESCRIPTION x

    An array representing the features \\(x_i\\).

    TYPE: TensorType

    y

    An array representing the target values \\(y_i\\).

    TYPE: TensorType

    RETURNS DESCRIPTION TensorType

    A tensor representing the Hessian of the model, i.e. the second derivative with respect to the model parameters.

    Source code in src/pydvl/influence/twice_differentiable.py
    def hessian(self, x: TensorType, y: TensorType) -> TensorType:\nr\"\"\"\n    Calculates the full Hessian of \\(L(f(x),y)\\) with respect to the model parameters given data \\(x\\) and \\(y\\).\n    Args:\n        x: An array representing the features \\(x_i\\).\n        y: An array representing the target values \\(y_i\\).\n    Returns:\n        A tensor representing the Hessian of the model, i.e. the second derivative\n            with respect to the model parameters.\n    \"\"\"\npass\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TwiceDifferentiable.mvp","title":"mvp(grad_xy, v, backprop_on, *, progress=False) abstractmethod staticmethod","text":"

    Calculates the second order derivative of the model along directions \\(v\\). The second order derivative can be selected through the backprop_on argument.

    PARAMETER DESCRIPTION grad_xy

    An array [P] holding the gradients of the model parameters with respect to input \\(x\\) and labels \\(y\\). \\(P\\) is the number of parameters of the model. Typically obtained through self.grad.

    TYPE: TensorType

    v

    An array ([DxP] or even one-dimensional [D]) which multiplies the matrix. \\(D\\) is the number of directions.

    TYPE: TensorType

    progress

    If True, progress is displayed.

    TYPE: bool DEFAULT: False

    backprop_on

    Tensor used in the second backpropagation. The first one is along \\(x\\) and \\(y\\) as defined via grad_xy.

    TYPE: TensorType

    RETURNS DESCRIPTION TensorType

    A matrix representing the implicit matrix-vector product of the model along the given directions. Output shape is [DxM], where \\(M\\) is the number of elements of backprop_on.

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef mvp(\ngrad_xy: TensorType,\nv: TensorType,\nbackprop_on: TensorType,\n*,\nprogress: bool = False,\n) -> TensorType:\nr\"\"\"\n    Calculates the second order derivative of the model along directions \\(v\\).\n    The second order derivative can be selected through the `backprop_on` argument.\n    Args:\n        grad_xy: An array [P] holding the gradients of the model parameters with respect to input \\(x\\) and\n            labels \\(y\\). \\(P\\) is the number of parameters of the model. Typically obtained through `self.grad`.\n        v: An array ([DxP] or even one-dimensional [D]) which multiplies the matrix.\n            \\(D\\) is the number of directions.\n        progress: If `True`, progress is displayed.\n        backprop_on: Tensor used in the second backpropagation. The first one is along \\(x\\) and \\(y\\)\n            as defined via `grad_xy`.\n    Returns:\n        A matrix representing the implicit matrix-vector product of the model along the given directions.\n            Output shape is [DxM], where \\(M\\) is the number of elements of `backprop_on`.\n    \"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities","title":"TensorUtilities","text":"

    Bases: Generic[TensorType, ModelType], ABC

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.__init_subclass__","title":"__init_subclass__(**kwargs)","text":"

    Automatically registers non-abstract subclasses in the registry.

    This method checks if twice_differentiable_type is defined in the subclass and if it is of the correct type. If the attribute is missing or of the wrong type, a TypeError is raised.

    PARAMETER DESCRIPTION kwargs

    Additional keyword arguments.

    DEFAULT: {}

    RAISES DESCRIPTION TypeError

    If the subclass does not define twice_differentiable_type, or if it is not of the correct type.

    Source code in src/pydvl/influence/twice_differentiable.py
    def __init_subclass__(cls, **kwargs):\n\"\"\"\n    Automatically registers non-abstract subclasses in the registry.\n    This method checks if `twice_differentiable_type` is defined in the subclass and if it is of the correct type.\n    If either attribute is missing or incorrect, a `TypeError` is raised.\n    Args:\n        kwargs: Additional keyword arguments.\n    Raises:\n        TypeError: If the subclass does not define `twice_differentiable_type`, or if it is not of the correct type.\n    \"\"\"\nif not hasattr(cls, \"twice_differentiable_type\") or not isinstance(\ncls.twice_differentiable_type, type\n):\nraise TypeError(\nf\"'twice_differentiable_type' must be a Type[TwiceDifferentiable]\"\n)\ncls.registry[cls.twice_differentiable_type] = cls\nsuper().__init_subclass__(**kwargs)\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.einsum","title":"einsum(equation, *operands) abstractmethod staticmethod","text":"

    Sums the product of the elements of the input operands along dimensions specified using a notation based on the Einstein summation convention.

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef einsum(equation, *operands) -> TensorType:\n\"\"\"Sums the product of the elements of the input `operands` along dimensions specified using a notation\n    based on the Einstein summation convention.\n    \"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.cat","title":"cat(a, **kwargs) abstractmethod staticmethod","text":"

    Concatenates a sequence of tensors into a single torch tensor

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef cat(a: Sequence[TensorType], **kwargs) -> TensorType:\n\"\"\"Concatenates a sequence of tensors into a single torch tensor\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.stack","title":"stack(a, **kwargs) abstractmethod staticmethod","text":"

    Stacks a sequence of tensors into a single torch tensor

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef stack(a: Sequence[TensorType], **kwargs) -> TensorType:\n\"\"\"Stacks a sequence of tensors into a single torch tensor\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.unsqueeze","title":"unsqueeze(x, dim) abstractmethod staticmethod","text":"

    Add a singleton dimension at a specified position in a tensor

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef unsqueeze(x: TensorType, dim: int) -> TensorType:\n\"\"\"Add a singleton dimension at a specified position in a tensor\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.get_element","title":"get_element(x, idx) abstractmethod staticmethod","text":"

    Get the tensor element x[i] from the first non-singular dimension

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef get_element(x: TensorType, idx: int) -> TensorType:\n\"\"\"Get the tensor element x[i] from the first non-singular dimension\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.slice","title":"slice(x, start, stop, axis=0) abstractmethod staticmethod","text":"

    Slice a tensor in the provided axis

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef slice(x: TensorType, start: int, stop: int, axis: int = 0) -> TensorType:\n\"\"\"Slice a tensor in the provided axis\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.shape","title":"shape(x) abstractmethod staticmethod","text":"

    Returns the shape of a tensor as a tuple of integers

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef shape(x: TensorType) -> Tuple[int, ...]:\n\"\"\"Slice a tensor in the provided axis\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.reshape","title":"reshape(x, shape) abstractmethod staticmethod","text":"

    Reshape a tensor to the provided shape

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef reshape(x: TensorType, shape: Tuple[int, ...]) -> TensorType:\n\"\"\"Reshape a tensor to the provided shape\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.cat_gen","title":"cat_gen(a, resulting_shape, model) abstractmethod staticmethod","text":"

    Concatenate tensors from a generator. The resulting tensor has shape resulting_shape and is compatible with model

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef cat_gen(\na: Generator[TensorType, None, None],\nresulting_shape: Tuple[int, ...],\nmodel: ModelType,\n) -> TensorType:\n\"\"\"Concatenate tensors from a generator. Resulting tensor is of shape resulting_shape\n    and compatible to model\n    \"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.from_twice_differentiable","title":"from_twice_differentiable(twice_diff) classmethod","text":"

    Factory method to retrieve the subclass of TensorUtilities registered for the type of a given instance of a subclass of TwiceDifferentiable.

    PARAMETER DESCRIPTION twice_diff

    An instance of a subclass of TwiceDifferentiable for which a corresponding TensorUtilities object is required.

    TYPE: TwiceDifferentiable

    RETURNS DESCRIPTION Type[TensorUtilities]

    A subclass of TensorUtilities registered for the type of the provided TwiceDifferentiable instance.

    RAISES DESCRIPTION KeyError

    If there's no registered TensorUtilities for the provided TwiceDifferentiable type.

    Source code in src/pydvl/influence/twice_differentiable.py
    @classmethod\ndef from_twice_differentiable(\ncls,\ntwice_diff: TwiceDifferentiable,\n) -> Type[\"TensorUtilities\"]:\n\"\"\"\n    Factory method to create an instance of a subclass\n    [TensorUtilities][pydvl.influence.twice_differentiable.TensorUtilities] from an instance of a subclass of\n    [TwiceDifferentiable][pydvl.influence.twice_differentiable.TwiceDifferentiable].\n    Args:\n        twice_diff: An instance of a subclass of\n            [TwiceDifferentiable][pydvl.influence.twice_differentiable.TwiceDifferentiable]\n            for which a corresponding [TensorUtilities][pydvl.influence.twice_differentiable.TensorUtilities]\n            object is required.\n    Returns:\n        An subclass of [TensorUtilities][pydvl.influence.twice_differentiable.TensorUtilities]\n            registered to the provided subclass instance of\n            [TwiceDifferentiable][pydvl.influence.twice_differentiable.TwiceDifferentiable] object.\n    Raises:\n        KeyError: If there's no registered [TensorUtilities][pydvl.influence.twice_differentiable.TensorUtilities]\n            for the provided [TwiceDifferentiable][pydvl.influence.twice_differentiable.TwiceDifferentiable] type.\n    \"\"\"\ntu = cls.registry.get(type(twice_diff), None)\nif tu is None:\nraise KeyError(\nf\"No registered TensorUtilities for the type {type(twice_diff).__name__}\"\n)\nreturn tu\n
    "},{"location":"api/pydvl/influence/torch/","title":"Torch","text":""},{"location":"api/pydvl/influence/torch/functional/","title":"Functional","text":""},{"location":"api/pydvl/influence/torch/functional/#pydvl.influence.torch.functional.hvp","title":"hvp(func, params, vec, reverse_only=True)","text":"

    Computes the Hessian-vector product (HVP) for a given function at given parameters, i.e.

    \\[\\nabla_{\\theta} \\nabla_{\\theta} f (\\theta)\\cdot v\\]

    This function can operate in two modes, either reverse-mode autodiff only or both forward- and reverse-mode autodiff.

    PARAMETER DESCRIPTION func

    The scalar-valued function for which the HVP is computed.

    TYPE: Callable[[TorchTensorContainerType], Tensor]

    params

    The parameters at which the HVP is computed.

    TYPE: TorchTensorContainerType

    vec

    The vector with which the Hessian is multiplied.

    TYPE: TorchTensorContainerType

    reverse_only

    Whether to use only reverse-mode autodiff (True, default) or both forward- and reverse-mode autodiff (False).

    TYPE: bool DEFAULT: True

    RETURNS DESCRIPTION TorchTensorContainerType

    The HVP of the function at the given parameters with the given vector.

    Example:

    >>> def f(z): return torch.sum(z**2)\n>>> u = torch.ones(10, requires_grad=True)\n>>> v = torch.ones(10)\n>>> hvp_vec = hvp(f, u, v)\n>>> assert torch.allclose(hvp_vec, torch.full((10, ), 2.0))\n

    Source code in src/pydvl/influence/torch/functional.py
    def hvp(\nfunc: Callable[[TorchTensorContainerType], torch.Tensor],\nparams: TorchTensorContainerType,\nvec: TorchTensorContainerType,\nreverse_only: bool = True,\n) -> TorchTensorContainerType:\nr\"\"\"\n    Computes the Hessian-vector product (HVP) for a given function at given parameters, i.e.\n    \\[\\nabla_{\\theta} \\nabla_{\\theta} f (\\theta)\\cdot v\\]\n    This function can operate in two modes, either reverse-mode autodiff only or both\n    forward- and reverse-mode autodiff.\n    Args:\n        func: The scalar-valued function for which the HVP is computed.\n        params: The parameters at which the HVP is computed.\n        vec: The vector with which the Hessian is multiplied.\n        reverse_only: Whether to use only reverse-mode autodiff\n            (True, default) or both forward- and reverse-mode autodiff (False).\n    Returns:\n       The HVP of the function at the given parameters with the given vector.\n    Example:\n    ```python\n    >>> def f(z): return torch.sum(z**2)\n    >>> u = torch.ones(10, requires_grad=True)\n    >>> v = torch.ones(10)\n    >>> hvp_vec = hvp(f, u, v)\n    >>> assert torch.allclose(hvp_vec, torch.full((10, ), 2.0))\n    ```\n    \"\"\"\noutput: TorchTensorContainerType\nif reverse_only:\n_, vjp_fn = vjp(grad(func), params)\noutput = vjp_fn(vec)[0]\nelse:\noutput = jvp(grad(func), (params,), (vec,))[1]\nreturn output\n
    "},{"location":"api/pydvl/influence/torch/functional/#pydvl.influence.torch.functional.batch_hvp_gen","title":"batch_hvp_gen(model, loss, data_loader, reverse_only=True)","text":"

    Generates a sequence of batch Hessian-vector product (HVP) computations for the provided model, loss function, and data loader. If \\(f_i\\) is the model's loss on the \\(i\\)-th batch and \\(\\theta\\) the model parameters, this is the sequence of the callable matrix vector products for the matrices

    \\[\\nabla_{\\theta}\\nabla_{\\theta}f_i(\\theta), \\quad i=1,\\dots, \\text{num_batches} \\]

    i.e. iterating over the data_loader, yielding partial function calls for calculating HVPs.

    PARAMETER DESCRIPTION model

    The PyTorch model for which the HVP is calculated.

    TYPE: Module

    loss

    The loss function used to calculate the gradient and HVP.

    TYPE: Callable[[Tensor, Tensor], Tensor]

    data_loader

    PyTorch DataLoader object containing the dataset for which the HVP is calculated.

    TYPE: DataLoader

    reverse_only

    Whether to use only reverse-mode autodiff (True, default) or both forward- and reverse-mode autodiff (False).

    TYPE: bool DEFAULT: True

    YIELDS DESCRIPTION Callable[[Tensor], Tensor]

    Partial functions H_{batch}(vec)=hvp(model, loss, inputs, targets, vec) that, when called, will compute the Hessian-vector product H(vec) for the given model and loss in a batch-wise manner, where (inputs, targets) come from one batch.

    Source code in src/pydvl/influence/torch/functional.py
    def batch_hvp_gen(\nmodel: torch.nn.Module,\nloss: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],\ndata_loader: DataLoader,\nreverse_only: bool = True,\n) -> Generator[Callable[[torch.Tensor], torch.Tensor], None, None]:\nr\"\"\"\n    Generates a sequence of batch Hessian-vector product (HVP) computations for the provided model, loss function,\n    and data loader. If \\(f_i\\) is the model's loss on the \\(i\\)-th batch and \\(\\theta\\) the model parameters,\n    this is the sequence of the callable matrix vector products for the matrices\n    \\[\\nabla_{\\theta}\\nabla_{\\theta}f_i(\\theta), \\quad i=1,\\dots, \\text{num_batches} \\]\n    i.e. iterating over the data_loader, yielding partial function calls for calculating HVPs.\n    Args:\n        model: The PyTorch model for which the HVP is calculated.\n        loss: The loss function used to calculate the gradient and HVP.\n        data_loader: PyTorch DataLoader object containing the dataset for which the HVP is calculated.\n        reverse_only: Whether to use only reverse-mode autodiff\n            (True, default) or both forward- and reverse-mode autodiff (False).\n    Yields:\n        Partial functions `H_{batch}(vec)=hvp(model, loss, inputs, targets, vec)` that when called,\n            will compute the Hessian-vector product H(vec) for the given model and loss in a batch-wise manner, where\n            (inputs, targets) coming from one batch.\n    \"\"\"\nfor inputs, targets in iter(data_loader):\nbatch_loss = batch_loss_function(model, loss, inputs, targets)\nmodel_params = dict(model.named_parameters())\ndef batch_hvp(vec: torch.Tensor):\nreturn flatten_tensors_to_vector(\nhvp(\nbatch_loss,\nmodel_params,\nalign_structure(model_params, vec),\nreverse_only=reverse_only,\n).values()\n)\nyield batch_hvp\n
    "},{"location":"api/pydvl/influence/torch/functional/#pydvl.influence.torch.functional.empirical_loss_function","title":"empirical_loss_function(model, loss, data_loader)","text":"

    Creates a function to compute the empirical loss of a given model on a given dataset. If we denote the model parameters with \\( \\theta \\), the resulting function approximates:

    \\[f(\\theta) = \\frac{1}{N}\\sum_{i=1}^N \\operatorname{loss}(\\operatorname{model}(\\theta, x_i), y_i)\\]

    PARAMETER DESCRIPTION model

    The model for which the loss should be computed.

    TYPE: Module

    loss

    The loss function to be used.

    TYPE: Callable[[Tensor, Tensor], Tensor]

    data_loader

    The data loader for iterating over the dataset.

    TYPE: DataLoader

    RETURNS DESCRIPTION Callable[[Dict[str, Tensor]], Tensor]

    A function that computes the empirical loss of the model on the dataset for given model parameters.

    Source code in src/pydvl/influence/torch/functional.py
    def empirical_loss_function(\nmodel: torch.nn.Module,\nloss: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],\ndata_loader: DataLoader,\n) -> Callable[[Dict[str, torch.Tensor]], torch.Tensor]:\nr\"\"\"\n    Creates a function to compute the empirical loss of a given model on a given dataset.\n    If we denote the model parameters with \\( \\theta \\), the resulting function approximates:\n    \\[f(\\theta) = \\frac{1}{N}\\sum_{i=1}^N \\operatorname{loss}(y_i, \\operatorname{model}(\\theta, x_i))\\]\n    Args:\n    - model: The model for which the loss should be computed.\n    - loss: The loss function to be used.\n    - data_loader: The data loader for iterating over the dataset.\n    Returns:\n        A function that computes the empirical loss of the model on the dataset for given model parameters.\n    \"\"\"\ndef empirical_loss(params: Dict[str, torch.Tensor]):\ntotal_loss = to_model_device(torch.zeros((), requires_grad=True), model)\ntotal_samples = to_model_device(torch.zeros(()), model)\nfor x, y in iter(data_loader):\noutput = functional_call(\nmodel, params, (to_model_device(x, model),), strict=True\n)\nloss_value = loss(output, to_model_device(y, model))\ntotal_loss = total_loss + loss_value * x.size(0)\ntotal_samples += x.size(0)\nreturn total_loss / total_samples\nreturn empirical_loss\n
    "},{"location":"api/pydvl/influence/torch/functional/#pydvl.influence.torch.functional.batch_loss_function","title":"batch_loss_function(model, loss, x, y)","text":"

    Creates a function to compute the loss of a given model on a given batch of data, i.e. for the \\(i\\)-th batch \\(B_i\\)

    \\[\\frac{1}{|B_i|}\\sum_{x,y \\in B_i} \\operatorname{loss}(\\operatorname{model}(\\theta, x), y)\\]

    PARAMETER DESCRIPTION model

    The model for which the loss should be computed.

    TYPE: Module

    loss

    The loss function to be used.

    TYPE: Callable[[Tensor, Tensor], Tensor]

    x

    The input data for the batch.

    TYPE: Tensor

    y

    The true labels for the batch.

    TYPE: Tensor

    RETURNS DESCRIPTION Callable[[Dict[str, Tensor]], Tensor]

    A function that computes the loss of the model on the batch for given model parameters.

    Source code in src/pydvl/influence/torch/functional.py
    def batch_loss_function(\nmodel: torch.nn.Module,\nloss: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],\nx: torch.Tensor,\ny: torch.Tensor,\n) -> Callable[[Dict[str, torch.Tensor]], torch.Tensor]:\nr\"\"\"\n    Creates a function to compute the loss of a given model on a given batch of data, i.e. for the $i$-th batch $B_i$\n    \\[\\frac{1}{|B_i|}\\sum_{x,y \\in B_i} \\operatorname{loss}(y, \\operatorname{model}(\\theta, x))\\]\n    Args:\n        model: The model for which the loss should be computed.\n        loss: The loss function to be used.\n        x: The input data for the batch.\n        y: The true labels for the batch.\n    Returns:\n        A function that computes the loss of the model on the batch for given model parameters.\n    \"\"\"\ndef batch_loss(params: Dict[str, torch.Tensor]):\noutputs = functional_call(\nmodel, params, (to_model_device(x, model),), strict=True\n)\nreturn loss(outputs, y)\nreturn batch_loss\n
    "},{"location":"api/pydvl/influence/torch/functional/#pydvl.influence.torch.functional.get_hvp_function","title":"get_hvp_function(model, loss, data_loader, use_hessian_avg=True, reverse_only=True, track_gradients=False)","text":"

    Returns a function that calculates the approximate Hessian-vector product for a given vector. If you want to compute the exact Hessian-vector product, i.e. pull all data into memory and perform a full gradient computation, use the function hvp.

    PARAMETER DESCRIPTION model

    A PyTorch module representing the model whose loss function's Hessian is to be computed.

    TYPE: Module

    loss

    A callable that takes the model's output and target as input and returns the scalar loss.

    TYPE: Callable[[Tensor, Tensor], Tensor]

    data_loader

    A DataLoader instance that provides batches of data for calculating the Hessian-vector product. Each batch from the DataLoader is assumed to return a tuple where the first element is the model's input and the second element is the target output.

    TYPE: DataLoader

    use_hessian_avg

    If True, the returned function uses batch-wise Hessian computation via batch_loss_function and averages the results. If False, the function uses backpropagation on the full empirical_loss_function, which is more accurate than averaging the batch hessians, but probably has a way higher memory usage.

    TYPE: bool DEFAULT: True

    reverse_only

    Whether to use only reverse-mode autodiff (True, default) or both forward- and reverse-mode autodiff (False).

    TYPE: bool DEFAULT: True

    track_gradients

    Whether to track gradients for the resulting tensor of the Hessian-vector products (False, default).

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION Callable[[Tensor], Tensor]

    A function that takes a single argument, a vector, and returns the product of the Hessian of the loss function with respect to the model's parameters and the input vector.

    Source code in src/pydvl/influence/torch/functional.py
    def get_hvp_function(\nmodel: torch.nn.Module,\nloss: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],\ndata_loader: DataLoader,\nuse_hessian_avg: bool = True,\nreverse_only: bool = True,\ntrack_gradients: bool = False,\n) -> Callable[[torch.Tensor], torch.Tensor]:\n\"\"\"\n    Returns a function that calculates the approximate Hessian-vector product for a given vector. If you want to\n    compute the exact hessian, i.e., pulling all data into memory and compute a full gradient computation, use\n    the function `hvp`.\n    Args:\n        model: A PyTorch module representing the model whose loss function's Hessian is to be computed.\n        loss: A callable that takes the model's output and target as input and returns the scalar loss.\n        data_loader: A DataLoader instance that provides batches of data for calculating the Hessian-vector product.\n            Each batch from the DataLoader is assumed to return a tuple where the first element\n            is the model's input and the second element is the target output.\n        use_hessian_avg: If True, the returned function uses batch-wise Hessian computation via\n            [batch_loss_function][pydvl.influence.torch.functional.batch_loss_function] and averages the results.\n            If False, the function uses backpropagation on the full\n            [empirical_loss_function][pydvl.influence.torch.functional.empirical_loss_function],\n            which is more accurate than averaging the batch hessians, but probably has a way higher memory usage.\n        reverse_only: Whether to use only reverse-mode autodiff (True, default) or\n            both forward- and reverse-mode autodiff (False).\n        track_gradients: Whether to track gradients for the resulting tensor of the hessian vector\n            products are (False, default).\n    Returns:\n        A function that takes a single argument, a vector, and returns the product of the Hessian of the `loss`\n            function with respect to 
the `model`'s parameters and the input vector.\n    \"\"\"\nparams = {\nk: p if track_gradients else p.detach() for k, p in model.named_parameters()\n}\ndef hvp_function(vec: torch.Tensor) -> torch.Tensor:\nv = align_structure(params, vec)\nempirical_loss = empirical_loss_function(model, loss, data_loader)\nreturn flatten_tensors_to_vector(\nhvp(empirical_loss, params, v, reverse_only=reverse_only).values()\n)\ndef avg_hvp_function(vec: torch.Tensor) -> torch.Tensor:\nv = align_structure(params, vec)\nbatch_hessians_vector_products: Iterable[torch.Tensor] = map(\nlambda x: x(v), batch_hvp_gen(model, loss, data_loader, reverse_only)\n)\nnum_batches = len(data_loader)\navg_hessian = to_model_device(torch.zeros_like(vec), model)\nfor batch_hvp in batch_hessians_vector_products:\navg_hessian += batch_hvp\nreturn avg_hessian / float(num_batches)\nreturn avg_hvp_function if use_hessian_avg else hvp_function\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/","title":"Torch differentiable","text":"

    Contains methods for differentiating a pyTorch model. Most of the methods focus on ways to calculate matrix vector products. Moreover, it contains several methods to invert the Hessian vector product. These are used to calculate the influence of a training point on the model.

    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable--references","title":"References","text":"
    1. Koh, P.W., Liang, P., 2017. Understanding Black-box Predictions via Influence Functions. In: Proceedings of the 34th International Conference on Machine Learning, pp. 1885\u20131894. PMLR.\u00a0\u21a9

    2. Agarwal, N., Bullins, B., Hazan, E., 2017. Second-Order Stochastic Optimization for Machine Learning in Linear Time. In: Journal of Machine Learning Research, Vol. 18, pp. 1\u201340. JMLR.\u00a0\u21a9

    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable","title":"TorchTwiceDifferentiable(model, loss)","text":"

    Bases: TwiceDifferentiable[Tensor]

    Wraps a torch.nn.Module and a loss function and provides methods to compute gradients and second derivative of the loss wrt. the model parameters

    PARAMETER DESCRIPTION model

    A (differentiable) function.

    TYPE: Module

    loss

    A differentiable scalar loss \\( L(\\hat{y}, y) \\), mapping a prediction and a target to a real value.

    TYPE: Callable[[Tensor, Tensor], Tensor]

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def __init__(\nself,\nmodel: nn.Module,\nloss: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],\n):\nif model.training:\nlogger.warning(\n\"Passed model not in evaluation mode. This can create several issues in influence \"\n\"computation, e.g. due to batch normalization. Please call model.eval() before \"\n\"computing influences.\"\n)\nself.loss = loss\nself.model = model\nfirst_param = next(model.parameters())\nself.device = first_param.device\nself.dtype = first_param.dtype\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable.parameters","title":"parameters: List[torch.Tensor] property","text":"RETURNS DESCRIPTION List[Tensor]

    All model parameters that require differentiating.

    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable.num_params","title":"num_params: int property","text":"

    Get the number of parameters of model f.

    RETURNS DESCRIPTION int

    Number of parameters.

    TYPE: int

    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable.grad","title":"grad(x, y, create_graph=False)","text":"

    Calculates the gradient of the loss with respect to the model parameters.

    PARAMETER DESCRIPTION x

    A matrix [NxD] representing the features \\( x_i \\).

    TYPE: Tensor

    y

    A matrix [NxK] representing the target values \\( y_i \\).

    TYPE: Tensor

    create_graph

    If True, the resulting gradient tensor can be used for further differentiation.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION Tensor

    An array [P] with the gradients of the model.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def grad(\nself, x: torch.Tensor, y: torch.Tensor, create_graph: bool = False\n) -> torch.Tensor:\nr\"\"\"\n    Calculates gradient of model parameters with respect to the model parameters.\n    Args:\n        x: A matrix [NxD] representing the features \\( x_i \\).\n        y: A matrix [NxK] representing the target values \\( y_i \\).\n        create_graph (bool): If True, the resulting gradient tensor can be used for further differentiation.\n    Returns:\n        An array [P] with the gradients of the model.\n    \"\"\"\nx = x.to(self.device)\ny = y.to(self.device)\nif create_graph and not x.requires_grad:\nx = x.requires_grad_(True)\nloss_value = self.loss(torch.squeeze(self.model(x)), torch.squeeze(y))\ngrad_f = torch.autograd.grad(\nloss_value, self.parameters, create_graph=create_graph\n)\nreturn flatten_tensors_to_vector(grad_f)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable.hessian","title":"hessian(x, y)","text":"

    Calculates the explicit Hessian of the loss with respect to the model parameters, given data \\(x\\) and \\(y\\).

    PARAMETER DESCRIPTION x

    A matrix [NxD] representing the features \\(x_i\\).

    TYPE: Tensor

    y

    A matrix [NxK] representing the target values \\(y_i\\).

    TYPE: Tensor

    RETURNS DESCRIPTION Tensor

    A tensor representing the hessian of the loss with respect to the model parameters.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def hessian(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:\nr\"\"\"\n    Calculates the explicit hessian of model parameters given data \\(x\\) and \\(y\\).\n    Args:\n        x: A matrix [NxD] representing the features \\(x_i\\).\n        y: A matrix [NxK] representing the target values \\(y_i\\).\n    Returns:\n        A tensor representing the hessian of the loss with respect to the model parameters.\n    \"\"\"\ndef model_func(param):\noutputs = torch.func.functional_call(\nself.model,\nalign_structure(\n{k: p for k, p in self.model.named_parameters() if p.requires_grad},\nparam,\n),\n(x.to(self.device),),\nstrict=True,\n)\nreturn self.loss(outputs, y.to(self.device))\nparams = flatten_tensors_to_vector(\np.detach() for p in self.model.parameters() if p.requires_grad\n)\nreturn torch.func.hessian(model_func)(params)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable.mvp","title":"mvp(grad_xy, v, backprop_on, *, progress=False) staticmethod","text":"

    Calculates the second-order derivative of the model along directions v. This second-order derivative can be selected through the backprop_on argument.

    PARAMETER DESCRIPTION grad_xy

    An array [P] holding the gradients of the model parameters with respect to input \\(x\\) and labels \\(y\\), where P is the number of parameters of the model. It is typically obtained through self.grad.

    TYPE: Tensor

    v

    An array ([DxP] or even one-dimensional [D]) which multiplies the matrix, where D is the number of directions.

    TYPE: Tensor

    progress

    If True, progress will be printed.

    TYPE: bool DEFAULT: False

    backprop_on

    Tensor used in the second backpropagation (the first one is defined via grad_xy).

    TYPE: Tensor

    RETURNS DESCRIPTION Tensor

    A matrix representing the implicit matrix-vector product of the model along the given directions. The output shape is [DxM], with M being the number of elements of backprop_on.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @staticmethod\ndef mvp(\ngrad_xy: torch.Tensor,\nv: torch.Tensor,\nbackprop_on: torch.Tensor,\n*,\nprogress: bool = False,\n) -> torch.Tensor:\nr\"\"\"\n    Calculates the second-order derivative of the model along directions v.\n    This second-order derivative can be selected through the `backprop_on` argument.\n    Args:\n        grad_xy: An array [P] holding the gradients of the model parameters with respect to input\n            \\(x\\) and labels \\(y\\), where P is the number of parameters of the model.\n            It is typically obtained through `self.grad`.\n        v: An array ([DxP] or even one-dimensional [D]) which multiplies the matrix,\n            where D is the number of directions.\n        progress: If True, progress will be printed.\n        backprop_on: Tensor used in the second backpropagation\n            (the first one is defined via grad_xy).\n    Returns:\n        A matrix representing the implicit matrix-vector product of the model along the given directions.\n            The output shape is [DxM], with M being the number of elements of `backprop_on`.\n    \"\"\"\ndevice = grad_xy.device\nv = as_tensor(v, warn=False).to(device)\nif v.ndim == 1:\nv = v.unsqueeze(0)\nz = (grad_xy * Variable(v)).sum(dim=1)\nmvp = []\nfor i in maybe_progress(range(len(z)), progress, desc=\"MVP\"):\nmvp.append(\nflatten_tensors_to_vector(\nautograd.grad(z[i], backprop_on, retain_graph=True)\n)\n)\nreturn torch.stack([grad.contiguous().view(-1) for grad in mvp]).detach()\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation","title":"LowRankProductRepresentation dataclass","text":"

    Representation of a low rank product of the form \\(H = V D V^T\\), where D is a diagonal matrix and V is orthogonal.

    PARAMETER DESCRIPTION eigen_vals

    Diagonal of D.

    TYPE: Tensor

    projections

    The matrix V.

    TYPE: Tensor

    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation.to","title":"to(device)","text":"

    Move the representing tensors to a device

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def to(self, device: torch.device):\n\"\"\"\n    Move the representing tensors to a device\n    \"\"\"\nreturn LowRankProductRepresentation(\nself.eigen_vals.to(device), self.projections.to(device)\n)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTensorUtilities","title":"TorchTensorUtilities","text":"

    Bases: TensorUtilities[Tensor, TorchTwiceDifferentiable]

    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTensorUtilities.einsum","title":"einsum(equation, *operands) staticmethod","text":"

    Sums the product of the elements of the input operands along dimensions specified using a notation based on the Einstein summation convention.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @staticmethod\ndef einsum(equation: str, *operands) -> torch.Tensor:\n\"\"\"Sums the product of the elements of the input :attr:`operands` along dimensions specified using a notation\n    based on the Einstein summation convention.\n    \"\"\"\nreturn torch.einsum(equation, *operands)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTensorUtilities.cat","title":"cat(a, **kwargs) staticmethod","text":"

    Concatenates a sequence of tensors into a single torch tensor

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @staticmethod\ndef cat(a: Sequence[torch.Tensor], **kwargs) -> torch.Tensor:\n\"\"\"Concatenates a sequence of tensors into a single torch tensor\"\"\"\nreturn torch.cat(a, **kwargs)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTensorUtilities.stack","title":"stack(a, **kwargs) staticmethod","text":"

    Stacks a sequence of tensors into a single torch tensor

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @staticmethod\ndef stack(a: Sequence[torch.Tensor], **kwargs) -> torch.Tensor:\n\"\"\"Stacks a sequence of tensors into a single torch tensor\"\"\"\nreturn torch.stack(a, **kwargs)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTensorUtilities.unsqueeze","title":"unsqueeze(x, dim) staticmethod","text":"

    Add a singleton dimension at a specified position in a tensor.

    PARAMETER DESCRIPTION x

    A PyTorch tensor.

    TYPE: Tensor

    dim

    The position at which to add the singleton dimension. Zero-based indexing.

    TYPE: int

    RETURNS DESCRIPTION Tensor

    A new tensor with an additional singleton dimension.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @staticmethod\ndef unsqueeze(x: torch.Tensor, dim: int) -> torch.Tensor:\n\"\"\"\n    Add a singleton dimension at a specified position in a tensor.\n    Args:\n        x: A PyTorch tensor.\n        dim: The position at which to add the singleton dimension. Zero-based indexing.\n    Returns:\n        A new tensor with an additional singleton dimension.\n    \"\"\"\nreturn x.unsqueeze(dim)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.lanzcos_low_rank_hessian_approx","title":"lanzcos_low_rank_hessian_approx(hessian_vp, matrix_shape, hessian_perturbation=0.0, rank_estimate=10, krylov_dimension=None, tol=1e-06, max_iter=None, device=None, eigen_computation_on_gpu=False, torch_dtype=None)","text":"

    Calculates a low-rank approximation of the Hessian matrix of a scalar-valued function using the implicitly restarted Lanczos algorithm, i.e.:

    \\[ H_{\\text{approx}} = V D V^T\\]

    where \\(D\\) is a diagonal matrix with the top (in absolute value) rank_estimate eigenvalues of the Hessian and \\(V\\) contains the corresponding eigenvectors.

    PARAMETER DESCRIPTION hessian_vp

    A function that takes a vector and returns the product of the Hessian of the loss function with that vector.

    TYPE: Callable[[Tensor], Tensor]

    matrix_shape

    The shape of the matrix, represented by the hessian vector product.

    TYPE: Tuple[int, int]

    hessian_perturbation

    Regularization parameter added to the Hessian-vector product for numerical stability.

    TYPE: float DEFAULT: 0.0

    rank_estimate

    The number of eigenvalues and corresponding eigenvectors to compute. Represents the desired rank of the Hessian approximation.

    TYPE: int DEFAULT: 10

    krylov_dimension

    The number of Krylov vectors to use for the Lanczos method. If not provided, it defaults to \\( \\min(\\text{model.num_parameters}, \\max(2 \\times \\text{rank_estimate} + 1, 20)) \\).

    TYPE: Optional[int] DEFAULT: None

    tol

    The stopping criterion for the Lanczos algorithm, which stops when the difference in the approximated eigenvalue is less than tol. Defaults to 1e-6.

    TYPE: float DEFAULT: 1e-06

    max_iter

    The maximum number of iterations for the Lanczos method. If not provided, it defaults to \\( 10 \\cdot \\text{model.num_parameters}\\).

    TYPE: Optional[int] DEFAULT: None

    device

    The device to use for executing the hessian vector product.

    TYPE: Optional[device] DEFAULT: None

    eigen_computation_on_gpu

    If True, tries to execute the eigen pair approximation on the provided device via cupy implementation. Ensure that either your model is small enough, or you use a small rank_estimate to fit your device's memory. If False, the eigen pair approximation is executed on the CPU with scipy's wrapper to ARPACK.

    TYPE: bool DEFAULT: False

    torch_dtype

    If not provided, the current torch default dtype is used for conversion to torch.

    TYPE: dtype DEFAULT: None

    RETURNS DESCRIPTION LowRankProductRepresentation

    A LowRankProductRepresentation instance that contains the top (up until rank_estimate) eigenvalues and corresponding eigenvectors of the Hessian.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def lanzcos_low_rank_hessian_approx(\nhessian_vp: Callable[[torch.Tensor], torch.Tensor],\nmatrix_shape: Tuple[int, int],\nhessian_perturbation: float = 0.0,\nrank_estimate: int = 10,\nkrylov_dimension: Optional[int] = None,\ntol: float = 1e-6,\nmax_iter: Optional[int] = None,\ndevice: Optional[torch.device] = None,\neigen_computation_on_gpu: bool = False,\ntorch_dtype: torch.dtype = None,\n) -> LowRankProductRepresentation:\nr\"\"\"\n    Calculates a low-rank approximation of the Hessian matrix of a scalar-valued\n    function using the implicitly restarted Lanczos algorithm, i.e.:\n    \\[ H_{\\text{approx}} = V D V^T\\]\n    where \\(D\\) is a diagonal matrix with the top (in absolute value) `rank_estimate` eigenvalues of the Hessian\n    and \\(V\\) contains the corresponding eigenvectors.\n    Args:\n        hessian_vp: A function that takes a vector and returns the product of\n            the Hessian of the loss function.\n        matrix_shape: The shape of the matrix, represented by the hessian vector\n            product.\n        hessian_perturbation: Regularization parameter added to the\n            Hessian-vector product for numerical stability.\n        rank_estimate: The number of eigenvalues and corresponding eigenvectors\n            to compute. Represents the desired rank of the Hessian approximation.\n        krylov_dimension: The number of Krylov vectors to use for the Lanczos\n            method. If not provided, it defaults to\n            \\( \\min(\\text{model.num_parameters}, \\max(2 \\times \\text{rank_estimate} + 1, 20)) \\).\n        tol: The stopping criteria for the Lanczos algorithm, which stops when\n            the difference in the approximated eigenvalue is less than `tol`.\n            Defaults to 1e-6.\n        max_iter: The maximum number of iterations for the Lanczos method. 
If\n            not provided, it defaults to \\( 10 \\cdot \\text{model.num_parameters}\\).\n        device: The device to use for executing the hessian vector product.\n        eigen_computation_on_gpu: If True, tries to execute the eigen pair\n            approximation on the provided device via [cupy](https://cupy.dev/)\n            implementation. Ensure that either your model is small enough, or you\n            use a small rank_estimate to fit your device's memory. If False, the\n            eigen pair approximation is executed on the CPU with scipy's wrapper to\n            ARPACK.\n        torch_dtype: If not provided, the current torch default dtype is used for\n            conversion to torch.\n    Returns:\n        A [LowRankProductRepresentation][pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation]\n            instance that contains the top (up until rank_estimate) eigenvalues\n            and corresponding eigenvectors of the Hessian.\n    \"\"\"\ntorch_dtype = torch.get_default_dtype() if torch_dtype is None else torch_dtype\nif eigen_computation_on_gpu:\ntry:\nimport cupy as cp\nfrom cupyx.scipy.sparse.linalg import LinearOperator, eigsh\nfrom torch.utils.dlpack import from_dlpack, to_dlpack\nexcept ImportError as e:\nraise ImportError(\nf\"Try to install missing dependencies or set eigen_computation_on_gpu to False: {e}\"\n)\nif device is None:\nraise ValueError(\n\"Without setting an explicit device, cupy is not supported\"\n)\ndef to_torch_conversion_function(x):\nreturn from_dlpack(x.toDlpack()).to(torch_dtype)\ndef mv(x):\nx = to_torch_conversion_function(x)\ny = hessian_vp(x) + hessian_perturbation * x\nreturn cp.from_dlpack(to_dlpack(y))\nelse:\nfrom scipy.sparse.linalg import LinearOperator, eigsh\ndef mv(x):\nx_torch = torch.as_tensor(x, device=device, dtype=torch_dtype)\ny: NDArray = (\n(hessian_vp(x_torch) + hessian_perturbation * x_torch)\n.detach()\n.cpu()\n.numpy()\n)\nreturn y\nto_torch_conversion_function = 
partial(torch.as_tensor, dtype=torch_dtype)\ntry:\neigen_vals, eigen_vecs = eigsh(\nLinearOperator(matrix_shape, matvec=mv),\nk=rank_estimate,\nmaxiter=max_iter,\ntol=tol,\nncv=krylov_dimension,\nreturn_eigenvectors=True,\n)\nexcept ArpackNoConvergence as e:\nlogger.warning(\nf\"ARPACK did not converge for parameters {max_iter=}, {tol=}, {krylov_dimension=}, \"\nf\"{rank_estimate=}. \\n Returning the best approximation found so far. Use those with care or \"\nf\"modify parameters.\\n Original error: {e}\"\n)\neigen_vals, eigen_vecs = e.eigenvalues, e.eigenvectors\neigen_vals = to_torch_conversion_function(eigen_vals)\neigen_vecs = to_torch_conversion_function(eigen_vecs)\nreturn LowRankProductRepresentation(eigen_vals, eigen_vecs)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.model_hessian_low_rank","title":"model_hessian_low_rank(model, training_data, hessian_perturbation=0.0, rank_estimate=10, krylov_dimension=None, tol=1e-06, max_iter=None, eigen_computation_on_gpu=False)","text":"

    Calculates a low-rank approximation of the Hessian matrix of the model's loss function using the implicitly restarted Lanczos algorithm, i.e.

    \\[ H_{\\text{approx}} = V D V^T\\]

    where \\(D\\) is a diagonal matrix with the top (in absolute value) rank_estimate eigenvalues of the Hessian and \\(V\\) contains the corresponding eigenvectors.

    PARAMETER DESCRIPTION model

    A PyTorch model instance that is twice differentiable, wrapped into TorchTwiceDifferentiable. The Hessian will be calculated with respect to this model's parameters.

    TYPE: TorchTwiceDifferentiable

    training_data

    A DataLoader instance that provides the model's training data. Used in calculating the Hessian-vector products.

    TYPE: DataLoader

    hessian_perturbation

    Optional regularization parameter added to the Hessian-vector product for numerical stability.

    TYPE: float DEFAULT: 0.0

    rank_estimate

    The number of eigenvalues and corresponding eigenvectors to compute. Represents the desired rank of the Hessian approximation.

    TYPE: int DEFAULT: 10

    krylov_dimension

    The number of Krylov vectors to use for the Lanczos method. If not provided, it defaults to min(model.num_parameters, max(2*rank_estimate + 1, 20)).

    TYPE: Optional[int] DEFAULT: None

    tol

    The stopping criterion for the Lanczos algorithm, which stops when the difference in the approximated eigenvalue is less than tol. Defaults to 1e-6.

    TYPE: float DEFAULT: 1e-06

    max_iter

    The maximum number of iterations for the Lanczos method. If not provided, it defaults to 10*model.num_parameters.

    TYPE: Optional[int] DEFAULT: None

    eigen_computation_on_gpu

    If True, tries to execute the eigen pair approximation on the provided device via cupy implementation. Make sure that either your model is small enough or you use a small rank_estimate to fit your device's memory. If False, the eigen pair approximation is executed on the CPU with scipy's wrapper to ARPACK.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION LowRankProductRepresentation

    A LowRankProductRepresentation instance that contains the top (up until rank_estimate) eigenvalues and corresponding eigenvectors of the Hessian.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def model_hessian_low_rank(\nmodel: TorchTwiceDifferentiable,\ntraining_data: DataLoader,\nhessian_perturbation: float = 0.0,\nrank_estimate: int = 10,\nkrylov_dimension: Optional[int] = None,\ntol: float = 1e-6,\nmax_iter: Optional[int] = None,\neigen_computation_on_gpu: bool = False,\n) -> LowRankProductRepresentation:\nr\"\"\"\n    Calculates a low-rank approximation of the Hessian matrix of the model's loss function using the implicitly\n    restarted Lanczos algorithm, i.e.\n    \\[ H_{\\text{approx}} = V D V^T\\]\n    where \\(D\\) is a diagonal matrix with the top (in absolute value) `rank_estimate` eigenvalues of the Hessian\n    and \\(V\\) contains the corresponding eigenvectors.\n    Args:\n        model: A PyTorch model instance that is twice differentiable, wrapped into `TorchTwiceDifferential`.\n            The Hessian will be calculated with respect to this model's parameters.\n        training_data: A DataLoader instance that provides the model's training data.\n            Used in calculating the Hessian-vector products.\n        hessian_perturbation: Optional regularization parameter added to the Hessian-vector product\n            for numerical stability.\n        rank_estimate: The number of eigenvalues and corresponding eigenvectors to compute.\n            Represents the desired rank of the Hessian approximation.\n        krylov_dimension: The number of Krylov vectors to use for the Lanczos method.\n            If not provided, it defaults to min(model.num_parameters, max(2*rank_estimate + 1, 20)).\n        tol: The stopping criteria for the Lanczos algorithm, which stops when the difference\n            in the approximated eigenvalue is less than `tol`. Defaults to 1e-6.\n        max_iter: The maximum number of iterations for the Lanczos method. 
If not provided, it defaults to\n            10*model.num_parameters.\n        eigen_computation_on_gpu: If True, tries to execute the eigen pair approximation on the provided\n            device via cupy implementation.\n            Make sure, that either your model is small enough or you use a\n            small rank_estimate to fit your device's memory.\n            If False, the eigen pair approximation is executed on the CPU by scipy wrapper to\n            ARPACK.\n    Returns:\n        A [LowRankProductRepresentation][pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation]\n            instance that contains the top (up until rank_estimate) eigenvalues\n            and corresponding eigenvectors of the Hessian.\n    \"\"\"\nraw_hvp = get_hvp_function(\nmodel.model, model.loss, training_data, use_hessian_avg=True\n)\nreturn lanzcos_low_rank_hessian_approx(\nhessian_vp=raw_hvp,\nmatrix_shape=(model.num_params, model.num_params),\nhessian_perturbation=hessian_perturbation,\nrank_estimate=rank_estimate,\nkrylov_dimension=krylov_dimension,\ntol=tol,\nmax_iter=max_iter,\ndevice=model.device if hasattr(model, \"device\") else None,\neigen_computation_on_gpu=eigen_computation_on_gpu,\n)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.solve_linear","title":"solve_linear(model, training_data, b, hessian_perturbation=0.0)","text":"

    Given a model and training data, it finds x such that \\(Hx = b\\), with \\(H\\) being the model hessian.

    PARAMETER DESCRIPTION model

    A model wrapped in the TwiceDifferentiable interface.

    TYPE: TorchTwiceDifferentiable

    training_data

    A DataLoader containing the training data.

    TYPE: DataLoader

    b

    A vector or matrix, the right hand side of the equation \\(Hx = b\\).

    TYPE: Tensor

    hessian_perturbation

    Regularization of the hessian.

    TYPE: float DEFAULT: 0.0

    RETURNS DESCRIPTION InverseHvpResult

    Instance of InverseHvpResult, having an array that solves the inverse problem, i.e. it returns \\(x\\) such that \\(Hx = b\\), and a dictionary containing information about the solution.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @InversionRegistry.register(TorchTwiceDifferentiable, InversionMethod.Direct)\ndef solve_linear(\nmodel: TorchTwiceDifferentiable,\ntraining_data: DataLoader,\nb: torch.Tensor,\nhessian_perturbation: float = 0.0,\n) -> InverseHvpResult:\nr\"\"\"\n    Given a model and training data, it finds x such that \\(Hx = b\\), with \\(H\\) being the model hessian.\n    Args:\n        model: A model wrapped in the TwiceDifferentiable interface.\n        training_data: A DataLoader containing the training data.\n        b: A vector or matrix, the right hand side of the equation \\(Hx = b\\).\n        hessian_perturbation: Regularization of the hessian.\n    Returns:\n        Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult],\n            having an array that solves the inverse problem, i.e. it returns \\(x\\) such that \\(Hx = b\\),\n            and a dictionary containing information about the solution.\n    \"\"\"\nall_x, all_y = [], []\nfor x, y in training_data:\nall_x.append(x)\nall_y.append(y)\nhessian = model.hessian(torch.cat(all_x), torch.cat(all_y))\nmatrix = hessian + hessian_perturbation * torch.eye(\nmodel.num_params, device=model.device\n)\ninfo = {\"hessian\": hessian}\nreturn InverseHvpResult(x=torch.linalg.solve(matrix, b.T).T, info=info)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.solve_batch_cg","title":"solve_batch_cg(model, training_data, b, hessian_perturbation=0.0, *, x0=None, rtol=1e-07, atol=1e-07, maxiter=None, progress=False)","text":"

    Given a model and training data, it uses conjugate gradient to calculate the inverse of the Hessian Vector Product. More precisely, it finds x such that \\(Hx = b\\), with \\(H\\) being the model hessian. For more info, see Wikipedia.

    PARAMETER DESCRIPTION model

    A model wrapped in the TwiceDifferentiable interface.

    TYPE: TorchTwiceDifferentiable

    training_data

    A DataLoader containing the training data.

    TYPE: DataLoader

    b

    A vector or matrix, the right hand side of the equation \\(Hx = b\\).

    TYPE: Tensor

    hessian_perturbation

    Regularization of the hessian.

    TYPE: float DEFAULT: 0.0

    x0

    Initial guess for hvp. If None, defaults to b.

    TYPE: Optional[Tensor] DEFAULT: None

    rtol

    Maximum relative tolerance of result.

    TYPE: float DEFAULT: 1e-07

    atol

    Absolute tolerance of result.

    TYPE: float DEFAULT: 1e-07

    maxiter

    Maximum number of iterations. If None, defaults to 10*len(b).

    TYPE: Optional[int] DEFAULT: None

    progress

    If True, display progress bars.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION InverseHvpResult

    Instance of InverseHvpResult, having a matrix of shape [NxP] with each row being a solution of \\(Hx = b\\), and a dictionary containing information about the convergence of CG, one entry for each row of the matrix.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @InversionRegistry.register(TorchTwiceDifferentiable, InversionMethod.Cg)\ndef solve_batch_cg(\nmodel: TorchTwiceDifferentiable,\ntraining_data: DataLoader,\nb: torch.Tensor,\nhessian_perturbation: float = 0.0,\n*,\nx0: Optional[torch.Tensor] = None,\nrtol: float = 1e-7,\natol: float = 1e-7,\nmaxiter: Optional[int] = None,\nprogress: bool = False,\n) -> InverseHvpResult:\nr\"\"\"\n    Given a model and training data, it uses conjugate gradient to calculate the\n    inverse of the Hessian Vector Product. More precisely, it finds x such that \\(Hx =\n    b\\), with \\(H\\) being the model hessian. For more info, see\n    [Wikipedia](https://en.wikipedia.org/wiki/Conjugate_gradient_method).\n    Args:\n        model: A model wrapped in the TwiceDifferentiable interface.\n        training_data: A DataLoader containing the training data.\n        b: A vector or matrix, the right hand side of the equation \\(Hx = b\\).\n        hessian_perturbation: Regularization of the hessian.\n        x0: Initial guess for hvp. If None, defaults to b.\n        rtol: Maximum relative tolerance of result.\n        atol: Absolute tolerance of result.\n        maxiter: Maximum number of iterations. 
If None, defaults to 10*len(b).\n        progress: If True, display progress bars.\n    Returns:\n        Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult],\n            having a matrix of shape [NxP] with each line being a solution of \\(Ax=b\\),\n            and a dictionary containing information about the convergence of CG,\n            one entry for each line of the matrix.\n    \"\"\"\ntotal_grad_xy = 0\ntotal_points = 0\nfor x, y in maybe_progress(training_data, progress, desc=\"Batch Train Gradients\"):\ngrad_xy = model.grad(x, y, create_graph=True)\ntotal_grad_xy += grad_xy * len(x)\ntotal_points += len(x)\nbackprop_on = model.parameters\nreg_hvp = lambda v: model.mvp(\ntotal_grad_xy / total_points, v, backprop_on\n) + hessian_perturbation * v.type(torch.float64)\nbatch_cg = torch.zeros_like(b)\ninfo = {}\nfor idx, bi in enumerate(maybe_progress(b, progress, desc=\"Conjugate gradient\")):\nbatch_result, batch_info = solve_cg(\nreg_hvp, bi, x0=x0, rtol=rtol, atol=atol, maxiter=maxiter\n)\nbatch_cg[idx] = batch_result\ninfo[f\"batch_{idx}\"] = batch_info\nreturn InverseHvpResult(x=batch_cg, info=info)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.solve_cg","title":"solve_cg(hvp, b, *, x0=None, rtol=1e-07, atol=1e-07, maxiter=None)","text":"

    Conjugate gradient solver for the Hessian vector product.

    PARAMETER DESCRIPTION hvp

    A callable Hvp, operating with tensors of size N.

    TYPE: Callable[[Tensor], Tensor]

    b

    A vector or matrix, the right hand side of the equation \\(Hx = b\\).

    TYPE: Tensor

    x0

    Initial guess for hvp.

    TYPE: Optional[Tensor] DEFAULT: None

    rtol

    Maximum relative tolerance of result.

    TYPE: float DEFAULT: 1e-07

    atol

    Absolute tolerance of result.

    TYPE: float DEFAULT: 1e-07

    maxiter

    Maximum number of iterations. If None, defaults to 10*len(b).

    TYPE: Optional[int] DEFAULT: None

    RETURNS DESCRIPTION InverseHvpResult

    Instance of InverseHvpResult, with a vector x, solution of \\(Hx = b\\), and a dictionary containing information about the convergence of CG.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def solve_cg(\nhvp: Callable[[torch.Tensor], torch.Tensor],\nb: torch.Tensor,\n*,\nx0: Optional[torch.Tensor] = None,\nrtol: float = 1e-7,\natol: float = 1e-7,\nmaxiter: Optional[int] = None,\n) -> InverseHvpResult:\nr\"\"\"\n    Conjugate gradient solver for the Hessian vector product.\n    Args:\n        hvp: A callable Hvp, operating with tensors of size N.\n        b: A vector or matrix, the right hand side of the equation \\(Hx = b\\).\n        x0: Initial guess for hvp.\n        rtol: Maximum relative tolerance of result.\n        atol: Absolute tolerance of result.\n        maxiter: Maximum number of iterations. If None, defaults to 10*len(b).\n    Returns:\n        Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult],\n            with a vector x, solution of \\(Ax=b\\), and a dictionary containing\n            information about the convergence of CG.\n    \"\"\"\nif x0 is None:\nx0 = torch.clone(b)\nif maxiter is None:\nmaxiter = len(b) * 10\ny_norm = torch.sum(torch.matmul(b, b)).item()\nstopping_val = max([rtol**2 * y_norm, atol**2])\nx = x0\np = r = (b - hvp(x)).squeeze().type(torch.float64)\ngamma = torch.sum(torch.matmul(r, r)).item()\noptimal = False\nfor k in range(maxiter):\nif gamma < stopping_val:\noptimal = True\nbreak\nAp = hvp(p).squeeze()\nalpha = gamma / torch.sum(torch.matmul(p, Ap)).item()\nx += alpha * p\nr -= alpha * Ap\ngamma_ = torch.sum(torch.matmul(r, r)).item()\nbeta = gamma_ / gamma\ngamma = gamma_\np = r + beta * p\ninfo = {\"niter\": k, \"optimal\": optimal, \"gamma\": gamma}\nreturn InverseHvpResult(x=x, info=info)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.solve_lissa","title":"solve_lissa(model, training_data, b, hessian_perturbation=0.0, *, maxiter=1000, dampen=0.0, scale=10.0, h0=None, rtol=0.0001, progress=False)","text":"

    Uses LISSA, Linear time Stochastic Second-Order Algorithm, to iteratively approximate the inverse Hessian. More precisely, it finds x s.t. \\(Hx = b\\), with \\(H\\) being the model's second derivative wrt. the parameters. This is done with the update

    \\[H^{-1}_{j+1} b = b + (I - d) \\ H^{-1}_j b - \\frac{H \\ H^{-1}_j b}{s},\\]

    where \\(I\\) is the identity matrix, \\(d\\) is a dampening term and \\(s\\) a scaling factor that are applied to help convergence. For details, see (Koh and Liang, 2017)1 and the original paper (Agarwal et al.)2.

    PARAMETER DESCRIPTION model

    A model wrapped in the TwiceDifferentiable interface.

    TYPE: TorchTwiceDifferentiable

    training_data

    A DataLoader containing the training data.

    TYPE: DataLoader

    b

    A vector or matrix, the right hand side of the equation \\(Hx = b\\).

    TYPE: Tensor

    hessian_perturbation

    Regularization of the hessian.

    TYPE: float DEFAULT: 0.0

    maxiter

    Maximum number of iterations.

    TYPE: int DEFAULT: 1000

    dampen

    Dampening factor, defaults to 0 for no dampening.

    TYPE: float DEFAULT: 0.0

    scale

    Scaling factor, defaults to 10.

    TYPE: float DEFAULT: 10.0

    h0

    Initial guess for hvp.

    TYPE: Optional[Tensor] DEFAULT: None

    rtol

    tolerance to use for early stopping

    TYPE: float DEFAULT: 0.0001

    progress

    If True, display progress bars.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION InverseHvpResult

    Instance of InverseHvpResult, with a matrix of shape [NxP] with each row being a solution of \\(Hx = b\\), and a dictionary containing information about the accuracy of the solution.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @InversionRegistry.register(TorchTwiceDifferentiable, InversionMethod.Lissa)\ndef solve_lissa(\nmodel: TorchTwiceDifferentiable,\ntraining_data: DataLoader,\nb: torch.Tensor,\nhessian_perturbation: float = 0.0,\n*,\nmaxiter: int = 1000,\ndampen: float = 0.0,\nscale: float = 10.0,\nh0: Optional[torch.Tensor] = None,\nrtol: float = 1e-4,\nprogress: bool = False,\n) -> InverseHvpResult:\nr\"\"\"\n    Uses LISSA, Linear time Stochastic Second-Order Algorithm, to iteratively\n    approximate the inverse Hessian. More precisely, it finds x s.t. \\(Hx = b\\),\n    with \\(H\\) being the model's second derivative wrt. the parameters.\n    This is done with the update\n    \\[H^{-1}_{j+1} b = b + (I - d) \\ H - \\frac{H^{-1}_j b}{s},\\]\n    where \\(I\\) is the identity matrix, \\(d\\) is a dampening term and \\(s\\) a scaling\n    factor that are applied to help convergence. For details, see\n    (Koh and Liang, 2017)<sup><a href=\"#koh_liang_2017\">1</a></sup> and the original paper\n    (Agarwal et. 
al.)<sup><a href=\"#agarwal_secondorder_2017\">2</a></sup>.\n    Args:\n        model: A model wrapped in the TwiceDifferentiable interface.\n        training_data: A DataLoader containing the training data.\n        b: A vector or matrix, the right hand side of the equation \\(Hx = b\\).\n        hessian_perturbation: Regularization of the hessian.\n        maxiter: Maximum number of iterations.\n        dampen: Dampening factor, defaults to 0 for no dampening.\n        scale: Scaling factor, defaults to 10.\n        h0: Initial guess for hvp.\n        rtol: tolerance to use for early stopping\n        progress: If True, display progress bars.\n    Returns:\n        Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult], with a matrix of shape [NxP] with each line being a solution of \\(Ax=b\\),\n            and a dictionary containing information about the accuracy of the solution.\n    \"\"\"\nif h0 is None:\nh_estimate = torch.clone(b)\nelse:\nh_estimate = h0\nshuffled_training_data = DataLoader(\ntraining_data.dataset, training_data.batch_size, shuffle=True\n)\ndef lissa_step(\nh: torch.Tensor, reg_hvp: Callable[[torch.Tensor], torch.Tensor]\n) -> torch.Tensor:\n\"\"\"Given an estimate of the hessian inverse and the regularised hessian\n        vector product, it computes the next estimate.\n        Args:\n            h: An estimate of the hessian inverse.\n            reg_hvp: Regularised hessian vector product.\n        Returns:\n            The next estimate of the hessian inverse.\n        \"\"\"\nreturn b + (1 - dampen) * h - reg_hvp(h) / scale\nfor _ in maybe_progress(range(maxiter), progress, desc=\"Lissa\"):\nx, y = next(iter(shuffled_training_data))\ngrad_xy = model.grad(x, y, create_graph=True)\nreg_hvp = (\nlambda v: model.mvp(grad_xy, v, model.parameters) + hessian_perturbation * v\n)\nresidual = lissa_step(h_estimate, reg_hvp) - h_estimate\nh_estimate += residual\nif torch.isnan(h_estimate).any():\nraise 
RuntimeError(\"NaNs in h_estimate. Increase scale or dampening.\")\nmax_residual = torch.max(torch.abs(residual / h_estimate))\nif max_residual < rtol:\nbreak\nmean_residual = torch.mean(torch.abs(residual / h_estimate))\nlogger.info(\nf\"Terminated Lissa with {max_residual*100:.2f} % max residual.\"\nf\" Mean residual: {mean_residual*100:.5f} %\"\n)\ninfo = {\n\"max_perc_residual\": max_residual * 100,\n\"mean_perc_residual\": mean_residual * 100,\n}\nreturn InverseHvpResult(x=h_estimate / scale, info=info)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.solve_arnoldi","title":"solve_arnoldi(model, training_data, b, hessian_perturbation=0.0, *, rank_estimate=10, krylov_dimension=None, low_rank_representation=None, tol=1e-06, max_iter=None, eigen_computation_on_gpu=False)","text":"

    Solves the linear system Hx = b, where H is the Hessian of the model's loss function and b is the given right-hand side vector. It employs the implicitly restarted Arnoldi method for computing a partial eigen decomposition, which is used for the inversion, i.e.

    \\[x = V D^{-1} V^T b\\]

    where \\(D\\) is a diagonal matrix with the top (in absolute value) rank_estimate eigenvalues of the Hessian and \\(V\\) contains the corresponding eigenvectors.

    PARAMETER DESCRIPTION model

    A PyTorch model instance that is twice differentiable, wrapped into TorchTwiceDifferentiable. The Hessian will be calculated with respect to this model's parameters.

    TYPE: TorchTwiceDifferentiable

    training_data

    A DataLoader instance that provides the model's training data. Used in calculating the Hessian-vector products.

    TYPE: DataLoader

    b

    The right-hand side vector in the system Hx = b.

    TYPE: Tensor

    hessian_perturbation

    Optional regularization parameter added to the Hessian-vector product for numerical stability.

    TYPE: float DEFAULT: 0.0

    rank_estimate

    The number of eigenvalues and corresponding eigenvectors to compute. Represents the desired rank of the Hessian approximation.

    TYPE: int DEFAULT: 10

    krylov_dimension

    The number of Krylov vectors to use for the Lanczos method. Defaults to min(model's number of parameters, max(2 times rank_estimate + 1, 20)).

    TYPE: Optional[int] DEFAULT: None

    low_rank_representation

    An instance of LowRankProductRepresentation containing a previously computed low-rank representation of the Hessian. If provided, all other parameters are ignored; otherwise, a new low-rank representation is computed using provided parameters.

    TYPE: Optional[LowRankProductRepresentation] DEFAULT: None

    tol

    The stopping criteria for the Lanczos algorithm. Ignored if low_rank_representation is provided.

    TYPE: float DEFAULT: 1e-06

    max_iter

    The maximum number of iterations for the Lanczos method. Ignored if low_rank_representation is provided.

    TYPE: Optional[int] DEFAULT: None

    eigen_computation_on_gpu

    If True, tries to execute the eigen pair approximation on the model's device via a cupy implementation. Ensure the model size or rank_estimate is appropriate for device memory. If False, the eigen pair approximation is executed on the CPU by the scipy wrapper to ARPACK.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION InverseHvpResult

    Instance of InverseHvpResult, having the solution vector x that satisfies the system \\(Ax = b\\), where \\(A\\) is a low-rank approximation of the Hessian \\(H\\) of the model's loss function, and an instance of LowRankProductRepresentation, which represents the approximation of H.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @InversionRegistry.register(TorchTwiceDifferentiable, InversionMethod.Arnoldi)\ndef solve_arnoldi(\nmodel: TorchTwiceDifferentiable,\ntraining_data: DataLoader,\nb: torch.Tensor,\nhessian_perturbation: float = 0.0,\n*,\nrank_estimate: int = 10,\nkrylov_dimension: Optional[int] = None,\nlow_rank_representation: Optional[LowRankProductRepresentation] = None,\ntol: float = 1e-6,\nmax_iter: Optional[int] = None,\neigen_computation_on_gpu: bool = False,\n) -> InverseHvpResult:\nr\"\"\"\n    Solves the linear system Hx = b, where H is the Hessian of the model's loss function and b is the given\n    right-hand side vector.\n    It employs the [implicitly restarted Arnoldi method](https://en.wikipedia.org/wiki/Arnoldi_iteration) for\n    computing a partial eigen decomposition, which is used fo the inversion i.e.\n    \\[x = V D^{-1} V^T b\\]\n    where \\(D\\) is a diagonal matrix with the top (in absolute value) `rank_estimate` eigenvalues of the Hessian\n    and \\(V\\) contains the corresponding eigenvectors.\n    Args:\n        model: A PyTorch model instance that is twice differentiable, wrapped into\n            [TorchTwiceDifferential][pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable].\n            The Hessian will be calculated with respect to this model's parameters.\n        training_data: A DataLoader instance that provides the model's training data.\n            Used in calculating the Hessian-vector products.\n        b: The right-hand side vector in the system Hx = b.\n        hessian_perturbation: Optional regularization parameter added to the Hessian-vector\n            product for numerical stability.\n        rank_estimate: The number of eigenvalues and corresponding eigenvectors to compute.\n            Represents the desired rank of the Hessian approximation.\n        krylov_dimension: The number of Krylov vectors to use for the Lanczos method.\n            Defaults to min(model's number of parameters, max(2 times rank_estimate + 
1, 20)).\n        low_rank_representation: An instance of\n            [LowRankProductRepresentation][pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation]\n            containing a previously computed low-rank representation of the Hessian. If provided, all other parameters\n            are ignored; otherwise, a new low-rank representation is computed\n            using provided parameters.\n        tol: The stopping criteria for the Lanczos algorithm.\n            Ignored if `low_rank_representation` is provided.\n        max_iter: The maximum number of iterations for the Lanczos method.\n            Ignored if `low_rank_representation` is provided.\n        eigen_computation_on_gpu: If True, tries to execute the eigen pair approximation on the model's device\n            via a cupy implementation. Ensure the model size or rank_estimate is appropriate for device memory.\n            If False, the eigen pair approximation is executed on the CPU by the scipy wrapper to ARPACK.\n    Returns:\n        Instance of [InverseHvpResult][pydvl.influence.torch.torch_differentiable.InverseHvpResult],\n            having the solution vector x that satisfies the system \\(Ax = b\\),\n            where \\(A\\) is a low-rank approximation of the Hessian \\(H\\) of the model's loss function, and an instance\n            of [LowRankProductRepresentation][pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation],\n            which represents the approximation of H.\n    \"\"\"\nb_device = b.device if hasattr(b, \"device\") else torch.device(\"cpu\")\nif low_rank_representation is None:\nif b_device.type == \"cuda\" and not eigen_computation_on_gpu:\nraise ValueError(\n\"Using 'eigen_computation_on_gpu=False' while 'b' is on a 'cuda' device is not supported. 
\"\n\"To address this, consider the following options:\\n\"\n\" - Set eigen_computation_on_gpu=True if your model and data are small enough \"\n\"and if 'cupy' is available in your environment.\\n\"\n\" - Move 'b' to the CPU with b.to('cpu').\\n\"\n\" - Precompute a low rank representation and move it to the 'b' device using:\\n\"\n\"     low_rank_representation = model_hessian_low_rank(model, training_data, ..., \"\n\"eigen_computation_on_gpu=False).to(b.device)\"\n)\nlow_rank_representation = model_hessian_low_rank(\nmodel,\ntraining_data,\nhessian_perturbation=hessian_perturbation,\nrank_estimate=rank_estimate,\nkrylov_dimension=krylov_dimension,\ntol=tol,\nmax_iter=max_iter,\neigen_computation_on_gpu=eigen_computation_on_gpu,\n)\nelse:\nif b_device.type != low_rank_representation.device.type:\nraise RuntimeError(\nf\"The devices for 'b' and 'low_rank_representation' do not match.\\n\"\nf\" - 'b' is on device: {b_device}\\n\"\nf\" - 'low_rank_representation' is on device: {low_rank_representation.device}\\n\"\nf\"\\nTo resolve this, consider moving 'low_rank_representation' to '{b_device}' by using:\\n\"\nf\"low_rank_representation = low_rank_representation.to(b.device)\"\n)\nlogger.info(\"Using provided low rank representation, ignoring other parameters\")\nresult = low_rank_representation.projections @ (\ntorch.diag_embed(1.0 / low_rank_representation.eigen_vals)\n@ (low_rank_representation.projections.t() @ b.t())\n)\nreturn InverseHvpResult(\nx=result.t(),\ninfo={\n\"eigenvalues\": low_rank_representation.eigen_vals,\n\"eigenvectors\": low_rank_representation.projections,\n},\n)\n
    "},{"location":"api/pydvl/influence/torch/util/","title":"Util","text":""},{"location":"api/pydvl/influence/torch/util/#pydvl.influence.torch.util.TorchTensorContainerType","title":"TorchTensorContainerType = TypeVar('TorchTensorContainerType', torch.Tensor, Tuple[torch.Tensor, ...], Dict[str, torch.Tensor]) module-attribute","text":"

    Type variable for a PyTorch tensor or a container thereof.

    "},{"location":"api/pydvl/influence/torch/util/#pydvl.influence.torch.util.to_model_device","title":"to_model_device(x, model)","text":"

    Returns the tensor x moved to the device of the model, if device of model is set

    PARAMETER DESCRIPTION x

    The tensor to be moved to the device of the model.

    TYPE: Tensor

    model

    The model whose device will be used to move the tensor.

    TYPE: Module

    RETURNS DESCRIPTION Tensor

    The tensor x moved to the device of the model, if device of model is set.

    Source code in src/pydvl/influence/torch/util.py
    def to_model_device(x: torch.Tensor, model: torch.nn.Module) -> torch.Tensor:\n\"\"\"\n    Returns the tensor `x` moved to the device of the `model`, if device of model is set\n    Args:\n        x: The tensor to be moved to the device of the model.\n        model: The model whose device will be used to move the tensor.\n    Returns:\n        The tensor `x` moved to the device of the `model`, if device of model is set.\n    \"\"\"\nif hasattr(model, \"device\"):\nreturn x.to(model.device)\nreturn x\n
    "},{"location":"api/pydvl/influence/torch/util/#pydvl.influence.torch.util.flatten_tensors_to_vector","title":"flatten_tensors_to_vector(tensors)","text":"

    Flatten multiple tensors into a single 1D tensor (vector).

    This function takes an iterable of tensors and reshapes each of them into a 1D tensor. These reshaped tensors are then concatenated together into a single 1D tensor in the order they were given.

    PARAMETER DESCRIPTION tensors

    An iterable of tensors to be reshaped and concatenated.

    TYPE: Iterable[Tensor]

    RETURNS DESCRIPTION Tensor

    A 1D tensor that is the concatenation of all the reshaped input tensors.

    Source code in src/pydvl/influence/torch/util.py
    def flatten_tensors_to_vector(tensors: Iterable[torch.Tensor]) -> torch.Tensor:\n\"\"\"\n    Flatten multiple tensors into a single 1D tensor (vector).\n    This function takes an iterable of tensors and reshapes each of them into a 1D tensor.\n    These reshaped tensors are then concatenated together into a single 1D tensor in the order they were given.\n    Args:\n        tensors: An iterable of tensors to be reshaped and concatenated.\n    Returns:\n        A 1D tensor that is the concatenation of all the reshaped input tensors.\n    \"\"\"\nreturn torch.cat([t.contiguous().view(-1) for t in tensors])\n
    "},{"location":"api/pydvl/influence/torch/util/#pydvl.influence.torch.util.reshape_vector_to_tensors","title":"reshape_vector_to_tensors(input_vector, target_shapes)","text":"

    Reshape a 1D tensor into multiple tensors with specified shapes.

    This function takes a 1D tensor (input_vector) and reshapes it into a series of tensors with shapes given by 'target_shapes'. The reshaped tensors are returned as a tuple in the same order as their corresponding shapes.

    Note: The total number of elements in 'input_vector' must be equal to the sum of the products of the shapes in 'target_shapes'.

    PARAMETER DESCRIPTION input_vector

    The 1D tensor to be reshaped. Must be 1D.

    TYPE: Tensor

    target_shapes

    An iterable of tuples. Each tuple defines the shape of a tensor to be reshaped from the 'input_vector'.

    TYPE: Iterable[Tuple[int, ...]]

    RETURNS DESCRIPTION Tuple[Tensor, ...]

    A tuple of reshaped tensors.

    RAISES DESCRIPTION ValueError

    If 'input_vector' is not a 1D tensor or if the total number of elements in 'input_vector' does not match the sum of the products of the shapes in 'target_shapes'.

    Source code in src/pydvl/influence/torch/util.py
    def reshape_vector_to_tensors(\ninput_vector: torch.Tensor, target_shapes: Iterable[Tuple[int, ...]]\n) -> Tuple[torch.Tensor, ...]:\n\"\"\"\n    Reshape a 1D tensor into multiple tensors with specified shapes.\n    This function takes a 1D tensor (input_vector) and reshapes it into a series of tensors with shapes given by 'target_shapes'.\n    The reshaped tensors are returned as a tuple in the same order as their corresponding shapes.\n    Note: The total number of elements in 'input_vector' must be equal to the sum of the products of the shapes in 'target_shapes'.\n    Args:\n        input_vector: The 1D tensor to be reshaped. Must be 1D.\n        target_shapes: An iterable of tuples. Each tuple defines the shape of a tensor to be reshaped from the 'input_vector'.\n    Returns:\n        A tuple of reshaped tensors.\n    Raises:\n        ValueError: If 'input_vector' is not a 1D tensor or if the total number of elements in 'input_vector' does not match the sum of the products of the shapes in 'target_shapes'.\n    \"\"\"\nif input_vector.dim() != 1:\nraise ValueError(\"Input vector must be a 1D tensor\")\ntotal_elements = sum(math.prod(shape) for shape in target_shapes)\nif total_elements != input_vector.shape[0]:\nraise ValueError(\nf\"The total elements in shapes {total_elements} does not match the vector length {input_vector.shape[0]}\"\n)\ntensors = []\nstart = 0\nfor shape in target_shapes:\nsize = math.prod(shape)  # compute the total size of the tensor with this shape\ntensors.append(\ninput_vector[start : start + size].view(shape)\n)  # slice the vector and reshape it\nstart += size\nreturn tuple(tensors)\n
    "},{"location":"api/pydvl/influence/torch/util/#pydvl.influence.torch.util.align_structure","title":"align_structure(source, target)","text":"

    This function transforms target to have the same structure as source, i.e., it should be a dictionary with the same keys as source and each corresponding value in target should have the same shape as the value in source.

    PARAMETER DESCRIPTION source

    The reference dictionary containing PyTorch tensors.

    TYPE: Dict[str, Tensor]

    target

    The input to be harmonized. It can be a dictionary, tuple, or tensor.

    TYPE: TorchTensorContainerType

    RETURNS DESCRIPTION Dict[str, Tensor]

    The harmonized version of target.

    RAISES DESCRIPTION ValueError

    If target cannot be harmonized to match source.

    Source code in src/pydvl/influence/torch/util.py
    def align_structure(\nsource: Dict[str, torch.Tensor],\ntarget: TorchTensorContainerType,\n) -> Dict[str, torch.Tensor]:\n\"\"\"\n    This function transforms `target` to have the same structure as `source`, i.e.,\n    it should be a dictionary with the same keys as `source` and each corresponding\n    value in `target` should have the same shape as the value in `source`.\n    Args:\n        source: The reference dictionary containing PyTorch tensors.\n        target: The input to be harmonized. It can be a dictionary, tuple, or tensor.\n    Returns:\n        The harmonized version of `target`.\n    Raises:\n        ValueError: If `target` cannot be harmonized to match `source`.\n    \"\"\"\ntangent_dict: Dict[str, torch.Tensor]\nif isinstance(target, dict):\nif list(target.keys()) != list(source.keys()):\nraise ValueError(\"The keys in 'target' do not match the keys in 'source'.\")\nif [v.shape for v in target.values()] != [v.shape for v in source.values()]:\nraise ValueError(\n\"The shapes of the values in 'target' do not match the shapes of the values in 'source'.\"\n)\ntangent_dict = target\nelif isinstance(target, tuple) or isinstance(target, list):\nif [v.shape for v in target] != [v.shape for v in source.values()]:\nraise ValueError(\n\"'target' is a tuple/list but its elements' shapes do not match the shapes \"\n\"of the values in 'source'.\"\n)\ntangent_dict = dict(zip(source.keys(), target))\nelif isinstance(target, torch.Tensor):\ntry:\ntangent_dict = dict(\nzip(\nsource.keys(),\nreshape_vector_to_tensors(\ntarget, [p.shape for p in source.values()]\n),\n)\n)\nexcept Exception as e:\nraise ValueError(\nf\"'target' is a tensor but cannot be reshaped to match 'source'. Original error: {e}\"\n)\nelse:\nraise ValueError(f\"'target' is of type {type(target)} which is not supported.\")\nreturn tangent_dict\n
    "},{"location":"api/pydvl/influence/torch/util/#pydvl.influence.torch.util.as_tensor","title":"as_tensor(a, warn=True, **kwargs)","text":"

    Converts an array into a torch tensor.

    PARAMETER DESCRIPTION a

    Array to convert to tensor.

    TYPE: Any

    warn

    If True, warns that a will be converted.

    DEFAULT: True

    RETURNS DESCRIPTION Tensor

    A torch tensor converted from the input array.

    Source code in src/pydvl/influence/torch/util.py
    def as_tensor(a: Any, warn=True, **kwargs) -> torch.Tensor:\n\"\"\"\n    Converts an array into a torch tensor.\n    Args:\n        a: Array to convert to tensor.\n        warn: If True, warns that `a` will be converted.\n    Returns:\n        A torch tensor converted from the input array.\n    \"\"\"\nif warn and not isinstance(a, torch.Tensor):\nlogger.warning(\"Converting tensor to type torch.Tensor.\")\nreturn torch.as_tensor(a, **kwargs)\n
    "},{"location":"api/pydvl/parallel/","title":"Parallel","text":"

    This module provides a common interface to parallelization backends. The list of supported backends is here. Backends can be selected with the backend argument of an instance of ParallelConfig, as seen in the examples below.

    We use executors to submit tasks in parallel. The basic high-level pattern is

    from pydvl.parallel import init_executor, ParallelConfig\nconfig = ParallelConfig(backend=\"ray\")\nwith init_executor(max_workers=1, config=config) as executor:\nfuture = executor.submit(lambda x: x + 1, 1)\nresult = future.result()\nassert result == 2\n

    Running a map-reduce job is also easy:

    from pydvl.parallel import init_executor, ParallelConfig\nconfig = ParallelConfig(backend=\"joblib\")\nwith init_executor(config=config) as executor:\nresults = list(executor.map(lambda x: x + 1, range(5)))\nassert results == [1, 2, 3, 4, 5]\n

    There is an alternative map-reduce implementation MapReduceJob which internally uses joblib's higher level API with Parallel()

    "},{"location":"api/pydvl/parallel/backend/","title":"Backend","text":""},{"location":"api/pydvl/parallel/backend/#pydvl.parallel.backend.CancellationPolicy","title":"CancellationPolicy","text":"

    Bases: Flag

    Policy to use when cancelling futures after exiting an Executor.

    Note

    Not all backends support all policies.

    ATTRIBUTE DESCRIPTION NONE

    Do not cancel any futures.

    PENDING

    Cancel all pending futures, but not running ones.

    RUNNING

    Cancel all running futures, but not pending ones.

    ALL

    Cancel all pending and running futures.

    "},{"location":"api/pydvl/parallel/backend/#pydvl.parallel.backend.BaseParallelBackend","title":"BaseParallelBackend","text":"

    Abstract base class for all parallel backends.

    "},{"location":"api/pydvl/parallel/backend/#pydvl.parallel.backend.BaseParallelBackend.executor","title":"executor(max_workers=None, config=ParallelConfig(), cancel_futures=CancellationPolicy.PENDING) abstractmethod classmethod","text":"

    Returns an executor for the parallel backend.

    Source code in src/pydvl/parallel/backend.py
    @classmethod\n@abstractmethod\ndef executor(\ncls,\nmax_workers: int | None = None,\nconfig: ParallelConfig = ParallelConfig(),\ncancel_futures: CancellationPolicy = CancellationPolicy.PENDING,\n) -> Executor:\n\"\"\"Returns an executor for the parallel backend.\"\"\"\n...\n
    "},{"location":"api/pydvl/parallel/backend/#pydvl.parallel.backend.init_parallel_backend","title":"init_parallel_backend(config)","text":"

    Initializes the parallel backend and returns an instance of it.

    The following example creates a parallel backend instance with the default configuration, which is a local joblib backend.

    Example
    config = ParallelConfig()\nparallel_backend = init_parallel_backend(config)\n

    To create a parallel backend instance with a different backend, e.g. ray, you can pass the backend name as a string to the constructor of ParallelConfig.

    Example
    config = ParallelConfig(backend=\"ray\")\nparallel_backend = init_parallel_backend(config)\n
    PARAMETER DESCRIPTION config

    instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig

    Source code in src/pydvl/parallel/backend.py
    def init_parallel_backend(config: ParallelConfig) -> BaseParallelBackend:\n\"\"\"Initializes the parallel backend and returns an instance of it.\n    The following example creates a parallel backend instance with the default\n    configuration, which is a local joblib backend.\n    ??? Example\n        ``` python\n        config = ParallelConfig()\n        parallel_backend = init_parallel_backend(config)\n        ```\n    To create a parallel backend instance with a different backend, e.g. ray,\n    you can pass the backend name as a string to the constructor of\n    [ParallelConfig][pydvl.utils.config.ParallelConfig].\n    ??? Example\n        ```python\n        config = ParallelConfig(backend=\"ray\")\n        parallel_backend = init_parallel_backend(config)\n        ```\n    Args:\n        config: instance of [ParallelConfig][pydvl.utils.config.ParallelConfig]\n            with cluster address, number of cpus, etc.\n    \"\"\"\ntry:\nparallel_backend_cls = BaseParallelBackend.BACKENDS[config.backend]\nexcept KeyError:\nraise NotImplementedError(f\"Unexpected parallel backend {config.backend}\")\nreturn parallel_backend_cls.create(config)  # type: ignore\n
    "},{"location":"api/pydvl/parallel/backend/#pydvl.parallel.backend.available_cpus","title":"available_cpus()","text":"

    Platform-independent count of available cores.

    FIXME: do we really need this or is os.cpu_count enough? Is this portable?

    RETURNS DESCRIPTION int

    Number of cores, or 1 if it is not possible to determine.

    Source code in src/pydvl/parallel/backend.py
    def available_cpus() -> int:\n\"\"\"Platform-independent count of available cores.\n    FIXME: do we really need this or is `os.cpu_count` enough? Is this portable?\n    Returns:\n        Number of cores, or 1 if it is not possible to determine.\n    \"\"\"\nfrom platform import system\nif system() != \"Linux\":\nreturn os.cpu_count() or 1\nreturn len(os.sched_getaffinity(0))  # type: ignore\n
    "},{"location":"api/pydvl/parallel/backend/#pydvl.parallel.backend.effective_n_jobs","title":"effective_n_jobs(n_jobs, config=ParallelConfig())","text":"

    Returns the effective number of jobs.

    This number may vary depending on the parallel backend and the resources available.

    PARAMETER DESCRIPTION n_jobs

    the number of jobs requested. If -1, the number of available CPUs is returned.

    TYPE: int

    config

    instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    RETURNS DESCRIPTION int

    The effective number of jobs, guaranteed to be >= 1.

    RAISES DESCRIPTION RuntimeError

    if the effective number of jobs returned by the backend is < 1.

    Source code in src/pydvl/parallel/backend.py
    def effective_n_jobs(n_jobs: int, config: ParallelConfig = ParallelConfig()) -> int:\n\"\"\"Returns the effective number of jobs.\n    This number may vary depending on the parallel backend and the resources\n    available.\n    Args:\n        n_jobs: the number of jobs requested. If -1, the number of available\n            CPUs is returned.\n        config: instance of [ParallelConfig][pydvl.utils.config.ParallelConfig] with\n            cluster address, number of cpus, etc.\n    Returns:\n        The effective number of jobs, guaranteed to be >= 1.\n    Raises:\n        RuntimeError: if the effective number of jobs returned by the backend\n            is < 1.\n    \"\"\"\nparallel_backend = init_parallel_backend(config)\nif (eff_n_jobs := parallel_backend.effective_n_jobs(n_jobs)) < 1:\nraise RuntimeError(\nf\"Invalid number of jobs {eff_n_jobs} obtained from parallel backend {config.backend}\"\n)\nreturn eff_n_jobs\n
    "},{"location":"api/pydvl/parallel/config/","title":"Config","text":""},{"location":"api/pydvl/parallel/config/#pydvl.parallel.config.ParallelConfig","title":"ParallelConfig dataclass","text":"

    Configuration for parallel computation backend.

    PARAMETER DESCRIPTION backend

    Type of backend to use. Defaults to 'joblib'

    TYPE: Literal['joblib', 'ray'] DEFAULT: 'joblib'

    address

    Address of existing remote or local cluster to use.

    TYPE: Optional[Union[str, Tuple[str, int]]] DEFAULT: None

    n_cpus_local

    Number of CPUs to use when creating a local ray cluster. This has no effect when using an existing ray cluster.

    TYPE: Optional[int] DEFAULT: None

    logging_level

    Logging level for the parallel backend's worker.

    TYPE: int DEFAULT: WARNING

    wait_timeout

    Timeout in seconds for waiting on futures.

    TYPE: float DEFAULT: 1.0

    "},{"location":"api/pydvl/parallel/map_reduce/","title":"Map reduce","text":"

    This module contains a wrapper around joblib's Parallel() class that makes it easy to run map-reduce jobs.

    Deprecation

    This interface might be deprecated or changed in a future release before 1.0

    "},{"location":"api/pydvl/parallel/map_reduce/#pydvl.parallel.map_reduce.MapReduceJob","title":"MapReduceJob(inputs, map_func, reduce_func=identity, map_kwargs=None, reduce_kwargs=None, config=ParallelConfig(), *, n_jobs=-1, timeout=None)","text":"

    Bases: Generic[T, R]

    Takes an embarrassingly parallel function and runs it in n_jobs parallel jobs, splitting the data evenly into a number of chunks equal to the number of jobs.

    Typing information for objects of this class requires the type of the inputs that are split for map_func and the type of its output.

    PARAMETER DESCRIPTION inputs

    The input that will be split and passed to map_func. If it is not a sequence object, it will be repeated n_jobs times.

    TYPE: Union[Collection[T], T]

    map_func

    Function that will be applied to the input chunks in each job.

    TYPE: MapFunction[R]

    reduce_func

    Function that will be applied to the results of map_func to reduce them.

    TYPE: ReduceFunction[R] DEFAULT: identity

    map_kwargs

    Keyword arguments that will be passed to map_func in each job. Alternatively, one can use functools.partial.

    TYPE: Optional[Dict] DEFAULT: None

    reduce_kwargs

    Keyword arguments that will be passed to reduce_func when it is applied to the results of the map jobs. Alternatively, one can use functools.partial.

    TYPE: Optional[Dict] DEFAULT: None

    config

    Instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    n_jobs

    Number of parallel jobs to run. Does not accept 0

    TYPE: int DEFAULT: -1

    Example

    A simple usage example with 2 jobs:

    >>> from pydvl.parallel import MapReduceJob\n>>> import numpy as np\n>>> map_reduce_job: MapReduceJob[np.ndarray, np.ndarray] = MapReduceJob(\n...     np.arange(5),\n...     map_func=np.sum,\n...     reduce_func=np.sum,\n...     n_jobs=2,\n... )\n>>> map_reduce_job()\n10\n

    When passed a single object as input, it will be repeated for each job:

    >>> from pydvl.parallel import MapReduceJob\n>>> import numpy as np\n>>> map_reduce_job: MapReduceJob[int, np.ndarray] = MapReduceJob(\n...     5,\n...     map_func=lambda x: np.array([x]),\n...     reduce_func=np.sum,\n...     n_jobs=2,\n... )\n>>> map_reduce_job()\n10\n

    Source code in src/pydvl/parallel/map_reduce.py
    def __init__(\nself,\ninputs: Union[Collection[T], T],\nmap_func: MapFunction[R],\nreduce_func: ReduceFunction[R] = identity,\nmap_kwargs: Optional[Dict] = None,\nreduce_kwargs: Optional[Dict] = None,\nconfig: ParallelConfig = ParallelConfig(),\n*,\nn_jobs: int = -1,\ntimeout: Optional[float] = None,\n):\nself.config = config\nparallel_backend = init_parallel_backend(self.config)\nself.parallel_backend = parallel_backend\nself.timeout = timeout\n# This uses the setter defined below\nself.n_jobs = n_jobs\nself.inputs_ = inputs\nself.map_kwargs = map_kwargs if map_kwargs is not None else dict()\nself.reduce_kwargs = reduce_kwargs if reduce_kwargs is not None else dict()\nself._map_func = reduce(maybe_add_argument, [\"job_id\", \"seed\"], map_func)\nself._reduce_func = reduce_func\n
    "},{"location":"api/pydvl/parallel/map_reduce/#pydvl.parallel.map_reduce.MapReduceJob.n_jobs","title":"n_jobs: int property writable","text":"

    Effective number of jobs according to the used ParallelBackend instance.

    "},{"location":"api/pydvl/parallel/map_reduce/#pydvl.parallel.map_reduce.MapReduceJob.__call__","title":"__call__(seed=None)","text":"

    Runs the map-reduce job.

    PARAMETER DESCRIPTION seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Union[Seed, SeedSequence]] DEFAULT: None

    RETURNS DESCRIPTION R

    The result of the reduce function.

    Source code in src/pydvl/parallel/map_reduce.py
    def __call__(\nself,\nseed: Optional[Union[Seed, SeedSequence]] = None,\n) -> R:\n\"\"\"\n    Runs the map-reduce job.\n    Args:\n        seed: Either an instance of a numpy random number generator or a seed for\n            it.\n    Returns:\n         The result of the reduce function.\n    \"\"\"\nif self.config.backend == \"joblib\":\nbackend = \"loky\"\nelse:\nbackend = self.config.backend\n# In joblib the levels are reversed.\n# 0 means no logging and 50 means log everything to stdout\nverbose = 50 - self.config.logging_level\nseed_seq = ensure_seed_sequence(seed)\nwith Parallel(backend=backend, n_jobs=self.n_jobs, verbose=verbose) as parallel:\nchunks = self._chunkify(self.inputs_, n_chunks=self.n_jobs)\nmap_results: List[R] = parallel(\ndelayed(self._map_func)(\nnext_chunk, job_id=j, seed=seed, **self.map_kwargs\n)\nfor j, (next_chunk, seed) in enumerate(\nzip(chunks, seed_seq.spawn(len(chunks)))\n)\n)\nreduce_results: R = self._reduce_func(map_results, **self.reduce_kwargs)\nreturn reduce_results\n
    "},{"location":"api/pydvl/parallel/backends/","title":"Backends","text":""},{"location":"api/pydvl/parallel/backends/joblib/","title":"Joblib","text":""},{"location":"api/pydvl/parallel/backends/joblib/#pydvl.parallel.backends.joblib.JoblibParallelBackend","title":"JoblibParallelBackend(config)","text":"

    Bases: BaseParallelBackend

    Class used to wrap joblib to make it transparent to algorithms.

    It shouldn't be initialized directly. You should instead call init_parallel_backend().

    PARAMETER DESCRIPTION config

    instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig

    Source code in src/pydvl/parallel/backends/joblib.py
    def __init__(self, config: ParallelConfig):\nself.config = {\n\"logging_level\": config.logging_level,\n\"n_jobs\": config.n_cpus_local,\n}\n
    "},{"location":"api/pydvl/parallel/backends/joblib/#pydvl.parallel.backends.joblib.JoblibParallelBackend.wrap","title":"wrap(fun, **kwargs)","text":"

    Wraps a function as a joblib delayed.

    PARAMETER DESCRIPTION fun

    the function to wrap

    TYPE: Callable

    RETURNS DESCRIPTION Callable

    The delayed function.

    Source code in src/pydvl/parallel/backends/joblib.py
    def wrap(self, fun: Callable, **kwargs) -> Callable:\n\"\"\"Wraps a function as a joblib delayed.\n    Args:\n        fun: the function to wrap\n    Returns:\n        The delayed function.\n    \"\"\"\nreturn delayed(fun)  # type: ignore\n
    "},{"location":"api/pydvl/parallel/backends/ray/","title":"Ray","text":""},{"location":"api/pydvl/parallel/backends/ray/#pydvl.parallel.backends.ray.RayParallelBackend","title":"RayParallelBackend(config)","text":"

    Bases: BaseParallelBackend

    Class used to wrap ray to make it transparent to algorithms.

    It shouldn't be initialized directly. You should instead call init_parallel_backend().

    PARAMETER DESCRIPTION config

    instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig

    Source code in src/pydvl/parallel/backends/ray.py
    def __init__(self, config: ParallelConfig):\nself.config = {\"address\": config.address, \"logging_level\": config.logging_level}\nif self.config[\"address\"] is None:\nself.config[\"num_cpus\"] = config.n_cpus_local\nif not ray.is_initialized():\nray.init(**self.config)\n# Register ray joblib backend\nregister_ray()\n
    "},{"location":"api/pydvl/parallel/backends/ray/#pydvl.parallel.backends.ray.RayParallelBackend.wrap","title":"wrap(fun, **kwargs)","text":"

    Wraps a function as a ray remote.

    PARAMETER DESCRIPTION fun

    the function to wrap

    TYPE: Callable

    kwargs

    keyword arguments to pass to @ray.remote

    DEFAULT: {}

    RETURNS DESCRIPTION Callable

    The .remote method of the ray RemoteFunction.

    Source code in src/pydvl/parallel/backends/ray.py
    def wrap(self, fun: Callable, **kwargs) -> Callable:\n\"\"\"Wraps a function as a ray remote.\n    Args:\n        fun: the function to wrap\n        kwargs: keyword arguments to pass to @ray.remote\n    Returns:\n        The `.remote` method of the ray `RemoteFunction`.\n    \"\"\"\nif len(kwargs) > 0:\nreturn ray.remote(**kwargs)(fun).remote  # type: ignore\nreturn ray.remote(fun).remote  # type: ignore\n
    "},{"location":"api/pydvl/parallel/futures/","title":"Futures","text":""},{"location":"api/pydvl/parallel/futures/#pydvl.parallel.futures.init_executor","title":"init_executor(max_workers=None, config=ParallelConfig(), **kwargs)","text":"

    Initializes a futures executor for the given parallel configuration.

    PARAMETER DESCRIPTION max_workers

    Maximum number of concurrent tasks.

    TYPE: Optional[int] DEFAULT: None

    config

    instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    kwargs

    Other optional parameters that will be passed to the executor.

    DEFAULT: {}

    Examples

    from pydvl.parallel import init_executor, ParallelConfig\nconfig = ParallelConfig(backend=\"ray\")\nwith init_executor(max_workers=1, config=config) as executor:\nfuture = executor.submit(lambda x: x + 1, 1)\nresult = future.result()\nassert result == 2\n
    from pydvl.parallel.futures import init_executor\nwith init_executor() as executor:\nresults = list(executor.map(lambda x: x + 1, range(5)))\nassert results == [1, 2, 3, 4, 5]\n

    Source code in src/pydvl/parallel/futures/__init__.py
    @contextmanager\ndef init_executor(\nmax_workers: Optional[int] = None,\nconfig: ParallelConfig = ParallelConfig(),\n**kwargs,\n) -> Generator[Executor, None, None]:\n\"\"\"Initializes a futures executor for the given parallel configuration.\n    Args:\n        max_workers: Maximum number of concurrent tasks.\n        config: instance of [ParallelConfig][pydvl.utils.config.ParallelConfig]\n            with cluster address, number of cpus, etc.\n        kwargs: Other optional parameter that will be passed to the executor.\n    ??? Examples\n        ``` python\n        from pydvl.parallel import init_executor, ParallelConfig\n        config = ParallelConfig(backend=\"ray\")\n        with init_executor(max_workers=1, config=config) as executor:\n            future = executor.submit(lambda x: x + 1, 1)\n            result = future.result()\n        assert result == 2\n        ```\n        ``` python\n        from pydvl.parallel.futures import init_executor\n        with init_executor() as executor:\n            results = list(executor.map(lambda x: x + 1, range(5)))\n        assert results == [1, 2, 3, 4, 5]\n        ```\n    \"\"\"\ntry:\ncls = BaseParallelBackend.BACKENDS[config.backend]\nwith cls.executor(max_workers=max_workers, config=config, **kwargs) as e:\nyield e\nexcept KeyError:\nraise NotImplementedError(f\"Unexpected parallel backend {config.backend}\")\n
    "},{"location":"api/pydvl/parallel/futures/ray/","title":"Ray","text":""},{"location":"api/pydvl/parallel/futures/ray/#pydvl.parallel.futures.ray.RayExecutor","title":"RayExecutor(max_workers=None, *, config=ParallelConfig(), cancel_futures=CancellationPolicy.ALL)","text":"

    Bases: Executor

    Asynchronous executor using Ray that implements the concurrent.futures API.

    It shouldn't be initialized directly. You should instead call init_executor().

    PARAMETER DESCRIPTION max_workers

    Maximum number of concurrent tasks. Each task can request itself any number of vCPUs. You must ensure the product of this value and the n_cpus_per_job parameter passed to submit() does not exceed available cluster resources. If set to None, it will default to the total number of vCPUs in the ray cluster.

    TYPE: Optional[int] DEFAULT: None

    config

    instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    cancel_futures

    Select which futures will be cancelled when exiting this context manager. All is the default, which cancels all pending and running futures. Pending cancels all pending futures, but not running ones, as done by concurrent.futures.ProcessPoolExecutor, and None doesn't cancel any. See CancellationPolicy.

    TYPE: CancellationPolicy DEFAULT: ALL

    Source code in src/pydvl/parallel/futures/ray.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.8.0\",\nargs_mapping={\"cancel_futures_on_exit\": \"cancel_futures\"},\n)\ndef __init__(\nself,\nmax_workers: Optional[int] = None,\n*,\nconfig: ParallelConfig = ParallelConfig(),\ncancel_futures: CancellationPolicy = CancellationPolicy.ALL,\n):\nif config.backend != \"ray\":\nraise ValueError(\nf\"Parallel backend must be set to 'ray' and not '{config.backend}'\"\n)\nif max_workers is not None:\nif max_workers <= 0:\nraise ValueError(\"max_workers must be greater than 0\")\nmax_workers = max_workers\nif isinstance(cancel_futures, CancellationPolicy):\nself._cancel_futures = cancel_futures\nelse:\nself._cancel_futures = (\nCancellationPolicy.PENDING\nif cancel_futures\nelse CancellationPolicy.NONE\n)\nself.config = {\"address\": config.address, \"logging_level\": config.logging_level}\nif config.address is None:\nself.config[\"num_cpus\"] = config.n_cpus_local\nif not ray.is_initialized():\nray.init(**self.config)\nself._max_workers = max_workers\nif self._max_workers is None:\nself._max_workers = int(ray._private.state.cluster_resources()[\"CPU\"])\nself._shutdown = False\nself._shutdown_lock = threading.Lock()\nself._queue_lock = threading.Lock()\nself._work_queue: \"queue.Queue[Optional[_WorkItem]]\" = queue.Queue(\nmaxsize=self._max_workers\n)\nself._pending_queue: \"queue.SimpleQueue[Optional[_WorkItem]]\" = (\nqueue.SimpleQueue()\n)\n# Work Item Manager Thread\nself._work_item_manager_thread: Optional[_WorkItemManagerThread] = None\n
    "},{"location":"api/pydvl/parallel/futures/ray/#pydvl.parallel.futures.ray.RayExecutor.submit","title":"submit(fn, *args, **kwargs)","text":"

    Submits a callable to be executed with the given arguments.

    Schedules the callable to be executed as fn(*args, **kwargs) and returns a Future instance representing the execution of the callable.

    PARAMETER DESCRIPTION fn

    Callable.

    TYPE: Callable[..., T]

    args

    Positional arguments that will be passed to fn.

    DEFAULT: ()

    kwargs

    Keyword arguments that will be passed to fn. It can also optionally contain options for the ray remote function as a dictionary as the keyword argument remote_function_options.

    DEFAULT: {}

    Returns: A Future representing the given call.

    RAISES DESCRIPTION RuntimeError

    If a task is submitted after the executor has been shut down.

    Source code in src/pydvl/parallel/futures/ray.py
    def submit(self, fn: Callable[..., T], *args, **kwargs) -> \"Future[T]\":\nr\"\"\"Submits a callable to be executed with the given arguments.\n    Schedules the callable to be executed as fn(\\*args, \\**kwargs)\n    and returns a Future instance representing the execution of the callable.\n    Args:\n        fn: Callable.\n        args: Positional arguments that will be passed to `fn`.\n        kwargs: Keyword arguments that will be passed to `fn`.\n            It can also optionally contain options for the ray remote function\n            as a dictionary as the keyword argument `remote_function_options`.\n    Returns:\n        A Future representing the given call.\n    Raises:\n        RuntimeError: If a task is submitted after the executor has been shut down.\n    \"\"\"\nwith self._shutdown_lock:\nlogger.debug(\"executor acquired shutdown lock\")\nif self._shutdown:\nraise RuntimeError(\"cannot schedule new futures after shutdown\")\nlogging.debug(\"Creating future and putting work item in work queue\")\nfuture: \"Future[T]\" = Future()\nremote_function_options = kwargs.pop(\"remote_function_options\", None)\nw = _WorkItem(\nfuture,\nfn,\nargs,\nkwargs,\nremote_function_options=remote_function_options,\n)\nself._put_work_item_in_queue(w)\n# We delay starting the thread until the first call to submit\nself._start_work_item_manager_thread()\nreturn future\n
    "},{"location":"api/pydvl/parallel/futures/ray/#pydvl.parallel.futures.ray.RayExecutor.shutdown","title":"shutdown(wait=True, *, cancel_futures=None)","text":"

    Clean up the resources associated with the Executor.

    This method tries to mimic the behaviour of Executor.shutdown while allowing one more value for cancel_futures which instructs it to use the CancellationPolicy defined upon construction.

    PARAMETER DESCRIPTION wait

    Whether to wait for pending futures to finish.

    TYPE: bool DEFAULT: True

    cancel_futures

    Overrides the executor's default policy for cancelling futures on exit. If True, all pending futures are cancelled, and if False, no futures are cancelled. If None (default), the executor's policy set at initialization is used.

    TYPE: Optional[bool] DEFAULT: None

    Source code in src/pydvl/parallel/futures/ray.py
    def shutdown(\nself, wait: bool = True, *, cancel_futures: Optional[bool] = None\n) -> None:\n\"\"\"Clean up the resources associated with the Executor.\n    This method tries to mimic the behaviour of\n    [Executor.shutdown][concurrent.futures.Executor.shutdown]\n    while allowing one more value for ``cancel_futures`` which instructs it\n    to use the [CancellationPolicy][pydvl.parallel.backend.CancellationPolicy]\n    defined upon construction.\n    Args:\n        wait: Whether to wait for pending futures to finish.\n        cancel_futures: Overrides the executor's default policy for\n            cancelling futures on exit. If ``True``, all pending futures are\n            cancelled, and if ``False``, no futures are cancelled. If ``None``\n            (default), the executor's policy set at initialization is used.\n    \"\"\"\nlogger.debug(\"executor shutting down\")\nwith self._shutdown_lock:\nlogger.debug(\"executor acquired shutdown lock\")\nself._shutdown = True\nself._cancel_futures = {\nNone: self._cancel_futures,\nTrue: CancellationPolicy.PENDING,\nFalse: CancellationPolicy.NONE,\n}[cancel_futures]\nif wait:\nlogger.debug(\"executor waiting for futures to finish\")\nif self._work_item_manager_thread is not None:\n# Putting None in the queue to signal\n# to work item manager thread that we are shutting down\nself._put_work_item_in_queue(None)\nlogger.debug(\n\"executor waiting for work item manager thread to terminate\"\n)\nself._work_item_manager_thread.join()\n# To reduce the risk of opening too many files, remove references to\n# objects that use file descriptors.\nself._work_item_manager_thread = None\ndel self._work_queue\ndel self._pending_queue\n
    "},{"location":"api/pydvl/parallel/futures/ray/#pydvl.parallel.futures.ray.RayExecutor.__exit__","title":"__exit__(exc_type, exc_val, exc_tb)","text":"

    Exit the runtime context related to the RayExecutor object.

    Source code in src/pydvl/parallel/futures/ray.py
    def __exit__(self, exc_type, exc_val, exc_tb):\n\"\"\"Exit the runtime context related to the RayExecutor object.\"\"\"\nself.shutdown()\nreturn False\n
    "},{"location":"api/pydvl/reporting/","title":"Reporting","text":""},{"location":"api/pydvl/reporting/plots/","title":"Plots","text":""},{"location":"api/pydvl/reporting/plots/#pydvl.reporting.plots.shaded_mean_std","title":"shaded_mean_std(data, abscissa=None, num_std=1.0, mean_color='dodgerblue', shade_color='lightblue', title=None, xlabel=None, ylabel=None, ax=None, **kwargs)","text":"

    The usual mean \\(\\pm\\) std deviation plot to aggregate runs of experiments.

    PARAMETER DESCRIPTION data

    axis 0 is to be aggregated on (e.g. runs) and axis 1 is the data for each run.

    TYPE: ndarray

    abscissa

    values for the x-axis. Leave empty to use increasing integers.

    TYPE: Optional[Sequence[Any]] DEFAULT: None

    num_std

    number of standard deviations to shade around the mean.

    TYPE: float DEFAULT: 1.0

    mean_color

    color for the mean

    TYPE: Optional[str] DEFAULT: 'dodgerblue'

    shade_color

    color for the shaded region

    TYPE: Optional[str] DEFAULT: 'lightblue'

    title

    Title text. To use mathematics, use LaTeX notation.

    TYPE: Optional[str] DEFAULT: None

    xlabel

    Text for the horizontal axis.

    TYPE: Optional[str] DEFAULT: None

    ylabel

    Text for the vertical axis

    TYPE: Optional[str] DEFAULT: None

    ax

    If passed, axes object into which to insert the figure. Otherwise, a new figure is created and its axes are returned.

    TYPE: Optional[Axes] DEFAULT: None

    kwargs

    these are forwarded to the ax.plot() call for the mean.

    DEFAULT: {}

    RETURNS DESCRIPTION Axes

    The axes used (or created)

    Source code in src/pydvl/reporting/plots.py
    def shaded_mean_std(\ndata: np.ndarray,\nabscissa: Optional[Sequence[Any]] = None,\nnum_std: float = 1.0,\nmean_color: Optional[str] = \"dodgerblue\",\nshade_color: Optional[str] = \"lightblue\",\ntitle: Optional[str] = None,\nxlabel: Optional[str] = None,\nylabel: Optional[str] = None,\nax: Optional[Axes] = None,\n**kwargs,\n) -> Axes:\n\"\"\"The usual mean \\(\\pm\\) std deviation plot to aggregate runs of experiments.\n    Args:\n        data: axis 0 is to be aggregated on (e.g. runs) and axis 1 is the\n            data for each run.\n        abscissa: values for the x-axis. Leave empty to use increasing integers.\n        num_std: number of standard deviations to shade around the mean.\n        mean_color: color for the mean\n        shade_color: color for the shaded region\n        title: Title text. To use mathematics, use LaTeX notation.\n        xlabel: Text for the horizontal axis.\n        ylabel: Text for the vertical axis\n        ax: If passed, axes object into which to insert the figure. Otherwise,\n            a new figure is created and returned\n        kwargs: these are forwarded to the ax.plot() call for the mean.\n    Returns:\n        The axes used (or created)\n    \"\"\"\nassert len(data.shape) == 2\nmean = data.mean(axis=0)\nstd = num_std * data.std(axis=0)\nif ax is None:\nfig, ax = plt.subplots()\nif abscissa is None:\nabscissa = list(range(data.shape[1]))\nax.fill_between(abscissa, mean - std, mean + std, alpha=0.3, color=shade_color)\nax.plot(abscissa, mean, color=mean_color, **kwargs)\nax.set_title(title)\nax.set_xlabel(xlabel)\nax.set_ylabel(ylabel)\nreturn ax\n
    "},{"location":"api/pydvl/reporting/plots/#pydvl.reporting.plots.spearman_correlation","title":"spearman_correlation(vv, num_values, pvalue)","text":"

    Simple matrix plots with spearman correlation for each pair in vv.

    PARAMETER DESCRIPTION vv

    list of OrderedDicts with index: value. Spearman correlation is computed for the keys.

    TYPE: List[OrderedDict]

    num_values

    Use only these many values from the data (from the start of the OrderedDicts)

    TYPE: int

    pvalue

    correlation coefficients for which the p-value is not below the threshold pvalue/len(vv) will be discarded (set to NaN).

    TYPE: float

    Source code in src/pydvl/reporting/plots.py
    def spearman_correlation(vv: List[OrderedDict], num_values: int, pvalue: float):\n\"\"\"Simple matrix plots with spearman correlation for each pair in vv.\n    Args:\n        vv: list of OrderedDicts with index: value. Spearman correlation\n            is computed for the keys.\n        num_values: Use only these many values from the data (from the start\n            of the OrderedDicts)\n        pvalue: correlation coefficients for which the p-value is below the\n            threshold `pvalue/len(vv)` will be discarded.\n    \"\"\"\nr: np.ndarray = np.ndarray((len(vv), len(vv)))\np: np.ndarray = np.ndarray((len(vv), len(vv)))\nfor i, a in enumerate(vv):\nfor j, b in enumerate(vv):\nfrom scipy.stats._stats_py import SpearmanrResult\nspearman: SpearmanrResult = sp.stats.spearmanr(\nlist(a.keys())[:num_values], list(b.keys())[:num_values]\n)\nr[i][j] = (\nspearman.correlation if spearman.pvalue < pvalue / len(vv) else np.nan\n)  # Bonferroni correction\np[i][j] = spearman.pvalue\nfig, axs = plt.subplots(1, 2, figsize=(16, 7))\nplot1 = axs[0].matshow(r, vmin=-1, vmax=1)\naxs[0].set_title(f\"Spearman correlation (top {num_values} values)\")\naxs[0].set_xlabel(\"Runs\")\naxs[0].set_ylabel(\"Runs\")\nfig.colorbar(plot1, ax=axs[0])\nplot2 = axs[1].matshow(p, vmin=0, vmax=1)\naxs[1].set_title(\"p-value\")\naxs[1].set_xlabel(\"Runs\")\naxs[1].set_ylabel(\"Runs\")\nfig.colorbar(plot2, ax=axs[1])\nreturn fig\n
    "},{"location":"api/pydvl/reporting/plots/#pydvl.reporting.plots.plot_shapley","title":"plot_shapley(df, *, level=0.05, ax=None, title=None, xlabel=None, ylabel=None)","text":"

    Plots the shapley values, as returned from compute_shapley_values, with error bars corresponding to an \\(\\alpha\\)-level confidence interval.

    PARAMETER DESCRIPTION df

    dataframe with the shapley values

    TYPE: DataFrame

    level

    significance level for the error bars: they show a (1 - level) confidence interval, so the default of 0.05 gives a 95% interval.

    TYPE: float DEFAULT: 0.05

    ax

    axes to plot on, or None if a new figure and axes should be created

    TYPE: Optional[Axes] DEFAULT: None

    title

    string, title of the plot

    TYPE: Optional[str] DEFAULT: None

    xlabel

    string, x label of the plot

    TYPE: Optional[str] DEFAULT: None

    ylabel

    string, y label of the plot

    TYPE: Optional[str] DEFAULT: None

    RETURNS DESCRIPTION Axes

    The axes created or used

    Source code in src/pydvl/reporting/plots.py
    def plot_shapley(\ndf: pd.DataFrame,\n*,\nlevel: float = 0.05,\nax: Optional[plt.Axes] = None,\ntitle: Optional[str] = None,\nxlabel: Optional[str] = None,\nylabel: Optional[str] = None,\n) -> plt.Axes:\nr\"\"\"Plots the shapley values, as returned from\n    [compute_shapley_values][pydvl.value.shapley.common.compute_shapley_values], with error bars\n    corresponding to an $\\alpha$-level confidence interval.\n    Args:\n        df: dataframe with the shapley values\n        level: confidence level for the error bars\n        ax: axes to plot on or None if a new subplots should be created\n        title: string, title of the plot\n        xlabel: string, x label of the plot\n        ylabel: string, y label of the plot\n    Returns:\n        The axes created or used\n    \"\"\"\nif ax is None:\n_, ax = plt.subplots()\nyerr = norm.ppf(1 - level / 2) * df[\"data_value_stderr\"]\nax.errorbar(x=df.index, y=df[\"data_value\"], yerr=yerr, fmt=\"o\", capsize=6)\nax.set_xlabel(xlabel)\nax.set_ylabel(ylabel)\nax.set_title(title)\nplt.xticks(rotation=60)\nreturn ax\n
    "},{"location":"api/pydvl/reporting/plots/#pydvl.reporting.plots.plot_influence_distribution_by_label","title":"plot_influence_distribution_by_label(influences, labels, title_extra='')","text":"

    Plots the histogram of the influence that all samples in the training set have over a single sample index, separated by labels.

    PARAMETER DESCRIPTION influences

    array of influences (training samples x test samples)

    TYPE: NDArray[float_]

    labels

    labels for the training set.

    TYPE: NDArray[float_]

    title_extra

    String appended to the plot title.

    TYPE: str DEFAULT: ''

    Source code in src/pydvl/reporting/plots.py
    def plot_influence_distribution_by_label(\ninfluences: NDArray[np.float_], labels: NDArray[np.float_], title_extra: str = \"\"\n):\n\"\"\"Plots the histogram of the influence that all samples in the training set\n    have over a single sample index, separated by labels.\n    Args:\n       influences: array of influences (training samples x test samples)\n       labels: labels for the training set.\n       title_extra:\n    \"\"\"\n_, ax = plt.subplots()\nunique_labels = np.unique(labels)\nfor label in unique_labels:\nax.hist(influences[labels == label], label=label, alpha=0.7)\nax.set_xlabel(\"Influence values\")\nax.set_ylabel(\"Number of samples\")\nax.set_title(f\"Distribution of influences \" + title_extra)\nax.legend()\nplt.show()\n
    "},{"location":"api/pydvl/reporting/scores/","title":"Scores","text":""},{"location":"api/pydvl/reporting/scores/#pydvl.reporting.scores.compute_removal_score","title":"compute_removal_score(u, values, percentages, *, remove_best=False, progress=False)","text":"

    Fits model and computes score on the test set after incrementally removing a percentage of data points from the training set, based on their values.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    values

    Data values of data instances in the training set.

    TYPE: ValuationResult

    percentages

    Sequence of removal percentages.

    TYPE: Union[NDArray[float_], Iterable[float]]

    remove_best

    If True, removes data points in order of decreasing valuation.

    TYPE: bool DEFAULT: False

    progress

    If True, display a progress bar.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION Dict[float, float]

    Dictionary that maps the percentages to their respective scores.

    Source code in src/pydvl/reporting/scores.py
    def compute_removal_score(\nu: Utility,\nvalues: ValuationResult,\npercentages: Union[NDArray[np.float_], Iterable[float]],\n*,\nremove_best: bool = False,\nprogress: bool = False,\n) -> Dict[float, float]:\nr\"\"\"Fits model and computes score on the test set after incrementally removing\n    a percentage of data points from the training set, based on their values.\n    Args:\n        u: Utility object with model, data, and scoring function.\n        values: Data values of data instances in the training set.\n        percentages: Sequence of removal percentages.\n        remove_best: If True, removes data points in order of decreasing valuation.\n        progress: If True, display a progress bar.\n    Returns:\n        Dictionary that maps the percentages to their respective scores.\n    \"\"\"\n# Sanity checks\nif np.any([x >= 1.0 or x < 0.0 for x in percentages]):\nraise ValueError(\"All percentages should be in the range [0.0, 1.0)\")\nif len(values) != len(u.data.indices):\nraise ValueError(\nf\"The number of values, {len(values) }, should be equal to the number of data indices, {len(u.data.indices)}\"\n)\nscores = {}\n# We sort in descending order if we want to remove the best values\nvalues.sort(reverse=remove_best)\nfor pct in maybe_progress(percentages, display=progress, desc=\"Removal Scores\"):\nn_removal = int(pct * len(u.data))\nindices = values.indices[n_removal:]\nscore = u(indices)\nscores[pct] = score\nreturn scores\n
    "},{"location":"api/pydvl/utils/","title":"Utils","text":""},{"location":"api/pydvl/utils/caching/","title":"Caching","text":"

    Distributed caching of functions.

    pyDVL uses memcached to cache utility values, through pymemcache. This allows sharing evaluations across processes and nodes in a cluster. You can run memcached as a service, locally or remotely; see Setting up the cache.

    Warning

    Function evaluations are cached with a key based on the function's signature and code. This can lead to undesired cache hits, see Cache reuse.

    Remember not to reuse utility objects for different datasets.

    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching--configuration","title":"Configuration","text":"

    Memoization is disabled by default but can be enabled easily, see Setting up the cache. When enabled, it will be added to any callable used to construct a Utility (done with the decorator @memcached). Depending on the nature of the utility you might want to enable the computation of a running average of function values, see Usage with stochastic functions. You can see all configuration options under MemcachedConfig.

    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching--default-configuration","title":"Default configuration","text":"
    default_config = dict(\nserver=('localhost', 11211),\nconnect_timeout=1.0,\ntimeout=0.1,\n# IMPORTANT! Disable small packet consolidation:\nno_delay=True,\nserde=serde.PickleSerde(pickle_version=PICKLE_VERSION)\n)\n
    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching--usage-with-stochastic-functions","title":"Usage with stochastic functions","text":"

    In addition to standard memoization, the decorator memcached() can compute running average and standard error of repeated evaluations for the same input. This can be useful for stochastic functions with high variance (e.g. model training for small sample sizes), but drastically reduces the speed benefits of memoization.

    This behaviour can be activated with the argument allow_repeated_evaluations to memcached().

    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching--cache-reuse","title":"Cache reuse","text":"

    When working directly with memcached(), it is essential to only cache pure functions. If they have any kind of state, either internal or external (e.g. a closure over some data that may change), then the cache will fail to notice this and the same value will be returned.

    When a function is wrapped with memcached() for memoization, its signature (input and output names) and code are used as a key for the cache. Alternatively you can pass a custom value to be used as key with

    cached_fun = memcached(**asdict(cache_options))(fun, signature=custom_signature)\n

    If you are running experiments with the same Utility but different datasets, this will lead to evaluations of the utility on new data returning old values because utilities only use sample indices as arguments (so there is no way to tell the difference between '1' for dataset A and '1' for dataset B from the point of view of the cache). One solution is to empty the cache between runs, but the preferred one is to use a different Utility object for each dataset.

    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching--unexpected-cache-misses","title":"Unexpected cache misses","text":"

    Because all arguments to a function are used as part of the key for the cache, sometimes one must exclude some of them. For example, if a function is going to run across multiple processes and some reporting arguments are added (like a job_id for logging purposes), these will be part of the signature and make the functions distinct to the eyes of the cache. This can be avoided with the use of ignore_args in the configuration.

    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching.CacheStats","title":"CacheStats dataclass","text":"

    Statistics gathered by cached functions.

    ATTRIBUTE DESCRIPTION sets

    number of times a value was set in the cache

    TYPE: int

    misses

    number of times a value was not found in the cache

    TYPE: int

    hits

    number of times a value was found in the cache

    TYPE: int

    timeouts

    number of times a timeout occurred

    TYPE: int

    errors

    number of times an error occurred

    TYPE: int

    reconnects

    number of times the client reconnected to the server

    TYPE: int

    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching.serialize","title":"serialize(x)","text":"

    Serialize an object to bytes. Args: x: object to serialize.

    RETURNS DESCRIPTION bytes

    serialized object.

    Source code in src/pydvl/utils/caching.py
    def serialize(x: Any) -> bytes:\n\"\"\"Serialize an object to bytes.\n    Args:\n        x: object to serialize.\n    Returns:\n        serialized object.\n    \"\"\"\npickled_output = BytesIO()\npickler = Pickler(pickled_output, PICKLE_VERSION)\npickler.dump(x)\nreturn pickled_output.getvalue()\n
    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching.memcached","title":"memcached(client_config=None, time_threshold=0.3, allow_repeated_evaluations=False, rtol_stderr=0.1, min_repetitions=3, ignore_args=None)","text":"

    Transparent, distributed memoization of function calls.

    Given a function and its signature, memcached uses a distributed cache that, for each set of inputs, keeps track of the average returned value, with variance and number of times it was calculated.

    If the function is deterministic, i.e. same input corresponds to the same exact output, set allow_repeated_evaluations to False. If instead the function is stochastic (like the training of a model depending on random initializations), memcached() allows setting a minimum number of evaluations to compute a running average, and a tolerance after which the function will not be called anymore. In other words, the function will be recomputed until the value has stabilized with a standard error smaller than rtol_stderr * running average.

    Warning

    Do not cache functions with state! See Cache reuse

    Example
    cached_fun = memcached(**asdict(cache_options))(heavy_computation)\n
    PARAMETER DESCRIPTION client_config

    configuration for pymemcache's Client. Will be merged on top of the default configuration (see below).

    TYPE: Optional[MemcachedClientConfig] DEFAULT: None

    time_threshold

    computations taking less time than this many seconds are not cached.

    TYPE: float DEFAULT: 0.3

    allow_repeated_evaluations

    If True, repeated calls to a function with the same arguments will be allowed and outputs averaged until the running standard deviation of the mean stabilizes below rtol_stderr * mean.

    TYPE: bool DEFAULT: False

    rtol_stderr

    relative tolerance for repeated evaluations. More precisely, memcached() will stop evaluating the function once the standard deviation of the mean is smaller than rtol_stderr * mean.

    TYPE: float DEFAULT: 0.1

    min_repetitions

    minimum number of times that a function evaluation on the same arguments is repeated before returning cached values. Useful for stochastic functions only. If the model training is very noisy, set this number to higher values to reduce variance.

    TYPE: int DEFAULT: 3

    ignore_args

    Do not take these keyword arguments into account when hashing the wrapped function for usage as key in memcached. This allows sharing the cache among different jobs for the same experiment run if the callable happens to have \"nuisance\" parameters like job_id which do not affect the result of the computation.

    TYPE: Optional[Iterable[str]] DEFAULT: None

    RETURNS DESCRIPTION Callable[[Callable[..., T], bytes | None], Callable[..., T]]

    A wrapped function

    Source code in src/pydvl/utils/caching.py
    def memcached(\nclient_config: Optional[MemcachedClientConfig] = None,\ntime_threshold: float = 0.3,\nallow_repeated_evaluations: bool = False,\nrtol_stderr: float = 0.1,\nmin_repetitions: int = 3,\nignore_args: Optional[Iterable[str]] = None,\n) -> Callable[[Callable[..., T], bytes | None], Callable[..., T]]:\n\"\"\"\n    Transparent, distributed memoization of function calls.\n    Given a function and its signature, memcached uses a distributed cache\n    that, for each set of inputs, keeps track of the average returned value,\n    with variance and number of times it was calculated.\n    If the function is deterministic, i.e. same input corresponds to the same\n    exact output, set `allow_repeated_evaluations` to `False`. If instead the\n    function is stochastic (like the training of a model depending on random\n    initializations), memcached() allows to set a minimum number of evaluations\n    to compute a running average, and a tolerance after which the function will\n    not be called anymore. In other words, the function will be recomputed\n    until the value has stabilized with a standard error smaller than\n    `rtol_stderr * running average`.\n    !!! Warning\n        Do not cache functions with state! See [Cache reuse](cache-reuse)\n    ??? 
Example\n        ```python\n        cached_fun = memcached(**asdict(cache_options))(heavy_computation)\n        ```\n    Args:\n        client_config: configuration for pymemcache's\n            [Client][pymemcache.client.base.Client].\n            Will be merged on top of the default configuration (see below).\n        time_threshold: computations taking less time than this many seconds are\n            not cached.\n        allow_repeated_evaluations: If `True`, repeated calls to a function\n            with the same arguments will be allowed and outputs averaged until the\n            running standard deviation of the mean stabilizes below\n            `rtol_stderr * mean`.\n        rtol_stderr: relative tolerance for repeated evaluations. More precisely,\n            [memcached()][pydvl.utils.caching.memcached] will stop evaluating the function once the\n            standard deviation of the mean is smaller than `rtol_stderr * mean`.\n        min_repetitions: minimum number of times that a function evaluation\n            on the same arguments is repeated before returning cached values. Useful\n            for stochastic functions only. If the model training is very noisy, set\n            this number to higher values to reduce variance.\n        ignore_args: Do not take these keyword arguments into account when\n            hashing the wrapped function for usage as key in memcached. 
This allows\n            sharing the cache among different jobs for the same experiment run if\n            the callable happens to have \"nuisance\" parameters like `job_id` which\n            do not affect the result of the computation.\n    Returns:\n        A wrapped function\n    \"\"\"\nif ignore_args is None:\nignore_args = []\n# Do I really need this?\ndef connect(config: MemcachedClientConfig):\n\"\"\"First tries to establish a connection, then tries setting and\n        getting a value.\"\"\"\ntry:\nclient = RetryingClient(\nClient(**asdict(config)),\nattempts=3,\nretry_delay=0.1,\nretry_for=[MemcacheUnexpectedCloseError],\n)\ntemp_key = str(uuid.uuid4())\nclient.set(temp_key, 7)\nassert client.get(temp_key) == 7\nclient.delete(temp_key, 0)\nreturn client\nexcept ConnectionRefusedError as e:\nlogger.error(  # type: ignore\nf\"@memcached: Timeout connecting \"\nf\"to {config.server} after \"\nf\"{config.connect_timeout} seconds: {str(e)}. Did you start memcached?\"\n)\nraise e\nexcept AssertionError as e:\nlogger.error(  # type: ignore\nf\"@memcached: Failure saving dummy value \"\nf\"to {config.server}: {str(e)}\"\n)\ndef wrapper(fun: Callable[..., T], signature: Optional[bytes] = None):\nif signature is None:\nsignature = serialize((fun.__code__.co_code, fun.__code__.co_consts))\n@wraps(fun, updated=[])  # don't try to use update() for a class\nclass Wrapped:\nconfig: MemcachedClientConfig\nstats: CacheStats\nclient: RetryingClient\ndef __init__(self, config: MemcachedClientConfig):\nself.config = config\nself.stats = CacheStats()\nself.client = connect(self.config)\nself._signature = signature\ndef __call__(self, *args, **kwargs) -> T:\nkey_kwargs = {k: v for k, v in kwargs.items() if k not in ignore_args}  # type: ignore\narg_signature: bytes = serialize((args, list(key_kwargs.items())))\nkey = blake2b(self._signature + arg_signature).hexdigest().encode(\"ASCII\")  # type: ignore\nresult_dict: Dict[str, float] = self.get_key_value(key)\nif result_dict 
is None:\nresult_dict = {}\nstart = time()\nvalue = fun(*args, **kwargs)\nend = time()\nresult_dict[\"value\"] = value\nif end - start >= time_threshold or allow_repeated_evaluations:\nresult_dict[\"count\"] = 1\nresult_dict[\"variance\"] = 0\nself.client.set(key, result_dict, noreply=True)\nself.stats.sets += 1\nself.stats.misses += 1\nelif allow_repeated_evaluations:\nself.stats.hits += 1\nvalue = result_dict[\"value\"]\ncount = result_dict[\"count\"]\nvariance = result_dict[\"variance\"]\nerror_on_average = (variance / count) ** (1 / 2)\nif (\nerror_on_average > rtol_stderr * value\nor count <= min_repetitions\n):\nnew_value = fun(*args, **kwargs)\nnew_avg, new_var = running_moments(\nvalue, variance, int(count), cast(float, new_value)\n)\nresult_dict[\"value\"] = new_avg\nresult_dict[\"count\"] = count + 1\nresult_dict[\"variance\"] = new_var\nself.client.set(key, result_dict, noreply=True)\nself.stats.sets += 1\nelse:\nself.stats.hits += 1\nreturn result_dict[\"value\"]  # type: ignore\ndef __getstate__(self):\n\"\"\"Enables pickling after a socket has been opened to the\n                memcached server, by removing the client from the stored\n                data.\"\"\"\nodict = self.__dict__.copy()\ndel odict[\"client\"]\nreturn odict\ndef __setstate__(self, d: dict):\n\"\"\"Restores a client connection after loading from a pickle.\"\"\"\nself.config = d[\"config\"]\nself.stats = d[\"stats\"]\nself.client = Client(**asdict(self.config))\nself._signature = signature\ndef get_key_value(self, key: bytes):\nresult = None\ntry:\nresult = self.client.get(key)\nexcept socket.timeout as e:\nself.stats.timeouts += 1\nwarnings.warn(f\"{type(self).__name__}: {str(e)}\", RuntimeWarning)\nexcept OSError as e:\nself.stats.errors += 1\nwarnings.warn(f\"{type(self).__name__}: {str(e)}\", RuntimeWarning)\nexcept AttributeError as e:\n# FIXME: this depends on _recv() failing on invalid sockets\n# See pymemcache.base.py,\nself.stats.reconnects += 
1\nwarnings.warn(f\"{type(self).__name__}: {str(e)}\", RuntimeWarning)\nself.client = connect(self.config)\nreturn result\nWrapped.__doc__ = (\nf\"A wrapper around {fun.__name__}() with remote caching enabled.\\n\"\n+ (Wrapped.__doc__ or \"\")\n)\nWrapped.__name__ = f\"memcached_{fun.__name__}\"\npath = list(reversed(fun.__qualname__.split(\".\")))\npatched = [f\"memcached_{path[0]}\"] + path[1:]\nWrapped.__qualname__ = \".\".join(reversed(patched))\n# TODO: pick from some config file or something\nreturn Wrapped(client_config or MemcachedClientConfig())\nreturn wrapper\n
    "},{"location":"api/pydvl/utils/config/","title":"Config","text":""},{"location":"api/pydvl/utils/config/#pydvl.utils.config.ParallelConfig","title":"ParallelConfig dataclass","text":"

    Configuration for parallel computation backend.

    PARAMETER DESCRIPTION backend

    Type of backend to use. Defaults to 'joblib'

    TYPE: Literal['joblib', 'ray'] DEFAULT: 'joblib'

    address

    Address of existing remote or local cluster to use.

    TYPE: Optional[Union[str, Tuple[str, int]]] DEFAULT: None

    n_cpus_local

    Number of CPUs to use when creating a local ray cluster. This has no effect when using an existing ray cluster.

    TYPE: Optional[int] DEFAULT: None

    logging_level

    Logging level for the parallel backend's worker.

    TYPE: int DEFAULT: WARNING

    wait_timeout

    Timeout in seconds for waiting on futures.

    TYPE: float DEFAULT: 1.0

    "},{"location":"api/pydvl/utils/config/#pydvl.utils.config.MemcachedClientConfig","title":"MemcachedClientConfig dataclass","text":"

    Configuration of the memcached client.

    PARAMETER DESCRIPTION server

    A tuple of (IP|domain name, port).

    TYPE: Tuple[str, int] DEFAULT: ('localhost', 11211)

    connect_timeout

    How many seconds to wait before raising ConnectionRefusedError on failure to connect.

    TYPE: float DEFAULT: 1.0

    timeout

    seconds to wait for send or recv calls on the socket connected to memcached.

    TYPE: float DEFAULT: 1.0

    no_delay

    set the TCP_NODELAY flag, which may help with performance in some cases.

    TYPE: bool DEFAULT: True

    serde

    a serializer / deserializer (\"serde\"). The default PickleSerde should work in most cases. See pymemcache's documentation for details.

    TYPE: PickleSerde DEFAULT: PickleSerde(pickle_version=PICKLE_VERSION)

    "},{"location":"api/pydvl/utils/config/#pydvl.utils.config.MemcachedConfig","title":"MemcachedConfig dataclass","text":"

    Configuration for memcached(), providing memoization of function calls.

    Instances of this class are typically used as arguments for the construction of a Utility.

    PARAMETER DESCRIPTION client_config

    Configuration for the connection to the memcached server.

    TYPE: MemcachedClientConfig DEFAULT: field(default_factory=MemcachedClientConfig)

    time_threshold

    computations taking less time than this many seconds are not cached.

    TYPE: float DEFAULT: 0.3

    allow_repeated_evaluations

    If True, repeated calls to a function with the same arguments will be allowed and outputs averaged until the running standard deviation of the mean stabilises below rtol_stderr * mean.

    TYPE: bool DEFAULT: False

    rtol_stderr

    relative tolerance for repeated evaluations. More precisely, memcached() will stop evaluating the function once the standard deviation of the mean is smaller than rtol_stderr * mean.

    TYPE: float DEFAULT: 0.1

    min_repetitions

    minimum number of times that a function evaluation on the same arguments is repeated before returning cached values. Useful for stochastic functions only. If the model training is very noisy, set this number to higher values to reduce variance.

    TYPE: int DEFAULT: 3

    ignore_args

    Do not take these keyword arguments into account when hashing the wrapped function for usage as key in memcached.

    TYPE: Optional[Iterable[str]] DEFAULT: None

    "},{"location":"api/pydvl/utils/dataset/","title":"Dataset","text":"

    This module contains convenience classes to handle data and groups thereof.

    Shapley and Least Core value computations require evaluation of a scoring function (the utility). This is typically the performance of the model on a test set (as an approximation to its true expected performance). It is therefore convenient to keep both the training data and the test data together to be passed around to methods in shapley and least_core. This is done with Dataset.

    This abstraction layer also seamlessly groups data points together if one is interested in computing their value as a group, see GroupedDataset.

    Objects of both types are used to construct a Utility object.

    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset","title":"Dataset(x_train, y_train, x_test, y_test, feature_names=None, target_names=None, data_names=None, description=None, is_multi_output=False)","text":"

    A convenience class to handle datasets.

    It holds a dataset, split into training and test data, together with several labels on feature names, data point names and a description.

    PARAMETER DESCRIPTION x_train

    training data

    TYPE: Union[NDArray, DataFrame]

    y_train

    labels for training data

    TYPE: Union[NDArray, DataFrame]

    x_test

    test data

    TYPE: Union[NDArray, DataFrame]

    y_test

    labels for test data

    TYPE: Union[NDArray, DataFrame]

    feature_names

    name of the features of input data

    TYPE: Optional[Sequence[str]] DEFAULT: None

    target_names

    names of the features of target data

    TYPE: Optional[Sequence[str]] DEFAULT: None

    data_names

    names assigned to data points. For example, if the dataset is a time series, each entry can be a timestamp which can be referenced directly instead of using a row number.

    TYPE: Optional[Sequence[str]] DEFAULT: None

    description

    A textual description of the dataset.

    TYPE: Optional[str] DEFAULT: None

    is_multi_output

    set to False if labels are scalars, or to True if they are vectors of dimension > 1.

    TYPE: bool DEFAULT: False

    Source code in src/pydvl/utils/dataset.py
    def __init__(\nself,\nx_train: Union[NDArray, pd.DataFrame],\ny_train: Union[NDArray, pd.DataFrame],\nx_test: Union[NDArray, pd.DataFrame],\ny_test: Union[NDArray, pd.DataFrame],\nfeature_names: Optional[Sequence[str]] = None,\ntarget_names: Optional[Sequence[str]] = None,\ndata_names: Optional[Sequence[str]] = None,\ndescription: Optional[str] = None,\n# FIXME: use same parameter name as in check_X_y()\nis_multi_output: bool = False,\n):\n\"\"\"Constructs a Dataset from data and labels.\n    Args:\n        x_train: training data\n        y_train: labels for training data\n        x_test: test data\n        y_test: labels for test data\n        feature_names: name of the features of input data\n        target_names: names of the features of target data\n        data_names: names assigned to data points.\n            For example, if the dataset is a time series, each entry can be a\n            timestamp which can be referenced directly instead of using a row\n            number.\n        description: A textual description of the dataset.\n        is_multi_output: set to `False` if labels are scalars, or to\n            `True` if they are vectors of dimension > 1.\n    \"\"\"\nself.x_train, self.y_train = check_X_y(\nx_train, y_train, multi_output=is_multi_output\n)\nself.x_test, self.y_test = check_X_y(\nx_test, y_test, multi_output=is_multi_output\n)\nif x_train.shape[-1] != x_test.shape[-1]:\nraise ValueError(\nf\"Mismatching number of features: \"\nf\"{x_train.shape[-1]} and {x_test.shape[-1]}\"\n)\nif x_train.shape[0] != y_train.shape[0]:\nraise ValueError(\nf\"Mismatching number of samples: \"\nf\"{x_train.shape[-1]} and {x_test.shape[-1]}\"\n)\nif x_test.shape[0] != y_test.shape[0]:\nraise ValueError(\nf\"Mismatching number of samples: \"\nf\"{x_test.shape[-1]} and {y_test.shape[-1]}\"\n)\ndef make_names(s: str, a: np.ndarray) -> List[str]:\nn = a.shape[1] if len(a.shape) > 1 else 1\nreturn [f\"{s}{i:0{1 + int(np.log10(n))}d}\" for i in range(1, n + 
1)]\nself.feature_names = feature_names\nself.target_names = target_names\nif self.feature_names is None:\nif isinstance(x_train, pd.DataFrame):\nself.feature_names = x_train.columns.tolist()\nelse:\nself.feature_names = make_names(\"x\", x_train)\nif self.target_names is None:\nif isinstance(y_train, pd.DataFrame):\nself.target_names = y_train.columns.tolist()\nelse:\nself.target_names = make_names(\"y\", y_train)\nif len(self.x_train.shape) > 1:\nif (\nlen(self.feature_names) != self.x_train.shape[-1]\nor len(self.feature_names) != self.x_test.shape[-1]\n):\nraise ValueError(\"Mismatching number of features and names\")\nif len(self.y_train.shape) > 1:\nif (\nlen(self.target_names) != self.y_train.shape[-1]\nor len(self.target_names) != self.y_test.shape[-1]\n):\nraise ValueError(\"Mismatching number of targets and names\")\nself.description = description or \"No description\"\nself._indices = np.arange(len(self.x_train), dtype=np.int_)\nself._data_names = (\nnp.array(data_names, dtype=object)\nif data_names is not None\nelse self._indices.astype(object)\n)\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.indices","title":"indices: NDArray[np.int_] property","text":"

    Index of positions in data.x_train.

    Contiguous integers from 0 to len(Dataset).

    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.data_names","title":"data_names: NDArray[np.object_] property","text":"

    Names of each individual datapoint.

    Used for reporting Shapley values.

    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.dim","title":"dim: int property","text":"

    Returns the number of dimensions of a sample.

    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.get_training_data","title":"get_training_data(indices=None)","text":"

    Given a set of indices, returns the training data that refer to those indices.

    This is used mainly by Utility to retrieve subsets of the data from indices. It is typically not needed in algorithms.

    PARAMETER DESCRIPTION indices

    Optional indices that will be used to select points from the training data. If None, the entire training data will be returned.

    TYPE: Optional[Iterable[int]] DEFAULT: None

    RETURNS DESCRIPTION Tuple[NDArray, NDArray]

    If indices is not None, the selected x and y arrays from the training data. Otherwise, the entire dataset.

    Source code in src/pydvl/utils/dataset.py
    def get_training_data(\nself, indices: Optional[Iterable[int]] = None\n) -> Tuple[NDArray, NDArray]:\n\"\"\"Given a set of indices, returns the training data that refer to those\n    indices.\n    This is used mainly by [Utility][pydvl.utils.utility.Utility] to retrieve\n    subsets of the data from indices. It is typically **not needed in\n    algorithms**.\n    Args:\n        indices: Optional indices that will be used to select points from\n            the training data. If `None`, the entire training data will be\n            returned.\n    Returns:\n        If `indices` is not `None`, the selected x and y arrays from the\n            training data. Otherwise, the entire dataset.\n    \"\"\"\nif indices is None:\nreturn self.x_train, self.y_train\nx = self.x_train[indices]\ny = self.y_train[indices]\nreturn x, y\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.get_test_data","title":"get_test_data(indices=None)","text":"

    Returns the entire test set regardless of the passed indices.

    The passed indices will not be used because for data valuation we generally want to score the trained model on the entire test data.

    Additionally, the way this method is used in the Utility class, the passed indices will be those of the training data and would not work on the test data.

    There may be cases where it is desired to use parts of the test data. In those cases, it is recommended to inherit from Dataset and override get_test_data().

    For example, the following snippet shows how one could go about mapping the training data indices into test data indices inside get_test_data():

    Example
    >>> from pydvl.utils import Dataset\n>>> import numpy as np\n>>> class DatasetWithTestDataIndices(Dataset):\n...    def get_test_data(self, indices=None):\n...        if indices is None:\n...            return self.x_test, self.y_test\n...        fraction = len(list(indices)) / len(self)\n...        mapped_indices = len(self.x_test) / len(self) * np.asarray(indices)\n...        mapped_indices = np.unique(mapped_indices.astype(int))\n...        return self.x_test[mapped_indices], self.y_test[mapped_indices]\n...\n>>> X = np.random.rand(100, 10)\n>>> y = np.random.randint(0, 2, 100)\n>>> dataset = DatasetWithTestDataIndices.from_arrays(X, y)\n>>> indices = np.random.choice(dataset.indices, 30, replace=False)\n>>> _ = dataset.get_training_data(indices)\n>>> _ = dataset.get_test_data(indices)\n
    PARAMETER DESCRIPTION indices

    Optional indices into the test data. This argument is unused and left for compatibility with get_training_data().

    TYPE: Optional[Iterable[int]] DEFAULT: None

    RETURNS DESCRIPTION Tuple[NDArray, NDArray]

    The entire test data.

    Source code in src/pydvl/utils/dataset.py
    def get_test_data(\nself, indices: Optional[Iterable[int]] = None\n) -> Tuple[NDArray, NDArray]:\n\"\"\"Returns the entire test set regardless of the passed indices.\n    The passed indices will not be used because for data valuation\n    we generally want to score the trained model on the entire test data.\n    Additionally, the way this method is used in the\n    [Utility][pydvl.utils.utility.Utility] class, the passed indices will\n    be those of the training data and would not work on the test data.\n    There may be cases where it is desired to use parts of the test data.\n    In those cases, it is recommended to inherit from\n    [Dataset][pydvl.utils.dataset.Dataset] and override\n    [get_test_data()][pydvl.utils.dataset.Dataset.get_test_data].\n    For example, the following snippet shows how one could go about\n    mapping the training data indices into test data indices\n    inside [get_test_data()][pydvl.utils.dataset.Dataset.get_test_data]:\n    ??? Example\n        ```pycon\n        >>> from pydvl.utils import Dataset\n        >>> import numpy as np\n        >>> class DatasetWithTestDataIndices(Dataset):\n        ...    def get_test_data(self, indices=None):\n        ...        if indices is None:\n        ...            return self.x_test, self.y_test\n        ...        fraction = len(list(indices)) / len(self)\n        ...        mapped_indices = len(self.x_test) / len(self) * np.asarray(indices)\n        ...        mapped_indices = np.unique(mapped_indices.astype(int))\n        ...        
return self.x_test[mapped_indices], self.y_test[mapped_indices]\n        ...\n        >>> X = np.random.rand(100, 10)\n        >>> y = np.random.randint(0, 2, 100)\n        >>> dataset = DatasetWithTestDataIndices.from_arrays(X, y)\n        >>> indices = np.random.choice(dataset.indices, 30, replace=False)\n        >>> _ = dataset.get_training_data(indices)\n        >>> _ = dataset.get_test_data(indices)\n        ```\n    Args:\n        indices: Optional indices into the test data. This argument is\n            unused left for compatibility with\n            [get_training_data()][pydvl.utils.dataset.Dataset.get_training_data].\n    Returns:\n        The entire test data.\n    \"\"\"\nreturn self.x_test, self.y_test\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.from_sklearn","title":"from_sklearn(data, train_size=0.8, random_state=None, stratify_by_target=False, **kwargs) classmethod","text":"

    Constructs a Dataset object from a sklearn.utils.Bunch, as returned by the load_* functions in scikit-learn toy datasets.

    Example
    >>> from pydvl.utils import Dataset\n>>> from sklearn.datasets import load_boston\n>>> dataset = Dataset.from_sklearn(load_boston())\n
    PARAMETER DESCRIPTION data

    scikit-learn Bunch object. The following attributes are supported:

    • data: covariates.
    • target: target variables (labels).
    • feature_names (optional): the feature names.
    • target_names (optional): the target names.
    • DESCR (optional): a description.

    TYPE: Bunch

    train_size

    size of the training dataset. Used in train_test_split

    TYPE: float DEFAULT: 0.8

    random_state

    seed for train / test split

    TYPE: Optional[int] DEFAULT: None

    stratify_by_target

    If True, data is split in a stratified fashion, using the target variable as labels. Read more in scikit-learn's user guide.

    TYPE: bool DEFAULT: False

    kwargs

    Additional keyword arguments to pass to the Dataset constructor. Use this to pass e.g. is_multi_output.

    DEFAULT: {}

    RETURNS DESCRIPTION Dataset

    Object with the sklearn dataset

    Changed in version 0.6.0

    Added kwargs to pass to the Dataset constructor.

    Source code in src/pydvl/utils/dataset.py
    @classmethod\ndef from_sklearn(\ncls,\ndata: Bunch,\ntrain_size: float = 0.8,\nrandom_state: Optional[int] = None,\nstratify_by_target: bool = False,\n**kwargs,\n) -> \"Dataset\":\n\"\"\"Constructs a [Dataset][pydvl.utils.Dataset] object from a\n    [sklearn.utils.Bunch][], as returned by the `load_*`\n    functions in [scikit-learn toy datasets](https://scikit-learn.org/stable/datasets/toy_dataset.html).\n    ??? Example\n        ```pycon\n        >>> from pydvl.utils import Dataset\n        >>> from sklearn.datasets import load_boston\n        >>> dataset = Dataset.from_sklearn(load_boston())\n        ```\n    Args:\n        data: scikit-learn Bunch object. The following attributes are supported:\n            - `data`: covariates.\n            - `target`: target variables (labels).\n            - `feature_names` (**optional**): the feature names.\n            - `target_names` (**optional**): the target names.\n            - `DESCR` (**optional**): a description.\n        train_size: size of the training dataset. Used in `train_test_split`\n        random_state: seed for train / test split\n        stratify_by_target: If `True`, data is split in a stratified\n            fashion, using the target variable as labels. Read more in\n            [scikit-learn's user guide](https://scikit-learn.org/stable/modules/cross_validation.html#stratification).\n        kwargs: Additional keyword arguments to pass to the\n            [Dataset][pydvl.utils.Dataset] constructor. Use this to pass e.g. `is_multi_output`.\n    Returns:\n        Object with the sklearn dataset\n    !!! 
tip \"Changed in version 0.6.0\"\n        Added kwargs to pass to the [Dataset][pydvl.utils.Dataset] constructor.\n    \"\"\"\nx_train, x_test, y_train, y_test = train_test_split(\ndata.data,\ndata.target,\ntrain_size=train_size,\nrandom_state=random_state,\nstratify=data.target if stratify_by_target else None,\n)\nreturn cls(\nx_train,\ny_train,\nx_test,\ny_test,\nfeature_names=data.get(\"feature_names\"),\ntarget_names=data.get(\"target_names\"),\ndescription=data.get(\"DESCR\"),\n**kwargs,\n)\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.from_arrays","title":"from_arrays(X, y, train_size=0.8, random_state=None, stratify_by_target=False, **kwargs) classmethod","text":"

    Constructs a Dataset object from X and y numpy arrays as returned by the make_* functions in sklearn generated datasets.

    Example
    >>> from pydvl.utils import Dataset\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression()\n>>> dataset = Dataset.from_arrays(X, y)\n
    PARAMETER DESCRIPTION X

    numpy array of shape (n_samples, n_features)

    TYPE: NDArray

    y

    numpy array of shape (n_samples,)

    TYPE: NDArray

    train_size

    size of the training dataset. Used in train_test_split

    TYPE: float DEFAULT: 0.8

    random_state

    seed for train / test split

    TYPE: Optional[int] DEFAULT: None

    stratify_by_target

    If True, data is split in a stratified fashion, using the y variable as labels. Read more in sklearn's user guide.

    TYPE: bool DEFAULT: False

    kwargs

    Additional keyword arguments to pass to the Dataset constructor. Use this to pass e.g. feature_names or target_names.

    DEFAULT: {}

    RETURNS DESCRIPTION Dataset

    Object with the passed X and y arrays split across training and test sets.

    New in version 0.4.0

    Changed in version 0.6.0

    Added kwargs to pass to the Dataset constructor.

    Source code in src/pydvl/utils/dataset.py
    @classmethod\ndef from_arrays(\ncls,\nX: NDArray,\ny: NDArray,\ntrain_size: float = 0.8,\nrandom_state: Optional[int] = None,\nstratify_by_target: bool = False,\n**kwargs,\n) -> \"Dataset\":\n\"\"\"Constructs a [Dataset][pydvl.utils.Dataset] object from X and y numpy arrays  as\n    returned by the `make_*` functions in [sklearn generated datasets](https://scikit-learn.org/stable/datasets/sample_generators.html).\n    ??? Example\n        ```pycon\n        >>> from pydvl.utils import Dataset\n        >>> from sklearn.datasets import make_regression\n        >>> X, y = make_regression()\n        >>> dataset = Dataset.from_arrays(X, y)\n        ```\n    Args:\n        X: numpy array of shape (n_samples, n_features)\n        y: numpy array of shape (n_samples,)\n        train_size: size of the training dataset. Used in `train_test_split`\n        random_state: seed for train / test split\n        stratify_by_target: If `True`, data is split in a stratified fashion,\n            using the y variable as labels. Read more in [sklearn's user\n            guide](https://scikit-learn.org/stable/modules/cross_validation.html#stratification).\n        kwargs: Additional keyword arguments to pass to the\n            [Dataset][pydvl.utils.Dataset] constructor. Use this to pass e.g. `feature_names`\n            or `target_names`.\n    Returns:\n        Object with the passed X and y arrays split across training and test sets.\n    !!! tip \"New in version 0.4.0\"\n    !!! tip \"Changed in version 0.6.0\"\n        Added kwargs to pass to the [Dataset][pydvl.utils.Dataset] constructor.\n    \"\"\"\nx_train, x_test, y_train, y_test = train_test_split(\nX,\ny,\ntrain_size=train_size,\nrandom_state=random_state,\nstratify=y if stratify_by_target else None,\n)\nreturn cls(x_train, y_train, x_test, y_test, **kwargs)\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset","title":"GroupedDataset(x_train, y_train, x_test, y_test, data_groups, feature_names=None, target_names=None, group_names=None, description=None, **kwargs)","text":"

    Bases: Dataset

    Used for calculating Shapley values of subsets of the data considered as logical units. For instance, one can group by value of a categorical feature, by bin into which a continuous feature falls, or by label.

    PARAMETER DESCRIPTION x_train

    training data

    TYPE: NDArray

    y_train

    labels of training data

    TYPE: NDArray

    x_test

    test data

    TYPE: NDArray

    y_test

    labels of test data

    TYPE: NDArray

    data_groups

    Iterable of the same length as x_train containing a group label for each training data point. The label can be of any type, e.g. str or int. Data points with the same label will then be grouped by this object and considered as one for effects of valuation.

    TYPE: Sequence

    feature_names

    names of the covariates' features.

    TYPE: Optional[Sequence[str]] DEFAULT: None

    target_names

    names of the labels or targets y

    TYPE: Optional[Sequence[str]] DEFAULT: None

    group_names

    names of the groups. If not provided, the labels from data_groups will be used.

    TYPE: Optional[Sequence[str]] DEFAULT: None

    description

    A textual description of the dataset

    TYPE: Optional[str] DEFAULT: None

    kwargs

    Additional keyword arguments to pass to the Dataset constructor.

    DEFAULT: {}

    Changed in version 0.6.0

    Added group_names and forwarding of kwargs

    Source code in src/pydvl/utils/dataset.py
    def __init__(\nself,\nx_train: NDArray,\ny_train: NDArray,\nx_test: NDArray,\ny_test: NDArray,\ndata_groups: Sequence,\nfeature_names: Optional[Sequence[str]] = None,\ntarget_names: Optional[Sequence[str]] = None,\ngroup_names: Optional[Sequence[str]] = None,\ndescription: Optional[str] = None,\n**kwargs,\n):\n\"\"\"Class for grouping datasets.\n    Used for calculating Shapley values of subsets of the data considered\n    as logical units. For instance, one can group by value of a categorical\n    feature, by bin into which a continuous feature falls, or by label.\n    Args:\n        x_train: training data\n        y_train: labels of training data\n        x_test: test data\n        y_test: labels of test data\n        data_groups: Iterable of the same length as `x_train` containing\n            a group label for each training data point. The label can be of any\n            type, e.g. `str` or `int`. Data points with the same label will\n            then be grouped by this object and considered as one for effects of\n            valuation.\n        feature_names: names of the covariates' features.\n        target_names: names of the labels or targets y\n        group_names: names of the groups. If not provided, the labels\n            from `data_groups` will be used.\n        description: A textual description of the dataset\n        kwargs: Additional keyword arguments to pass to the\n            [Dataset][pydvl.utils.Dataset] constructor.\n    !!! 
tip \"Changed in version 0.6.0\"\n    Added `group_names` and forwarding of `kwargs`\n    \"\"\"\nsuper().__init__(\nx_train=x_train,\ny_train=y_train,\nx_test=x_test,\ny_test=y_test,\nfeature_names=feature_names,\ntarget_names=target_names,\ndescription=description,\n**kwargs,\n)\nif len(data_groups) != len(x_train):\nraise ValueError(\nf\"data_groups and x_train must have the same length.\"\nf\"Instead got {len(data_groups)=} and {len(x_train)=}\"\n)\nself.groups: OrderedDict[Any, List[int]] = OrderedDict(\n{k: [] for k in set(data_groups)}\n)\nfor idx, group in enumerate(data_groups):\nself.groups[group].append(idx)\nself.group_items = list(self.groups.items())\nself._indices = np.arange(len(self.groups.keys()))\nself._data_names = (\nnp.array(group_names, dtype=object)\nif group_names is not None\nelse np.array(list(self.groups.keys()), dtype=object)\n)\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset.indices","title":"indices property","text":"

    Indices of the groups.

    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset.data_names","title":"data_names property","text":"

    Names of the groups.

    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset.get_training_data","title":"get_training_data(indices=None)","text":"

    Returns the data and labels of all samples in the given groups.

    PARAMETER DESCRIPTION indices

    group indices whose elements to return. If None, all data from all groups are returned.

    TYPE: Optional[Iterable[int]] DEFAULT: None

    RETURNS DESCRIPTION Tuple[NDArray, NDArray]

    Tuple of training data x and labels y.

    Source code in src/pydvl/utils/dataset.py
    def get_training_data(\nself, indices: Optional[Iterable[int]] = None\n) -> Tuple[NDArray, NDArray]:\n\"\"\"Returns the data and labels of all samples in the given groups.\n    Args:\n        indices: group indices whose elements to return. If `None`,\n            all data from all groups are returned.\n    Returns:\n        Tuple of training data x and labels y.\n    \"\"\"\nif indices is None:\nindices = self.indices\ndata_indices = [\nidx for group_id in indices for idx in self.group_items[group_id][1]\n]\nreturn super().get_training_data(data_indices)\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset.from_sklearn","title":"from_sklearn(data, train_size=0.8, random_state=None, stratify_by_target=False, data_groups=None, **kwargs) classmethod","text":"

    Constructs a GroupedDataset object from a sklearn.utils.Bunch as returned by the load_* functions in scikit-learn toy datasets and groups it.

    Example
    >>> from sklearn.datasets import load_iris\n>>> from pydvl.utils import GroupedDataset\n>>> iris = load_iris()\n>>> data_groups = iris.data[:, 0] // 0.5\n>>> dataset = GroupedDataset.from_sklearn(iris, data_groups=data_groups)\n
    PARAMETER DESCRIPTION data

    scikit-learn Bunch object. The following attributes are supported:

    • data: covariates.
    • target: target variables (labels).
    • feature_names (optional): the feature names.
    • target_names (optional): the target names.
    • DESCR (optional): a description.

    TYPE: Bunch

    train_size

    size of the training dataset. Used in train_test_split.

    TYPE: float DEFAULT: 0.8

    random_state

    seed for train / test split.

    TYPE: Optional[int] DEFAULT: None

    stratify_by_target

    If True, data is split in a stratified fashion, using the target variable as labels. Read more in sklearn's user guide.

    TYPE: bool DEFAULT: False

    data_groups

    an array holding the group index or name for each data point. The length of this array must be equal to the number of data points in the dataset.

    TYPE: Optional[Sequence] DEFAULT: None

    kwargs

    Additional keyword arguments to pass to the Dataset constructor.

    DEFAULT: {}

    RETURNS DESCRIPTION GroupedDataset

    Dataset with the selected sklearn data

    Source code in src/pydvl/utils/dataset.py
    @classmethod\ndef from_sklearn(\ncls,\ndata: Bunch,\ntrain_size: float = 0.8,\nrandom_state: Optional[int] = None,\nstratify_by_target: bool = False,\ndata_groups: Optional[Sequence] = None,\n**kwargs,\n) -> \"GroupedDataset\":\n\"\"\"Constructs a [GroupedDataset][pydvl.utils.GroupedDataset] object from a\n    [sklearn.utils.Bunch][sklearn.utils.Bunch] as returned by the `load_*` functions in\n    [scikit-learn toy datasets](https://scikit-learn.org/stable/datasets/toy_dataset.html) and groups\n    it.\n    ??? Example\n        ```pycon\n        >>> from sklearn.datasets import load_iris\n        >>> from pydvl.utils import GroupedDataset\n        >>> iris = load_iris()\n        >>> data_groups = iris.data[:, 0] // 0.5\n        >>> dataset = GroupedDataset.from_sklearn(iris, data_groups=data_groups)\n        ```\n    Args:\n        data: scikit-learn Bunch object. The following attributes are supported:\n            - `data`: covariates.\n            - `target`: target variables (labels).\n            - `feature_names` (**optional**): the feature names.\n            - `target_names` (**optional**): the target names.\n            - `DESCR` (**optional**): a description.\n        train_size: size of the training dataset. Used in `train_test_split`.\n        random_state: seed for train / test split.\n        stratify_by_target: If `True`, data is split in a stratified\n            fashion, using the target variable as labels. Read more in\n            [sklearn's user guide](https://scikit-learn.org/stable/modules/cross_validation.html#stratification).\n        data_groups: an array holding the group index or name for each\n            data point. 
The length of this array must be equal to the number of\n            data points in the dataset.\n        kwargs: Additional keyword arguments to pass to the\n            [Dataset][pydvl.utils.Dataset] constructor.\n    Returns:\n        Dataset with the selected sklearn data\n    \"\"\"\nif data_groups is None:\nraise ValueError(\n\"data_groups must be provided when constructing a GroupedDataset\"\n)\nx_train, x_test, y_train, y_test, data_groups_train, _ = train_test_split(\ndata.data,\ndata.target,\ndata_groups,\ntrain_size=train_size,\nrandom_state=random_state,\nstratify=data.target if stratify_by_target else None,\n)\ndataset = Dataset(\nx_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test, **kwargs\n)\nreturn cls.from_dataset(dataset, data_groups_train)  # type: ignore\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset.from_arrays","title":"from_arrays(X, y, train_size=0.8, random_state=None, stratify_by_target=False, data_groups=None, **kwargs) classmethod","text":"

    Constructs a GroupedDataset object from X and y numpy arrays as returned by the make_* functions in scikit-learn generated datasets.

    Example
    >>> from sklearn.datasets import make_classification\n>>> from pydvl.utils import GroupedDataset\n>>> X, y = make_classification(\n...     n_samples=100,\n...     n_features=4,\n...     n_informative=2,\n...     n_redundant=0,\n...     random_state=0,\n...     shuffle=False\n... )\n>>> data_groups = X[:, 0] // 0.5\n>>> dataset = GroupedDataset.from_arrays(X, y, data_groups=data_groups)\n
    PARAMETER DESCRIPTION X

    array of shape (n_samples, n_features)

    TYPE: NDArray

    y

    array of shape (n_samples,)

    TYPE: NDArray

    train_size

    size of the training dataset. Used in train_test_split.

    TYPE: float DEFAULT: 0.8

    random_state

    seed for train / test split.

    TYPE: Optional[int] DEFAULT: None

    stratify_by_target

    If True, data is split in a stratified fashion, using the y variable as labels. Read more in sklearn's user guide.

    TYPE: bool DEFAULT: False

    data_groups

    an array holding the group index or name for each data point. The length of this array must be equal to the number of data points in the dataset.

    TYPE: Optional[Sequence] DEFAULT: None

    kwargs

    Additional keyword arguments that will be passed to the Dataset constructor.

    DEFAULT: {}

    RETURNS DESCRIPTION Dataset

    Dataset with the passed X and y arrays split across training and test sets.

    New in version 0.4.0

    Changed in version 0.6.0

    Added kwargs to pass to the Dataset constructor.

    Source code in src/pydvl/utils/dataset.py
    @classmethod\ndef from_arrays(\ncls,\nX: NDArray,\ny: NDArray,\ntrain_size: float = 0.8,\nrandom_state: Optional[int] = None,\nstratify_by_target: bool = False,\ndata_groups: Optional[Sequence] = None,\n**kwargs,\n) -> \"Dataset\":\n\"\"\"Constructs a [GroupedDataset][pydvl.utils.GroupedDataset] object from X and y numpy arrays\n    as returned by the `make_*` functions in\n    [scikit-learn generated datasets](https://scikit-learn.org/stable/datasets/sample_generators.html).\n    ??? Example\n        ```pycon\n        >>> from sklearn.datasets import make_classification\n        >>> from pydvl.utils import GroupedDataset\n        >>> X, y = make_classification(\n        ...     n_samples=100,\n        ...     n_features=4,\n        ...     n_informative=2,\n        ...     n_redundant=0,\n        ...     random_state=0,\n        ...     shuffle=False\n        ... )\n        >>> data_groups = X[:, 0] // 0.5\n        >>> dataset = GroupedDataset.from_arrays(X, y, data_groups=data_groups)\n        ```\n    Args:\n        X: array of shape (n_samples, n_features)\n        y: array of shape (n_samples,)\n        train_size: size of the training dataset. Used in `train_test_split`.\n        random_state: seed for train / test split.\n        stratify_by_target: If `True`, data is split in a stratified\n            fashion, using the y variable as labels. Read more in\n            [sklearn's user guide](https://scikit-learn.org/stable/modules/cross_validation.html#stratification).\n        data_groups: an array holding the group index or name for each data\n            point. The length of this array must be equal to the number of\n            data points in the dataset.\n        kwargs: Additional keyword arguments that will be passed to the\n            [Dataset][pydvl.utils.Dataset] constructor.\n    Returns:\n        Dataset with the passed X and y arrays split across training and\n            test sets.\n    !!! tip \"New in version 0.4.0\"\n    !!! 
tip \"Changed in version 0.6.0\"\n        Added kwargs to pass to the [Dataset][pydvl.utils.Dataset] constructor.\n    \"\"\"\nif data_groups is None:\nraise ValueError(\n\"data_groups must be provided when constructing a GroupedDataset\"\n)\nx_train, x_test, y_train, y_test, data_groups_train, _ = train_test_split(\nX,\ny,\ndata_groups,\ntrain_size=train_size,\nrandom_state=random_state,\nstratify=y if stratify_by_target else None,\n)\ndataset = Dataset(\nx_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test, **kwargs\n)\nreturn cls.from_dataset(dataset, data_groups_train)\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset.from_dataset","title":"from_dataset(dataset, data_groups) classmethod","text":"

    Creates a GroupedDataset object from the data of a Dataset object and a mapping of data groups.

    Example
    >>> import numpy as np\n>>> from pydvl.utils import Dataset, GroupedDataset\n>>> dataset = Dataset.from_arrays(\n...     X=np.asarray([[1, 2], [3, 4], [5, 6], [7, 8]]),\n...     y=np.asarray([0, 1, 0, 1]),\n... )\n>>> dataset = GroupedDataset.from_dataset(dataset, data_groups=[0, 0, 1, 1])\n
    PARAMETER DESCRIPTION dataset

    The original data.

    TYPE: Dataset

    data_groups

    An array holding the group index or name for each data point. The length of this array must be equal to the number of data points in the dataset.

    TYPE: Sequence[Any]

    RETURNS DESCRIPTION GroupedDataset

    A GroupedDataset with the initial Dataset grouped by data_groups.

    Source code in src/pydvl/utils/dataset.py
    @classmethod\ndef from_dataset(\ncls, dataset: Dataset, data_groups: Sequence[Any]\n) -> \"GroupedDataset\":\n\"\"\"Creates a [GroupedDataset][pydvl.utils.GroupedDataset] object from the data a\n    [Dataset][pydvl.utils.Dataset] object and a mapping of data groups.\n    ??? Example\n        ```pycon\n        >>> import numpy as np\n        >>> from pydvl.utils import Dataset, GroupedDataset\n        >>> dataset = Dataset.from_arrays(\n        ...     X=np.asarray([[1, 2], [3, 4], [5, 6], [7, 8]]),\n        ...     y=np.asarray([0, 1, 0, 1]),\n        ... )\n        >>> dataset = GroupedDataset.from_dataset(dataset, data_groups=[0, 0, 1, 1])\n        ```\n    Args:\n        dataset: The original data.\n        data_groups: An array holding the group index or name for each data\n            point. The length of this array must be equal to the number of\n            data points in the dataset.\n    Returns:\n        A [GroupedDataset][pydvl.utils.GroupedDataset] with the initial\n            [Dataset][pydvl.utils.Dataset] grouped by data_groups.\n    \"\"\"\nreturn cls(\nx_train=dataset.x_train,\ny_train=dataset.y_train,\nx_test=dataset.x_test,\ny_test=dataset.y_test,\ndata_groups=data_groups,\nfeature_names=dataset.feature_names,\ntarget_names=dataset.target_names,\ndescription=dataset.description,\n)\n
    "},{"location":"api/pydvl/utils/functional/","title":"Functional","text":"

    Supporting utilities for manipulating arguments of functions.

    "},{"location":"api/pydvl/utils/functional/#pydvl.utils.functional.free_arguments","title":"free_arguments(fun)","text":"

    Computes the set of free arguments for a function or functools.partial object.

    All arguments of a function are considered free unless they are set by a partial. For example, if f = partial(g, a=1), then a is not a free argument of f.

    PARAMETER DESCRIPTION fun

    A callable or a functools.partial object.

    TYPE: Union[Callable, partial]

    RETURNS DESCRIPTION Set[str]

    The set of free arguments of fun.

    New in version 0.7.0

    Source code in src/pydvl/utils/functional.py
    def free_arguments(fun: Union[Callable, partial]) -> Set[str]:\n\"\"\"Computes the set of free arguments for a function or\n    [functools.partial][] object.\n    All arguments of a function are considered free unless they are set by a\n    partial. For example, if `f = partial(g, a=1)`, then `a` is not a free\n    argument of `f`.\n    Args:\n        fun: A callable or a [partial object][].\n    Returns:\n        The set of free arguments of `fun`.\n    !!! tip \"New in version 0.7.0\"\n    \"\"\"\nargs_set_by_partial: Set[str] = set()\ndef _rec_unroll_partial_function_args(g: Union[Callable, partial]) -> Callable:\n\"\"\"Stores arguments and recursively call itself if `g` is a\n        [functools.partial][] object. In the end, returns the initially wrapped\n        function.\n        This handles the construct `partial(_accept_additional_argument, *args,\n        **kwargs)` that is used by `maybe_add_argument`.\n        Args:\n            g: A partial or a function to unroll.\n        Returns:\n            Initial wrapped function.\n        \"\"\"\nnonlocal args_set_by_partial\nif isinstance(g, partial) and g.func == _accept_additional_argument:\narg = g.keywords[\"arg\"]\nif arg in args_set_by_partial:\nargs_set_by_partial.remove(arg)\nreturn _rec_unroll_partial_function_args(g.keywords[\"fun\"])\nelif isinstance(g, partial):\nargs_set_by_partial.update(g.keywords.keys())\nargs_set_by_partial.update(g.args)\nreturn _rec_unroll_partial_function_args(g.func)\nelse:\nreturn g\nwrapped_fn = _rec_unroll_partial_function_args(fun)\nsig = inspect.signature(wrapped_fn)\nreturn args_set_by_partial | set(sig.parameters.keys())\n
    "},{"location":"api/pydvl/utils/functional/#pydvl.utils.functional.maybe_add_argument","title":"maybe_add_argument(fun, new_arg)","text":"

    Wraps a function to accept the given keyword parameter if it doesn't already.

    If fun already takes a keyword parameter of name new_arg, then it is returned as is. Otherwise, a wrapper is returned which merely ignores the argument.

    PARAMETER DESCRIPTION fun

    The function to wrap

    TYPE: Callable

    new_arg

    The name of the argument that the new function will accept (and ignore).

    TYPE: str

    RETURNS DESCRIPTION Callable

    A new function accepting one more keyword argument.

    Changed in version 0.7.0

    Ability to work with partials.

    Source code in src/pydvl/utils/functional.py
    def maybe_add_argument(fun: Callable, new_arg: str) -> Callable:\n\"\"\"Wraps a function to accept the given keyword parameter if it doesn't\n    already.\n    If `fun` already takes a keyword parameter of name `new_arg`, then it is\n    returned as is. Otherwise, a wrapper is returned which merely ignores the\n    argument.\n    Args:\n        fun: The function to wrap\n        new_arg: The name of the argument that the new function will accept\n            (and ignore).\n    Returns:\n        A new function accepting one more keyword argument.\n    !!! tip \"Changed in version 0.7.0\"\n        Ability to work with partials.\n    \"\"\"\nif new_arg in free_arguments(fun):\nreturn fun\nreturn partial(_accept_additional_argument, fun=fun, arg=new_arg)\n
    "},{"location":"api/pydvl/utils/numeric/","title":"Numeric","text":"

    This module contains routines for numerical computations used across the library.

    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.powerset","title":"powerset(s)","text":"

    Returns an iterator for the power set of the argument.

    Subsets are generated in sequence by growing size. See random_powerset() for random sampling.

    Example
    >>> import numpy as np\n>>> from pydvl.utils.numeric import powerset\n>>> list(powerset(np.array((1,2))))\n[(), (1,), (2,), (1, 2)]\n
    PARAMETER DESCRIPTION s

    The set to use

    TYPE: NDArray[T]

    RETURNS DESCRIPTION Iterator[Collection[T]]

    An iterator over all subsets of the set of indices s.

    Source code in src/pydvl/utils/numeric.py
    def powerset(s: NDArray[T]) -> Iterator[Collection[T]]:\n\"\"\"Returns an iterator for the power set of the argument.\n     Subsets are generated in sequence by growing size. See\n     [random_powerset()][pydvl.utils.numeric.random_powerset] for random\n     sampling.\n    ??? Example\n        ``` pycon\n        >>> import numpy as np\n        >>> from pydvl.utils.numeric import powerset\n        >>> list(powerset(np.array((1,2))))\n        [(), (1,), (2,), (1, 2)]\n        ```\n    Args:\n         s: The set to use\n    Returns:\n        An iterator over all subsets of the set of indices `s`.\n    \"\"\"\nreturn chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.num_samples_permutation_hoeffding","title":"num_samples_permutation_hoeffding(eps, delta, u_range)","text":"

    Lower bound on the number of samples required for Monte Carlo Shapley to obtain an (\u03b5,\u03b4)-approximation.

    That is: with probability 1-\u03b4, the estimated value for one data point will be \u03b5-close to the true quantity, if at least this many permutations are sampled.

    PARAMETER DESCRIPTION eps

    \u03b5 > 0

    TYPE: float

    delta

    0 < \u03b4 <= 1

    TYPE: float

    u_range

    Range of the Utility function

    TYPE: float

    RETURNS DESCRIPTION int

    Number of permutations required to guarantee \u03b5-correct Shapley values with probability 1-\u03b4

    Source code in src/pydvl/utils/numeric.py
    def num_samples_permutation_hoeffding(eps: float, delta: float, u_range: float) -> int:\n\"\"\"Lower bound on the number of samples required for MonteCarlo Shapley to\n    obtain an (\u03b5,\u03b4)-approximation.\n    That is: with probability 1-\u03b4, the estimated value for one data point will\n    be \u03b5-close to the true quantity, if at least this many permutations are\n    sampled.\n    Args:\n        eps: \u03b5 > 0\n        delta: 0 < \u03b4 <= 1\n        u_range: Range of the [Utility][pydvl.utils.utility.Utility] function\n    Returns:\n        Number of _permutations_ required to guarantee \u03b5-correct Shapley\n            values with probability 1-\u03b4\n    \"\"\"\nreturn int(np.ceil(np.log(2 / delta) * 2 * u_range**2 / eps**2))\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.random_subset","title":"random_subset(s, q=0.5, seed=None)","text":"

    Returns one subset at random from s.

    PARAMETER DESCRIPTION s

    set to sample from

    TYPE: NDArray[T]

    q

    Sampling probability for elements. The default 0.5 yields a uniform distribution over the power set of s.

    TYPE: float DEFAULT: 0.5

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    RETURNS DESCRIPTION NDArray[T]

    The subset

    Source code in src/pydvl/utils/numeric.py
    def random_subset(\ns: NDArray[T], q: float = 0.5, seed: Optional[Seed] = None\n) -> NDArray[T]:\n\"\"\"Returns one subset at random from ``s``.\n    Args:\n        s: set to sample from\n        q: Sampling probability for elements. The default 0.5 yields a\n            uniform distribution over the power set of s.\n        seed: Either an instance of a numpy random number generator or a seed\n            for it.\n    Returns:\n        The subset\n    \"\"\"\nrng = np.random.default_rng(seed)\nselection = rng.uniform(size=len(s)) > q\nreturn s[selection]\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.random_powerset","title":"random_powerset(s, n_samples=None, q=0.5, seed=None)","text":"

    Samples subsets from the power set of the argument, without pre-generating all subsets and in no order.

    See powerset if you wish to deterministically generate all subsets.

    To generate subsets, len(s) Bernoulli draws with probability q are drawn. The default value of q = 0.5 provides a uniform distribution over the power set of s. Other choices can be used e.g. to implement owen_sampling_shapley.

    PARAMETER DESCRIPTION s

    set to sample from

    TYPE: NDArray[T]

    n_samples

    if set, stop the generator after this many steps. Defaults to np.iinfo(np.int32).max

    TYPE: Optional[int] DEFAULT: None

    q

    Sampling probability for elements. The default 0.5 yields a uniform distribution over the power set of s.

    TYPE: float DEFAULT: 0.5

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    RETURNS DESCRIPTION Generator[NDArray[T], None, None]

    Samples from the power set of s.

    RAISES DESCRIPTION ValueError

    if the element sampling probability is not in [0,1]

    Source code in src/pydvl/utils/numeric.py
    def random_powerset(\ns: NDArray[T],\nn_samples: Optional[int] = None,\nq: float = 0.5,\nseed: Optional[Seed] = None,\n) -> Generator[NDArray[T], None, None]:\n\"\"\"Samples subsets from the power set of the argument, without\n    pre-generating all subsets and in no order.\n    See [powerset][pydvl.utils.numeric.powerset] if you wish to deterministically generate all subsets.\n    To generate subsets, `len(s)` Bernoulli draws with probability `q` are\n    drawn. The default value of `q = 0.5` provides a uniform distribution over\n    the power set of `s`. Other choices can be used e.g. to implement\n    [owen_sampling_shapley][pydvl.value.shapley.owen.owen_sampling_shapley].\n    Args:\n        s: set to sample from\n        n_samples: if set, stop the generator after this many steps.\n            Defaults to `np.iinfo(np.int32).max`\n        q: Sampling probability for elements. The default 0.5 yields a\n            uniform distribution over the power set of s.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n    Returns:\n        Samples from the power set of `s`.\n    Raises:\n        ValueError: if the element sampling probability is not in [0,1]\n    \"\"\"\nif q < 0 or q > 1:\nraise ValueError(\"Element sampling probability must be in [0,1]\")\nrng = np.random.default_rng(seed)\ntotal = 1\nif n_samples is None:\nn_samples = np.iinfo(np.int32).max\nwhile total <= n_samples:\nyield random_subset(s, q, seed=rng)\ntotal += 1\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.random_subset_of_size","title":"random_subset_of_size(s, size, seed=None)","text":"

    Samples a random subset of given size uniformly from the powerset of s.

    PARAMETER DESCRIPTION s

    Set to sample from

    TYPE: NDArray[T]

    size

    Size of the subset to generate

    TYPE: int

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    RETURNS DESCRIPTION NDArray[T]

    The subset

    RAISES DESCRIPTION ValueError

    If size > len(s)

    Source code in src/pydvl/utils/numeric.py
    def random_subset_of_size(\ns: NDArray[T], size: int, seed: Optional[Seed] = None\n) -> NDArray[T]:\n\"\"\"Samples a random subset of given size uniformly from the powerset\n    of `s`.\n    Args:\n        s: Set to sample from\n        size: Size of the subset to generate\n        seed: Either an instance of a numpy random number generator or a seed for it.\n    Returns:\n        The subset\n    Raises\n        ValueError: If size > len(s)\n    \"\"\"\nif size > len(s):\nraise ValueError(\"Cannot sample subset larger than set\")\nrng = np.random.default_rng(seed)\nreturn rng.choice(s, size=size, replace=False)\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.random_matrix_with_condition_number","title":"random_matrix_with_condition_number(n, condition_number, seed=None)","text":"

    Constructs a square matrix with a given condition number.

    Taken from: https://gist.github.com/bstellato/23322fe5d87bb71da922fbc41d658079#file-random_mat_condition_number-py

    Also see: https://math.stackexchange.com/questions/1351616/condition-number-of-ata.

    PARAMETER DESCRIPTION n

    size of the matrix

    TYPE: int

    condition_number

    Desired condition number of the matrix; must be greater than 1.

    TYPE: float

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    RETURNS DESCRIPTION NDArray

    An (n,n) matrix with the requested condition number.

    Source code in src/pydvl/utils/numeric.py
    def random_matrix_with_condition_number(\nn: int, condition_number: float, seed: Optional[Seed] = None\n) -> NDArray:\n\"\"\"Constructs a square matrix with a given condition number.\n    Taken from:\n    [https://gist.github.com/bstellato/23322fe5d87bb71da922fbc41d658079#file-random_mat_condition_number-py](\n    https://gist.github.com/bstellato/23322fe5d87bb71da922fbc41d658079#file-random_mat_condition_number-py)\n    Also see:\n    [https://math.stackexchange.com/questions/1351616/condition-number-of-ata](\n    https://math.stackexchange.com/questions/1351616/condition-number-of-ata).\n    Args:\n        n: size of the matrix\n        condition_number: duh\n        seed: Either an instance of a numpy random number generator or a seed for it.\n    Returns:\n        An (n,n) matrix with the requested condition number.\n    \"\"\"\nif n < 2:\nraise ValueError(\"Matrix size must be at least 2\")\nif condition_number <= 1:\nraise ValueError(\"Condition number must be greater than 1\")\nrng = np.random.default_rng(seed)\nlog_condition_number = np.log(condition_number)\nexp_vec = np.arange(\n-log_condition_number / 4.0,\nlog_condition_number * (n + 1) / (4 * (n - 1)),\nlog_condition_number / (2.0 * (n - 1)),\n)\nexp_vec = exp_vec[:n]\ns: np.ndarray = np.exp(exp_vec)\nS = np.diag(s)\nU, _ = np.linalg.qr((rng.uniform(size=(n, n)) - 5.0) * 200)\nV, _ = np.linalg.qr((rng.uniform(size=(n, n)) - 5.0) * 200)\nP: np.ndarray = U.dot(S).dot(V.T)\nP = P.dot(P.T)\nreturn P\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.running_moments","title":"running_moments(previous_avg, previous_variance, count, new_value)","text":"

    Uses Welford's algorithm to calculate the running average and variance of a set of numbers.

    See Welford's algorithm in wikipedia

    Warning

    This is not really using Welford's correction for numerical stability for the variance. (FIXME)

    Todo

    This could be generalised to arbitrary moments. See this paper

    PARAMETER DESCRIPTION previous_avg

    average value at previous step

    TYPE: float | NDArray[float_]

    previous_variance

    variance at previous step

    TYPE: float | NDArray[float_]

    count

    number of points seen so far

    TYPE: int

    new_value

    new value in the series of numbers

    TYPE: float | NDArray[float_]

    RETURNS DESCRIPTION Tuple[float | NDArray[float_], float | NDArray[float_]]

    new_average, new_variance, calculated with the new count

    Source code in src/pydvl/utils/numeric.py
    def running_moments(\nprevious_avg: float | NDArray[np.float_],\nprevious_variance: float | NDArray[np.float_],\ncount: int,\nnew_value: float | NDArray[np.float_],\n) -> Tuple[float | NDArray[np.float_], float | NDArray[np.float_]]:\n\"\"\"Uses Welford's algorithm to calculate the running average and variance of\n     a set of numbers.\n    See [Welford's algorithm in wikipedia](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm)\n    !!! Warning\n        This is not really using Welford's correction for numerical stability\n        for the variance. (FIXME)\n    !!! Todo\n        This could be generalised to arbitrary moments. See [this paper](https://www.osti.gov/biblio/1028931)\n    Args:\n        previous_avg: average value at previous step\n        previous_variance: variance at previous step\n        count: number of points seen so far\n        new_value: new value in the series of numbers\n    Returns:\n        new_average, new_variance, calculated with the new count\n    \"\"\"\n# broadcasted operations seem not to be supported by mypy, so we ignore the type\nnew_average = (new_value + count * previous_avg) / (count + 1)  # type: ignore\nnew_variance = previous_variance + (\n(new_value - previous_avg) * (new_value - new_average) - previous_variance\n) / (count + 1)\nreturn new_average, new_variance\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.top_k_value_accuracy","title":"top_k_value_accuracy(y_true, y_pred, k=3)","text":"

    Computes the top-k accuracy for the estimated values by comparing indices of the highest k values.

    PARAMETER DESCRIPTION y_true

    Exact/true value

    TYPE: NDArray[float_]

    y_pred

    Predicted/estimated value

    TYPE: NDArray[float_]

    k

    Number of the highest values taken into account

    TYPE: int DEFAULT: 3

    RETURNS DESCRIPTION float

    Accuracy

    Source code in src/pydvl/utils/numeric.py
    def top_k_value_accuracy(\ny_true: NDArray[np.float_], y_pred: NDArray[np.float_], k: int = 3\n) -> float:\n\"\"\"Computes the top-k accuracy for the estimated values by comparing indices\n    of the highest k values.\n    Args:\n        y_true: Exact/true value\n        y_pred: Predicted/estimated value\n        k: Number of the highest values taken into account\n    Returns:\n        Accuracy\n    \"\"\"\ntop_k_exact_values = np.argsort(y_true)[-k:]\ntop_k_pred_values = np.argsort(y_pred)[-k:]\ntop_k_accuracy = len(np.intersect1d(top_k_exact_values, top_k_pred_values)) / k\nreturn top_k_accuracy\n
    "},{"location":"api/pydvl/utils/parallel/","title":"Parallel","text":""},{"location":"api/pydvl/utils/parallel/#pydvl.utils.parallel--this-module-is-deprecated","title":"This module is deprecated","text":"

    Redirects

    Imports from this module will be redirected to pydvl.parallel only until v0.9.0. Please update your imports.

    "},{"location":"api/pydvl/utils/progress/","title":"Progress","text":"

    Warning

    This module is deprecated and will be removed in a future release. It implements a wrapper for the tqdm progress bar iterator for easy toggling, but this functionality is already provided by the disable argument of tqdm.

    "},{"location":"api/pydvl/utils/progress/#pydvl.utils.progress.MockProgress","title":"MockProgress(iterator)","text":"

    Bases: Iterator

    A naive mock class to use with maybe_progress and tqdm. Mocked methods don't support return values. Mocked properties don't do anything.

    Source code in src/pydvl/utils/progress.py
    def __init__(self, iterator: Union[Iterator, Iterable]):\n# Since there is no _it in __dict__ at this point, doing here\n# self._it = iterator\n# results in a call to __getattr__() and the assignment fails, so we\n# use __dict__ instead\nself.__dict__[\"_it\"] = iterator\n
    "},{"location":"api/pydvl/utils/progress/#pydvl.utils.progress.maybe_progress","title":"maybe_progress(it, display=False, **kwargs)","text":"

    Returns either a tqdm progress bar or a mock object which wraps the iterator as well, but ignores any accesses to methods or properties.

    PARAMETER DESCRIPTION it

    the iterator to wrap

    TYPE: Union[int, Iterable, Iterator]

    display

    set to True to return a tqdm bar

    TYPE: bool DEFAULT: False

    kwargs

    Keyword arguments that will be forwarded to tqdm

    DEFAULT: {}

    Source code in src/pydvl/utils/progress.py
    def maybe_progress(\nit: Union[int, Iterable, Iterator], display: bool = False, **kwargs\n) -> Union[tqdm, MockProgress]:\n\"\"\"Returns either a tqdm progress bar or a mock object which wraps the\n    iterator as well, but ignores any accesses to methods or properties.\n    Args:\n        it: the iterator to wrap\n        display: set to True to return a tqdm bar\n        kwargs: Keyword arguments that will be forwarded to tqdm\n    \"\"\"\nif isinstance(it, int):\nit = range(it)  # type: ignore\nreturn tqdm(it, **kwargs) if display else MockProgress(it)\n
    "},{"location":"api/pydvl/utils/score/","title":"Score","text":"

    This module provides a Scorer class that wraps scoring functions with additional information.

    Scorers are the fundamental building block of many data valuation methods. They are typically used by the Utility class to evaluate the quality of a model when trained on subsets of the training data.

    Scorers can be constructed in the same way as in scikit-learn: either from known strings or from a callable. Greater values must be better. If they are not, a negated version can be used, see scikit-learn's make_scorer().

    Scorer provides additional information about the scoring function, like its range and default values, which can be used by some data valuation methods (like group_testing_shapley()) to estimate the number of samples required for a certain quality of approximation.

    "},{"location":"api/pydvl/utils/score/#pydvl.utils.score.squashed_r2","title":"squashed_r2 = compose_score(Scorer('r2'), _sigmoid, (0, 1), 'squashed r2') module-attribute","text":"

    A scorer that squashes the R\u00b2 score into the range [0, 1] using a sigmoid.

    "},{"location":"api/pydvl/utils/score/#pydvl.utils.score.squashed_variance","title":"squashed_variance = compose_score(Scorer('explained_variance'), _sigmoid, (0, 1), 'squashed explained variance') module-attribute","text":"

    A scorer that squashes the explained variance score into the range [0, 1] using a sigmoid.

    "},{"location":"api/pydvl/utils/score/#pydvl.utils.score.ScorerCallable","title":"ScorerCallable","text":"

    Bases: Protocol

    Signature for a scorer

    "},{"location":"api/pydvl/utils/score/#pydvl.utils.score.Scorer","title":"Scorer(scoring, default=np.nan, range=(-np.inf, np.inf), name=None)","text":"

    A scoring callable that takes a model, data, and labels and returns a scalar.

    PARAMETER DESCRIPTION scoring

    Either a string or callable that can be passed to get_scorer.

    TYPE: Union[str, ScorerCallable]

    default

    score to be used when a model cannot be fit, e.g. when too little data is passed, or errors arise.

    TYPE: float DEFAULT: nan

    range

    numerical range of the score function. Some Monte Carlo methods can use this to estimate the number of samples required for a certain quality of approximation. If not provided, it can be read from the scoring object if it provides it, for instance if it was constructed with compose_score().

    TYPE: Tuple DEFAULT: (-inf, inf)

    name

    The name of the scorer. If not provided, the name of the function passed will be used.

    TYPE: Optional[str] DEFAULT: None

    New in version 0.5.0

    Source code in src/pydvl/utils/score.py
    def __init__(\nself,\nscoring: Union[str, ScorerCallable],\ndefault: float = np.nan,\nrange: Tuple = (-np.inf, np.inf),\nname: Optional[str] = None,\n):\nif name is None and isinstance(scoring, str):\nname = scoring\nself._scorer = get_scorer(scoring)\nself.default = default\n# TODO: auto-fill from known scorers ?\nself.range = np.array(range)\nself._name = getattr(self._scorer, \"__name__\", name or \"scorer\")\n
    "},{"location":"api/pydvl/utils/score/#pydvl.utils.score.compose_score","title":"compose_score(scorer, transformation, range, name)","text":"

    Composes a scoring function with an arbitrary scalar transformation.

    Useful to squash unbounded scores into ranges manageable by data valuation methods.

    Example:

    sigmoid = lambda x: 1/(1+np.exp(-x))\ncompose_score(Scorer(\"r2\"), sigmoid, range=(0,1), name=\"squashed r2\")\n
    PARAMETER DESCRIPTION scorer

    The object to be composed.

    TYPE: Scorer

    transformation

    A scalar transformation

    TYPE: Callable[[float], float]

    range

    The range of the transformation. This will be used e.g. by Utility for the range of the composed scorer.

    TYPE: Tuple[float, float]

    name

    A string representation for the composition, for str().

    TYPE: str

    RETURNS DESCRIPTION Scorer

    The composite Scorer.

    Source code in src/pydvl/utils/score.py
    def compose_score(\nscorer: Scorer,\ntransformation: Callable[[float], float],\nrange: Tuple[float, float],\nname: str,\n) -> Scorer:\n\"\"\"Composes a scoring function with an arbitrary scalar transformation.\n    Useful to squash unbounded scores into ranges manageable by data valuation\n    methods.\n    Example:\n    ```python\n    sigmoid = lambda x: 1/(1+np.exp(-x))\n    compose_score(Scorer(\"r2\"), sigmoid, range=(0,1), name=\"squashed r2\")\n    ```\n    Args:\n        scorer: The object to be composed.\n        transformation: A scalar transformation\n        range: The range of the transformation. This will be used e.g. by\n            [Utility][pydvl.utils.utility.Utility] for the range of the composed.\n        name: A string representation for the composition, for `str()`.\n    Returns:\n        The composite [Scorer][pydvl.utils.score.Scorer].\n    \"\"\"\nclass CompositeScorer(Scorer):\ndef __call__(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float:\nscore = self._scorer(model=model, X=X, y=y)\nreturn transformation(score)\nreturn CompositeScorer(scorer, range=range, name=name)\n
    "},{"location":"api/pydvl/utils/status/","title":"Status","text":""},{"location":"api/pydvl/utils/status/#pydvl.utils.status.Status","title":"Status","text":"

    Bases: Enum

    Status of a computation.

    Statuses can be combined using bitwise or (|) and bitwise and (&) to get the status of a combined computation. For example, if we have two computations, one that has converged and one that has failed, then the combined status is Status.Converged | Status.Failed == Status.Converged, but Status.Converged & Status.Failed == Status.Failed.

    "},{"location":"api/pydvl/utils/status/#pydvl.utils.status.Status--or","title":"OR","text":"

    The result of bitwise or-ing two valuation statuses with | is given by the following table:

    P | P = P, P | C = C, P | F = P; C | P = C, C | C = C, C | F = C; F | P = P, F | C = C, F | F = F

    where P = Pending, C = Converged, F = Failed.

    "},{"location":"api/pydvl/utils/status/#pydvl.utils.status.Status--and","title":"AND","text":"

    The result of bitwise and-ing two valuation statuses with & is given by the following table:

    P & P = P, P & C = P, P & F = F; C & P = P, C & C = C, C & F = F; F & P = F, F & C = F, F & F = F

    where P = Pending, C = Converged, F = Failed.

    "},{"location":"api/pydvl/utils/status/#pydvl.utils.status.Status--not","title":"NOT","text":"

    The result of bitwise negation of a Status with ~ is Failed if the status is Converged, or Converged otherwise:

    ~P == C, ~C == F, ~F == C\n
    "},{"location":"api/pydvl/utils/status/#pydvl.utils.status.Status--boolean-casting","title":"Boolean casting","text":"

    A Status evaluates to True iff it's Converged or Failed:

    bool(Status.Pending) == False\nbool(Status.Converged) == True\nbool(Status.Failed) == True\n

    Warning

    These truth values are inconsistent with the usual boolean operations. In particular the XOR of two instances of Status is not the same as the XOR of their boolean values.

    "},{"location":"api/pydvl/utils/types/","title":"Types","text":"

    This module contains types, protocols, decorators and generic function transformations. Some of it probably belongs elsewhere.

    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.SupervisedModel","title":"SupervisedModel","text":"

    Bases: Protocol

    This is the minimal Protocol that valuation methods require from models in order to work.

    All that is needed are the standard sklearn methods fit(), predict() and score().

    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.SupervisedModel.fit","title":"fit(x, y)","text":"

    Fit the model to the data

    PARAMETER DESCRIPTION x

    Independent variables

    TYPE: NDArray

    y

    Dependent variable

    TYPE: NDArray

    Source code in src/pydvl/utils/types.py
    def fit(self, x: NDArray, y: NDArray):\n\"\"\"Fit the model to the data\n    Args:\n        x: Independent variables\n        y: Dependent variable\n    \"\"\"\npass\n
    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.SupervisedModel.predict","title":"predict(x)","text":"

    Compute predictions for the input

    PARAMETER DESCRIPTION x

    Independent variables for which to compute predictions

    TYPE: NDArray

    RETURNS DESCRIPTION NDArray

    Predictions for the input

    Source code in src/pydvl/utils/types.py
    def predict(self, x: NDArray) -> NDArray:\n\"\"\"Compute predictions for the input\n    Args:\n        x: Independent variables for which to compute predictions\n    Returns:\n        Predictions for the input\n    \"\"\"\npass\n
    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.SupervisedModel.score","title":"score(x, y)","text":"

    Compute the score of the model given test data

    PARAMETER DESCRIPTION x

    Independent variables

    TYPE: NDArray

    y

    Dependent variable

    TYPE: NDArray

    RETURNS DESCRIPTION float

    The score of the model on (x, y)

    Source code in src/pydvl/utils/types.py
    def score(self, x: NDArray, y: NDArray) -> float:\n\"\"\"Compute the score of the model given test data\n    Args:\n        x: Independent variables\n        y: Dependent variable\n    Returns:\n        The score of the model on `(x, y)`\n    \"\"\"\npass\n
    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.NoPublicConstructor","title":"NoPublicConstructor","text":"

    Bases: ABCMeta

    Metaclass that ensures a private constructor

    If a class uses this metaclass like this:

    class SomeClass(metaclass=NoPublicConstructor):\n    pass\n

    If you try to instantiate your class (SomeClass()), a TypeError will be thrown.

    Taken almost verbatim from: https://stackoverflow.com/a/64682734

    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.NoPublicConstructor.create","title":"create(*args, **kwargs)","text":"

    Create an instance of the class

    Source code in src/pydvl/utils/types.py
    def create(cls, *args: Any, **kwargs: Any):\n\"\"\"Create an instance of the class\"\"\"\nreturn super().__call__(*args, **kwargs)\n
    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.ensure_seed_sequence","title":"ensure_seed_sequence(seed=None)","text":"

    If the passed seed is a SeedSequence object then it is returned as is. If it is a Generator the internal protected seed sequence from the generator gets extracted. Otherwise, a new SeedSequence object is created from the passed (optional) seed.

    PARAMETER DESCRIPTION seed

    Either an int, a Generator object, a SeedSequence object or None.

    TYPE: Optional[Union[Seed, SeedSequence]] DEFAULT: None

    RETURNS DESCRIPTION SeedSequence

    A SeedSequence object.

    New in version 0.7.0

    Source code in src/pydvl/utils/types.py
    def ensure_seed_sequence(\nseed: Optional[Union[Seed, SeedSequence]] = None\n) -> SeedSequence:\n\"\"\"\n    If the passed seed is a SeedSequence object then it is returned as is. If it is\n    a Generator the internal protected seed sequence from the generator gets extracted.\n    Otherwise, a new SeedSequence object is created from the passed (optional) seed.\n    Args:\n        seed: Either an int, a Generator object a SeedSequence object or None.\n    Returns:\n        A SeedSequence object.\n    !!! tip \"New in version 0.7.0\"\n    \"\"\"\nif isinstance(seed, SeedSequence):\nreturn seed\nelif isinstance(seed, Generator):\nreturn cast(SeedSequence, seed.bit_generator.seed_seq)  # type: ignore\nelse:\nreturn SeedSequence(seed)\n
    "},{"location":"api/pydvl/utils/utility/","title":"Utility","text":"

    This module contains classes to manage and learn utility functions for the computation of values. Please see the documentation on Computing Data Values for more information.

    Utility holds information about model, data and scoring function (the latter being what one usually understands under utility in the general definition of Shapley value). It is automatically cached across machines when the cache is configured and it is enabled upon construction.

    DataUtilityLearning adds support for learning the scoring function to avoid repeated re-training of the model to compute the score.

    This module also contains derived Utility classes for toy games that are used for testing and for demonstration purposes.

    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility--references","title":"References","text":"
    1. Wang, T., Yang, Y. and Jia, R., 2021. Improving cooperative game theory-based data valuation via data utility learning. arXiv preprint arXiv:2107.06336.\u00a0\u21a9

    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.Utility","title":"Utility(model, data, scorer=None, *, default_score=0.0, score_range=(-np.inf, np.inf), catch_errors=True, show_warnings=False, enable_cache=False, cache_options=None, clone_before_fit=True)","text":"

    Convenience wrapper with configurable memoization of the scoring function.

    An instance of Utility holds the triple of model, dataset and scoring function which determines the value of data points. This is used for the computation of all game-theoretic values like Shapley values and the Least Core.

    The Utility expects the model to fulfill the SupervisedModel interface, i.e. to have fit(), predict(), and score() methods.

    When calling the utility, the model will be cloned if it is a scikit-learn model; otherwise a copy is created using copy.deepcopy.

    Since evaluating the scoring function requires retraining the model and that can be time-consuming, this class wraps it and caches the results of each execution. Caching is available both locally and across nodes, but must always be enabled for your project first, see Setting up the cache.

    ATTRIBUTE DESCRIPTION model

    The supervised model.

    TYPE: SupervisedModel

    data

    An object containing the split data.

    TYPE: Dataset

    scorer

    A scoring function. If None, the score() method of the model will be used. See score for ways to create and compose scorers, in particular how to set default values and ranges.

    TYPE: Scorer

    PARAMETER DESCRIPTION model

    Any supervised model. Typical choices can be found in the scikit-learn documentation (https://scikit-learn.org/stable/supervised_learning.html).

    TYPE: SupervisedModel

    data

    Dataset or GroupedDataset instance.

    TYPE: Dataset

    scorer

    A scoring object. If None, the score() method of the model will be used. See score for ways to create and compose scorers, in particular how to set default values and ranges. For convenience, a string can be passed, which will be used to construct a Scorer.

    TYPE: Optional[Union[str, Scorer]] DEFAULT: None

    default_score

    As a convenience when no scorer object is passed (where a default value can be provided), this argument also allows to set the default score for models that have not been fit, e.g. when too little data is passed, or errors arise.

    TYPE: float DEFAULT: 0.0

    score_range

    As with default_score, this is a convenience argument for when no scorer argument is provided, to set the numerical range of the score function. Some Monte Carlo methods can use this to estimate the number of samples required for a certain quality of approximation.

    TYPE: Tuple[float, float] DEFAULT: (-inf, inf)

    catch_errors

    set to True to catch the errors when fit() fails. This could happen in several steps of the pipeline, e.g. when too little training data is passed, which happens often during Shapley value calculations. When this happens, the default_score is returned as a score and computation continues.

    TYPE: bool DEFAULT: True

    show_warnings

    Set to False to suppress warnings thrown by fit().

    TYPE: bool DEFAULT: False

    enable_cache

    If True, use memcached for memoization.

    TYPE: bool DEFAULT: False

    cache_options

    Optional configuration object for memcached.

    TYPE: Optional[MemcachedConfig] DEFAULT: None

    clone_before_fit

    If True, the model will be cloned before calling fit().

    TYPE: bool DEFAULT: True

    Example
    >>> from pydvl.utils import Utility, DataUtilityLearning, Dataset\n>>> from sklearn.linear_model import LinearRegression, LogisticRegression\n>>> from sklearn.datasets import load_iris\n>>> dataset = Dataset.from_sklearn(load_iris(), random_state=16)\n>>> u = Utility(LogisticRegression(random_state=16), dataset)\n>>> u(dataset.indices)\n0.9\n
    Source code in src/pydvl/utils/utility.py
    def __init__(\nself,\nmodel: SupervisedModel,\ndata: Dataset,\nscorer: Optional[Union[str, Scorer]] = None,\n*,\ndefault_score: float = 0.0,\nscore_range: Tuple[float, float] = (-np.inf, np.inf),\ncatch_errors: bool = True,\nshow_warnings: bool = False,\nenable_cache: bool = False,\ncache_options: Optional[MemcachedConfig] = None,\nclone_before_fit: bool = True,\n):\nself.model = self._clone_model(model)\nself.data = data\nif isinstance(scorer, str):\nscorer = Scorer(scorer, default=default_score, range=score_range)\nself.scorer = check_scoring(self.model, scorer)\nself.default_score = scorer.default if scorer is not None else default_score\n# TODO: auto-fill from known scorers ?\nself.score_range = scorer.range if scorer is not None else np.array(score_range)\nself.catch_errors = catch_errors\nself.show_warnings = show_warnings\nself.enable_cache = enable_cache\nself.cache_options: MemcachedConfig = cache_options or MemcachedConfig()\nself.clone_before_fit = clone_before_fit\nself._signature = serialize((hash(self.model), hash(data), hash(scorer)))\nself._initialize_utility_wrapper()\n
    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.Utility.signature","title":"signature property","text":"

    Signature used for caching model results.

    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.Utility.cache_stats","title":"cache_stats: Optional[CacheStats] property","text":"

    Cache statistics are gathered when cache is enabled. See CacheStats for all fields returned.

    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.Utility.__call__","title":"__call__(indices)","text":"PARAMETER DESCRIPTION indices

    a subset of valid indices for the x_train attribute of Dataset.

    TYPE: Iterable[int]

    Source code in src/pydvl/utils/utility.py
    def __call__(self, indices: Iterable[int]) -> float:\n\"\"\"\n    Args:\n        indices: a subset of valid indices for the\n            `x_train` attribute of [Dataset][pydvl.utils.dataset.Dataset].\n    \"\"\"\nutility: float = self._utility_wrapper(frozenset(indices))\nreturn utility\n
    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.DataUtilityLearning","title":"DataUtilityLearning(u, training_budget, model)","text":"

    Implementation of Data Utility Learning (Wang et al., 2021)1.

    This object wraps a Utility and delegates calls to it, up until a given budget (number of iterations). Every tuple of input and output (a so-called utility sample) is stored. Once the budget is exhausted, DataUtilityLearning fits the given model to the utility samples. Subsequent calls will use the learned model to predict the utility instead of delegating.

    PARAMETER DESCRIPTION u

    The Utility to learn.

    TYPE: Utility

    training_budget

    Number of utility samples to collect before fitting the given model.

    TYPE: int

    model

    A supervised regression model

    TYPE: SupervisedModel

    Example
    >>> from pydvl.utils import Utility, DataUtilityLearning, Dataset\n>>> from sklearn.linear_model import LinearRegression, LogisticRegression\n>>> from sklearn.datasets import load_iris\n>>> dataset = Dataset.from_sklearn(load_iris())\n>>> u = Utility(LogisticRegression(), dataset)\n>>> wrapped_u = DataUtilityLearning(u, 3, LinearRegression())\n... # First 3 calls will be computed normally\n>>> for i in range(3):\n...     _ = wrapped_u((i,))\n>>> wrapped_u((1, 2, 3)) # Subsequent calls will be computed using the fit model for DUL\n0.0\n
    Source code in src/pydvl/utils/utility.py
    def __init__(\nself, u: Utility, training_budget: int, model: SupervisedModel\n) -> None:\nself.utility = u\nself.training_budget = training_budget\nself.model = model\nself._current_iteration = 0\nself._is_model_fit = False\nself._utility_samples: Dict[FrozenSet, Tuple[NDArray[np.bool_], float]] = {}\n
    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.DataUtilityLearning.data","title":"data: Dataset property","text":"

    Returns the wrapped utility's Dataset.

    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.MinerGameUtility","title":"MinerGameUtility(n_miners, **kwargs)","text":"

    Bases: Utility

    Toy game utility that is used for testing and demonstration purposes.

    Consider a group of n miners, who have discovered large bars of gold.

    If two miners can carry one piece of gold, then the payoff of a coalition \\(S\\) is:

    \\[{ v(S) = \\left\\{\\begin{array}{lll} \\mid S \\mid / 2 & \\text{, if} & \\mid S \\mid \\text{ is even} \\\\ ( \\mid S \\mid - 1)/2 & \\text{, if} & \\mid S \\mid \\text{ is odd} \\end{array}\\right. }\\]

    If there are more than two miners and there is an even number of miners, then the core consists of the single payoff where each miner gets 1/2.

    If there is an odd number of miners, then the core is empty.

    Taken from Wikipedia

    PARAMETER DESCRIPTION n_miners

    Number of miners that participate in the game.

    TYPE: int

    Source code in src/pydvl/utils/utility.py
    def __init__(self, n_miners: int, **kwargs):\nif n_miners <= 2:\nraise ValueError(f\"n_miners, {n_miners} should be > 2\")\nself.n_miners = n_miners\nx = np.arange(n_miners)[..., np.newaxis]\n# The y values don't matter here\ny = np.zeros_like(x)\nself.data = Dataset(x_train=x, y_train=y, x_test=x, y_test=y)\n
    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.GlovesGameUtility","title":"GlovesGameUtility(left, right, **kwargs)","text":"

    Bases: Utility

    Toy game utility that is used for testing and demonstration purposes.

    In this game, some players have a left glove and others a right glove. Single gloves have a worth of zero while pairs have a worth of 1.

    The payoff of a coalition \\(S\\) is:

    \\[{ v(S) = \\min( \\mid S \\cap L \\mid, \\mid S \\cap R \\mid ) }\\]

    Where \\(L\\), respectively \\(R\\), is the set of players with left gloves, respectively right gloves.

    PARAMETER DESCRIPTION left

    Number of players with a left glove.

    TYPE: int

    right

    Number of players with a right glove.

    TYPE: int

    Source code in src/pydvl/utils/utility.py
    def __init__(self, left: int, right: int, **kwargs):\nself.left = left\nself.right = right\nx = np.empty(left + right)[..., np.newaxis]\n# The y values don't matter here\ny = np.zeros_like(x)\nself.data = Dataset(x_train=x, y_train=y, x_test=x, y_test=y)\n
    "},{"location":"api/pydvl/value/","title":"Value","text":"

    This module implements algorithms for the exact and approximate computation of values and semi-values.

    See Data valuation for an introduction to the concepts and methods implemented here.

    "},{"location":"api/pydvl/value/result/","title":"Result","text":"

    This module collects types and methods for the inspection of the results of valuation algorithms.

    The most important class is ValuationResult, which provides access to raw values, as well as convenient behaviour as a Sequence with extended indexing and updating abilities, and conversion to pandas DataFrames.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result--operating-on-results","title":"Operating on results","text":"

    Results can be added together with the standard + operator. Because values are typically running averages of iterative algorithms, addition behaves like a weighted average of the two results, with the weights being the number of updates in each result: adding two results is the same as generating one result with the mean of the values of the two results as values. The variances are updated accordingly. See ValuationResult for details.

    Results can also be sorted by value, variance or number of updates, see sort(). The arrays of ValuationResult.values, ValuationResult.variances, ValuationResult.counts, ValuationResult.indices, ValuationResult.names are sorted in the same way.

    Indexing and slicing of results is supported and ValueItem objects are returned. These objects can be compared with the usual operators, which take only the ValueItem.value into account.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result--creating-result-objects","title":"Creating result objects","text":"

    The most commonly used factory method is ValuationResult.zeros(), which creates a result object with all values, variances and counts set to zero. ValuationResult.empty() creates an empty result object, which can be used as a starting point for adding results together. Empty results are discarded when added to other results. Finally, ValuationResult.from_random() samples random values uniformly.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValueItem","title":"ValueItem dataclass","text":"

    Bases: Generic[IndexT, NameT]

    The result of a value computation for one datum.

    ValueItems can be compared with the usual operators, forming a total order. Comparisons take only the value into account.

    Todo

    Maybe have a mode of comparing similar to np.isclose, or taking the variance into account.

    ATTRIBUTE DESCRIPTION index

    Index of the sample with this value in the original Dataset

    TYPE: IndexT

    name

    Name of the sample if it was provided. Otherwise, str(index)

    TYPE: NameT

    value

    The value

    TYPE: float

    variance

    Variance of the value if it was computed with an approximate method

    TYPE: Optional[float]

    count

    Number of updates for this value

    TYPE: Optional[int]

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValueItem.stderr","title":"stderr: Optional[float] property","text":"

    Standard error of the value.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult","title":"ValuationResult(*, values, variances=None, counts=None, indices=None, data_names=None, algorithm='', status=Status.Pending, sort=False, **extra_values)","text":"

    Bases: Sequence, Iterable[ValueItem[IndexT, NameT]], Generic[IndexT, NameT]

    Objects of this class hold the results of valuation algorithms.

    These include indices in the original Dataset, any data names (e.g. group names in GroupedDataset), the values themselves, and variance of the computation in the case of Monte Carlo methods. ValuationResults can be iterated over like any Sequence: iter(valuation_result) returns a generator of ValueItem in the order in which the object is sorted.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult--indexing","title":"Indexing","text":"

    Indexing can be position-based, when accessing any of the attributes values, variances, counts and indices, as well as when iterating over the object, or using the item access operator, both getter and setter. The \"position\" is either the original sequence in which the data was passed to the constructor, or the sequence in which the object is sorted, see below.

    Alternatively, indexing can be data-based, i.e. using the indices in the original dataset. This is the case for the methods get() and update().

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult--sorting","title":"Sorting","text":"

    Results can be sorted in-place with sort(), or alternatively using Python's standard sorted() and reversed(). Note that sorting values affects how iterators and the object itself as Sequence behave: values[0] returns a ValueItem with the highest or lowest ranking point if this object is sorted by descending or ascending value, respectively. If unsorted, values[0] returns the ValueItem at position 0, which has data index indices[0] in the Dataset.

    The same applies to direct indexing of the ValuationResult: the index is positional, according to the sorting. It does not refer to the \"data index\". To sort according to data index, use sort() with key=\"index\".

    In order to access ValueItem objects by their data index, use get().

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult--operating-on-results","title":"Operating on results","text":"

    Results can be added to each other with the + operator. Means and variances are correctly updated, using the counts attribute.

    Results can also be updated with new values using update(). Means and variances are updated accordingly using the Welford algorithm.

    Empty objects behave in a special way, see empty().

    PARAMETER DESCRIPTION values

    An array of values. If omitted, defaults to an empty array or to an array of zeros if indices are given.

    TYPE: NDArray[float_]

    indices

    An optional array of indices in the original dataset. If omitted, defaults to np.arange(len(values)). Warning: It is common to pass the indices of a Dataset here. Attention must be paid in a parallel context to copy them to the local process. Just do indices=np.copy(data.indices).

    TYPE: Optional[NDArray[IndexT]] DEFAULT: None

    variances

    An optional array of variances in the computation of each value.

    TYPE: Optional[NDArray[float_]] DEFAULT: None

    counts

    An optional array with the number of updates for each value. Defaults to an array of ones.

    TYPE: Optional[NDArray[int_]] DEFAULT: None

    data_names

    Names for the data points. Defaults to index numbers if not set.

    TYPE: Optional[Sequence[NameT] | NDArray[NameT]] DEFAULT: None

    algorithm

    The method used.

    TYPE: str DEFAULT: ''

    status

    The end status of the algorithm.

    TYPE: Status DEFAULT: Pending

    sort

    Whether to sort the indices by ascending value. See above how this affects usage as an iterable or sequence.

    TYPE: bool DEFAULT: False

    extra_values

    Additional values that can be passed as keyword arguments. This can contain, for example, the least core value.

    DEFAULT: {}

    RAISES DESCRIPTION ValueError

    If input arrays have mismatching lengths.

    Source code in src/pydvl/value/result.py
    def __init__(\nself,\n*,\nvalues: NDArray[np.float_],\nvariances: Optional[NDArray[np.float_]] = None,\ncounts: Optional[NDArray[np.int_]] = None,\nindices: Optional[NDArray[IndexT]] = None,\ndata_names: Optional[Sequence[NameT] | NDArray[NameT]] = None,\nalgorithm: str = \"\",\nstatus: Status = Status.Pending,\nsort: bool = False,\n**extra_values,\n):\nif variances is not None and len(variances) != len(values):\nraise ValueError(\"Lengths of values and variances do not match\")\nif data_names is not None and len(data_names) != len(values):\nraise ValueError(\"Lengths of values and data_names do not match\")\nif indices is not None and len(indices) != len(values):\nraise ValueError(\"Lengths of values and indices do not match\")\nself._algorithm = algorithm\nself._status = Status(status)  # Just in case we are given a string\nself._values = values\nself._variances = np.zeros_like(values) if variances is None else variances\nself._counts = np.ones_like(values) if counts is None else counts\nself._sort_order = None\nself._extra_values = extra_values or {}\n# Yuk...\nif data_names is None:\nif indices is not None:\nself._names = np.copy(indices)\nelse:\nself._names = np.arange(len(self._values), dtype=np.int_)\nelif not isinstance(data_names, np.ndarray):\nself._names = np.array(data_names)\nelse:\nself._names = data_names.copy()\nif len(np.unique(self._names)) != len(self._names):\nraise ValueError(\"Data names must be unique\")\nif indices is None:\nindices = np.arange(len(self._values), dtype=np.int_)\nself._indices = indices\nself._positions = {idx: pos for pos, idx in enumerate(indices)}\nself._sort_positions: NDArray[np.int_] = np.arange(\nlen(self._values), dtype=np.int_\n)\nif sort:\nself.sort()\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.values","title":"values: NDArray[np.float_] property","text":"

    The values, possibly sorted.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.variances","title":"variances: NDArray[np.float_] property","text":"

    The variances, possibly sorted.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.stderr","title":"stderr: NDArray[np.float_] property","text":"

    The raw standard errors, possibly sorted.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.counts","title":"counts: NDArray[np.int_] property","text":"

    The raw counts, possibly sorted.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.indices","title":"indices: NDArray[IndexT] property","text":"

    The indices for the values, possibly sorted.

    If the object is unsorted, then these are the same as declared at construction or np.arange(len(values)) if none were passed.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.names","title":"names: NDArray[NameT] property","text":"

    The names for the values, possibly sorted. If the object is unsorted, then these are the same as declared at construction or np.arange(len(values)) if none were passed.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.sort","title":"sort(reverse=False, key='value')","text":"

    Sorts the indices in place by key.

    Once sorted, iteration over the results, and indexing of all the properties ValuationResult.values, ValuationResult.variances, ValuationResult.counts, ValuationResult.indices and ValuationResult.names will follow the same order.

    PARAMETER DESCRIPTION reverse

    Whether to sort in descending order by value.

    TYPE: bool DEFAULT: False

    key

    The key to sort by. Defaults to ValueItem.value.

    TYPE: Literal['value', 'variance', 'index', 'name'] DEFAULT: 'value'

    Source code in src/pydvl/value/result.py
    def sort(\nself,\nreverse: bool = False,\n# Need a \"Comparable\" type here\nkey: Literal[\"value\", \"variance\", \"index\", \"name\"] = \"value\",\n) -> None:\n\"\"\"Sorts the indices in place by `key`.\n    Once sorted, iteration over the results, and indexing of all the\n    properties\n    [ValuationResult.values][pydvl.value.result.ValuationResult.values],\n    [ValuationResult.variances][pydvl.value.result.ValuationResult.variances],\n    [ValuationResult.counts][pydvl.value.result.ValuationResult.counts],\n    [ValuationResult.indices][pydvl.value.result.ValuationResult.indices]\n    and [ValuationResult.names][pydvl.value.result.ValuationResult.names]\n    will follow the same order.\n    Args:\n        reverse: Whether to sort in descending order by value.\n        key: The key to sort by. Defaults to\n            [ValueItem.value][pydvl.value.result.ValueItem].\n    \"\"\"\nkeymap = {\n\"index\": \"_indices\",\n\"value\": \"_values\",\n\"variance\": \"_variances\",\n\"name\": \"_names\",\n}\nself._sort_positions = np.argsort(getattr(self, keymap[key]))\nif reverse:\nself._sort_positions = self._sort_positions[::-1]\nself._sort_order = reverse\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.__getattr__","title":"__getattr__(attr)","text":"

    Allows access to extra values as if they were properties of the instance.

    Source code in src/pydvl/value/result.py
    def __getattr__(self, attr: str) -> Any:\n\"\"\"Allows access to extra values as if they were properties of the instance.\"\"\"\n# This is here to avoid a RecursionError when copying or pickling the object\nif attr == \"_extra_values\":\nraise AttributeError()\ntry:\nreturn self._extra_values[attr]\nexcept KeyError as e:\nraise AttributeError(\nf\"{self.__class__.__name__} object has no attribute {attr}\"\n) from e\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.__iter__","title":"__iter__()","text":"

    Iterate over the results returning ValueItem objects. To sort in place before iteration, use sort().

    Source code in src/pydvl/value/result.py
    def __iter__(self) -> Iterator[ValueItem[IndexT, NameT]]:\n\"\"\"Iterate over the results returning [ValueItem][pydvl.value.result.ValueItem] objects.\n    To sort in place before iteration, use [sort()][pydvl.value.result.ValuationResult.sort].\n    \"\"\"\nfor pos in self._sort_positions:\nyield ValueItem(\nself._indices[pos],\nself._names[pos],\nself._values[pos],\nself._variances[pos],\nself._counts[pos],\n)\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.__add__","title":"__add__(other)","text":"

    Adds two ValuationResults.

    The values must have been computed with the same algorithm. An exception to this is if one argument has empty values, in which case the other argument is returned.

    Warning

    Abusing this will introduce numerical errors.

    Means and standard errors are correctly handled. Statuses are added with bit-wise &, see Status. data_names are taken from the left summand, or if unavailable from the right one. The algorithm string is carried over if both terms have the same one or concatenated.

    It is possible to add ValuationResults of different lengths, and with different or overlapping indices. The result will have the union of the indices, with the values, variances and counts of indices present in both summands merged.

    Warning

    FIXME: Arbitrary extra_values aren't handled.

    Source code in src/pydvl/value/result.py
    def __add__(\nself, other: ValuationResult[IndexT, NameT]\n) -> ValuationResult[IndexT, NameT]:\n\"\"\"Adds two ValuationResults.\n    The values must have been computed with the same algorithm. An exception\n    to this is if one argument has empty values, in which case the other\n    argument is returned.\n    !!! Warning\n        Abusing this will introduce numerical errors.\n    Means and standard errors are correctly handled. Statuses are added with\n    bit-wise `&`, see [Status][pydvl.value.result.Status].\n    `data_names` are taken from the left summand, or if unavailable from\n    the right one. The `algorithm` string is carried over if both terms\n    have the same one or concatenated.\n    It is possible to add ValuationResults of different lengths, and with\n    different or overlapping indices. The result will have the union of\n    indices, and the values.\n    !!! Warning\n        FIXME: Arbitrary `extra_values` aren't handled.\n    \"\"\"\n# empty results\nif len(self.values) == 0:\nreturn other\nif len(other.values) == 0:\nreturn self\nself._check_compatible(other)\nindices = np.union1d(self._indices, other._indices).astype(self._indices.dtype)\nthis_pos = np.searchsorted(indices, self._indices)\nother_pos = np.searchsorted(indices, other._indices)\nn: NDArray[np.int_] = np.zeros_like(indices, dtype=int)\nm: NDArray[np.int_] = np.zeros_like(indices, dtype=int)\nxn: NDArray[np.int_] = np.zeros_like(indices, dtype=float)\nxm: NDArray[np.int_] = np.zeros_like(indices, dtype=float)\nvn: NDArray[np.int_] = np.zeros_like(indices, dtype=float)\nvm: NDArray[np.int_] = np.zeros_like(indices, dtype=float)\nn[this_pos] = self._counts\nxn[this_pos] = self._values\nvn[this_pos] = self._variances\nm[other_pos] = other._counts\nxm[other_pos] = other._values\nvm[other_pos] = other._variances\n# Sample mean of n+m samples from two means of n and m samples\nxnm = (n * xn + m * xm) / (n + m)\n# Sample variance of n+m samples from two sample variances of n and m 
samples\nvnm = (n * (vn + xn**2) + m * (vm + xm**2)) / (n + m) - xnm**2\nif np.any(vnm < 0):\nif np.any(vnm < -1e-6):\nlogger.warning(\n\"Numerical error in variance computation. \"\nf\"Negative sample variances clipped to 0 in {vnm}\"\n)\nvnm[np.where(vnm < 0)] = 0\n# Merging of names:\n# If an index has the same name in both results, it must be the same.\n# If an index has a name in one result but not the other, the name is\n# taken from the result with the name.\nif self._names.dtype != other._names.dtype:\nif np.can_cast(other._names.dtype, self._names.dtype, casting=\"safe\"):\nother._names = other._names.astype(self._names.dtype)\nlogger.warning(\nf\"Casting ValuationResult.names from {other._names.dtype} to {self._names.dtype}\"\n)\nelse:\nraise TypeError(\nf\"Cannot cast ValuationResult.names from \"\nf\"{other._names.dtype} to {self._names.dtype}\"\n)\nboth_pos = np.intersect1d(this_pos, other_pos)\nif len(both_pos) > 0:\nthis_names: NDArray = np.empty_like(indices, dtype=object)\nother_names: NDArray = np.empty_like(indices, dtype=object)\nthis_names[this_pos] = self._names\nother_names[other_pos] = other._names\nthis_shared_names = np.take(this_names, both_pos)\nother_shared_names = np.take(other_names, both_pos)\nif np.any(this_shared_names != other_shared_names):\nraise ValueError(f\"Mismatching names in ValuationResults\")\nnames = np.empty_like(indices, dtype=self._names.dtype)\nnames[this_pos] = self._names\nnames[other_pos] = other._names\nreturn ValuationResult(\nalgorithm=self.algorithm or other.algorithm or \"\",\nstatus=self.status & other.status,\nindices=indices,\nvalues=xnm,\nvariances=vnm,\ncounts=n + m,\ndata_names=names,\n# FIXME: What to do with extra_values? This is not commutative:\n# extra_values=self._extra_values.update(other._extra_values),\n)\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.update","title":"update(idx, new_value)","text":"

    Updates the result in place with a new value, using running mean and variance.

    PARAMETER DESCRIPTION idx

    Data index of the value to update.

    TYPE: int

    new_value

    New value to add to the result.

    TYPE: float

    RETURNS DESCRIPTION ValuationResult[IndexT, NameT]

    A reference to the same, modified result.

    RAISES DESCRIPTION IndexError

    If the index is not found.

    Source code in src/pydvl/value/result.py
    def update(self, idx: int, new_value: float) -> ValuationResult[IndexT, NameT]:\n\"\"\"Updates the result in place with a new value, using running mean\n    and variance.\n    Args:\n        idx: Data index of the value to update.\n        new_value: New value to add to the result.\n    Returns:\n        A reference to the same, modified result.\n    Raises:\n        IndexError: If the index is not found.\n    \"\"\"\ntry:\npos = self._positions[idx]\nexcept KeyError:\nraise IndexError(f\"Index {idx} not found in ValuationResult\")\nval, var = running_moments(\nself._values[pos], self._variances[pos], self._counts[pos], new_value\n)\nself[pos] = ValueItem(\nindex=cast(IndexT, idx),  # FIXME\nname=self._names[pos],\nvalue=val,\nvariance=var,\ncount=self._counts[pos] + 1,\n)\nreturn self\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.get","title":"get(idx)","text":"

    Retrieves a ValueItem by data index, as opposed to the indexing operator, which uses the sort index (position).

    RAISES DESCRIPTION IndexError

    If the index is not found.

    Source code in src/pydvl/value/result.py
    def get(self, idx: Integral) -> ValueItem:\n\"\"\"Retrieves a ValueItem by data index, as opposed to sort index, like\n    the indexing operator.\n    Raises:\n         IndexError: If the index is not found.\n    \"\"\"\ntry:\npos = self._positions[idx]\nexcept KeyError:\nraise IndexError(f\"Index {idx} not found in ValuationResult\")\nreturn ValueItem(\nself._indices[pos],\nself._names[pos],\nself._values[pos],\nself._variances[pos],\nself._counts[pos],\n)\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.to_dataframe","title":"to_dataframe(column=None, use_names=False)","text":"

    Returns values as a dataframe.

    PARAMETER DESCRIPTION column

    Name for the column holding the data value. Defaults to the name of the algorithm used.

    TYPE: Optional[str] DEFAULT: None

    use_names

    Whether to use data names instead of indices for the DataFrame's index.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION DataFrame

    A dataframe with two columns, one for the values, with name given as explained in column, and another with standard errors for approximate algorithms. The latter will be named column+'_stderr'.

    Raises: ImportError: If pandas is not installed

    Source code in src/pydvl/value/result.py
    def to_dataframe(\nself, column: Optional[str] = None, use_names: bool = False\n) -> pandas.DataFrame:\n\"\"\"Returns values as a dataframe.\n    Args:\n        column: Name for the column holding the data value. Defaults to\n            the name of the algorithm used.\n        use_names: Whether to use data names instead of indices for the\n            DataFrame's index.\n    Returns:\n        A dataframe with two columns, one for the values, with name\n            given as explained in `column`, and another with standard errors for\n            approximate algorithms. The latter will be named `column+'_stderr'`.\n    Raises:\n         ImportError: If pandas is not installed\n    \"\"\"\nif not pandas:\nraise ImportError(\"Pandas required for DataFrame export\")\ncolumn = column or self._algorithm\ndf = pandas.DataFrame(\nself._values[self._sort_positions],\nindex=self._names[self._sort_positions]\nif use_names\nelse self._indices[self._sort_positions],\ncolumns=[column],\n)\ndf[column + \"_stderr\"] = self.stderr[self._sort_positions]\nreturn df\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.from_random","title":"from_random(size, total=None, seed=None, **kwargs) classmethod","text":"

    Creates a ValuationResult object and fills it with an array of random values from a uniform distribution in [-1,1]. The values can be made to sum up to a given total number (doing so will change their range).

    PARAMETER DESCRIPTION size

    Number of values to generate

    TYPE: int

    total

    If set, the values are normalized to sum to this number (\"efficiency\" property of Shapley values).

    TYPE: Optional[float] DEFAULT: None

    kwargs

    Additional options to pass to the constructor of ValuationResult. Use to override status, names, etc.

    DEFAULT: {}

    RETURNS DESCRIPTION 'ValuationResult'

    A valuation result with its status set to Status.Converged by default.

    RAISES DESCRIPTION ValueError

    If size is less than 1.

    Changed in version 0.6.0

    Added parameter total. Check for zero size

    Source code in src/pydvl/value/result.py
    @classmethod\ndef from_random(\ncls,\nsize: int,\ntotal: Optional[float] = None,\nseed: Optional[Seed] = None,\n**kwargs,\n) -> \"ValuationResult\":\n\"\"\"Creates a [ValuationResult][pydvl.value.result.ValuationResult] object and fills it with an array\n    of random values from a uniform distribution in [-1,1]. The values can\n    be made to sum up to a given total number (doing so will change their range).\n    Args:\n        size: Number of values to generate\n        total: If set, the values are normalized to sum to this number\n            (\"efficiency\" property of Shapley values).\n        kwargs: Additional options to pass to the constructor of\n            [ValuationResult][pydvl.value.result.ValuationResult]. Use to override status, names, etc.\n    Returns:\n        A valuation result with its status set to\n        [Status.Converged][pydvl.utils.status.Status] by default.\n    Raises:\n         ValueError: If `size` is less than 1.\n    !!! tip \"Changed in version 0.6.0\"\n        Added parameter `total`. Check for zero size\n    \"\"\"\nif size < 1:\nraise ValueError(\"Size must be a positive integer\")\nrng = np.random.default_rng(seed)\nvalues = rng.uniform(low=-1, high=1, size=size)\nif total is not None:\nvalues *= total / np.sum(values)\noptions = dict(values=values, status=Status.Converged, algorithm=\"random\")\noptions.update(kwargs)\nreturn cls(**options)  # type: ignore\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.empty","title":"empty(algorithm='', indices=None, data_names=None, n_samples=0) classmethod","text":"

    Creates an empty ValuationResult object.

    Empty results are characterised by having an empty array of values. When another result is added to an empty one, the empty one is discarded.

    PARAMETER DESCRIPTION algorithm

    Name of the algorithm used to compute the values

    TYPE: str DEFAULT: ''

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Source code in src/pydvl/value/result.py
    @classmethod\n@deprecated(\ntarget=True,\ndeprecated_in=\"0.6.0\",\nremove_in=\"0.8.0\",\nargs_mapping=dict(indices=None, data_names=None, n_samples=None),\ntemplate_mgs=\"`%(source_name)s` is deprecated for generating zero-filled \"\n\"results, use `ValuationResult.zeros()` instead.\",\n)\ndef empty(\ncls,\nalgorithm: str = \"\",\nindices: Optional[Sequence[IndexT] | NDArray[IndexT]] = None,\ndata_names: Optional[Sequence[NameT] | NDArray[NameT]] = None,\nn_samples: int = 0,\n) -> ValuationResult:\n\"\"\"Creates an empty [ValuationResult][pydvl.value.result.ValuationResult] object.\n    Empty results are characterised by having an empty array of values. When\n    another result is added to an empty one, the empty one is discarded.\n    Args:\n        algorithm: Name of the algorithm used to compute the values\n    Returns:\n        Object with the results.\n    \"\"\"\nif indices is not None or data_names is not None or n_samples != 0:\nreturn cls.zeros(\nalgorithm=algorithm,\nindices=indices,\ndata_names=data_names,\nn_samples=n_samples,\n)\nreturn cls(algorithm=algorithm, status=Status.Pending, values=np.array([]))\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.zeros","title":"zeros(algorithm='', indices=None, data_names=None, n_samples=0) classmethod","text":"

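    Creates a ValuationResult object with all values, variances and counts set to zero.

    If neither indices nor n_samples are given, the result is empty. Empty results are characterised by having an empty array of values. When another result is added to an empty one, the empty one is ignored.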

    PARAMETER DESCRIPTION algorithm

    Name of the algorithm used to compute the values

    TYPE: str DEFAULT: ''

    indices

    Data indices to use. A copy will be made. If not given, the indices will be set to the range [0, n_samples).

    TYPE: Optional[Sequence[IndexT] | NDArray[IndexT]] DEFAULT: None

    data_names

    Data names to use. A copy will be made. If not given, the names will be set to the string representation of the indices.

    TYPE: Optional[Sequence[NameT] | NDArray[NameT]] DEFAULT: None

    n_samples

    Number of data points whose values are computed. If not given, the length of indices will be used.

    TYPE: int DEFAULT: 0

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Source code in src/pydvl/value/result.py
    @classmethod\ndef zeros(\ncls,\nalgorithm: str = \"\",\nindices: Optional[Sequence[IndexT] | NDArray[IndexT]] = None,\ndata_names: Optional[Sequence[NameT] | NDArray[NameT]] = None,\nn_samples: int = 0,\n) -> ValuationResult:\n\"\"\"Creates an empty [ValuationResult][pydvl.value.result.ValuationResult] object.\n    Empty results are characterised by having an empty array of values. When\n    another result is added to an empty one, the empty one is ignored.\n    Args:\n        algorithm: Name of the algorithm used to compute the values\n        indices: Data indices to use. A copy will be made. If not given,\n            the indices will be set to the range `[0, n_samples)`.\n        data_names: Data names to use. A copy will be made. If not given,\n            the names will be set to the string representation of the indices.\n        n_samples: Number of data points whose values are computed. If\n            not given, the length of `indices` will be used.\n    Returns:\n        Object with the results.\n    \"\"\"\nif indices is None:\nindices = np.arange(n_samples, dtype=np.int_)\nelse:\nindices = np.array(indices, dtype=np.int_)\nreturn cls(\nalgorithm=algorithm,\nstatus=Status.Pending,\nindices=indices,\ndata_names=np.array(data_names, dtype=object)\nif data_names is not None\nelse np.empty_like(indices, dtype=object),\nvalues=np.zeros(len(indices)),\nvariances=np.zeros(len(indices)),\ncounts=np.zeros(len(indices), dtype=np.int_),\n)\n
    "},{"location":"api/pydvl/value/sampler/","title":"Sampler","text":"

    Samplers iterate over subsets of indices.

    The classes in this module are used to iterate over indices and subsets of their complement in the whole set, as required for the computation of marginal utility for semi-values. The elements returned when iterating over any subclass of PowersetSampler are tuples of the form (idx, subset), where idx is the index of the element being added to the subset, and subset is a subset of the complement of idx. More formally, the classes iterate over an index set \\(I\\), and the elements returned are tuples of the form \\((i, S)\\), where \\(i\\) is an index of interest, and \\(S \\subset I \\setminus \\{i\\}\\) is a subset of the complement of \\(i\\).

    The iteration happens in two nested loops. An outer loop iterates over \\(I\\), and an inner loop iterates over the powerset of \\(I \\setminus \\{i\\}\\). The outer iteration can be either sequential or at random.

    Note

    This is the natural mode of iteration for the combinatorial definition of semi-values, in particular Shapley value. For the computation using permutations, adhering to this interface is not ideal, but we stick to it for consistency.

    The samplers are used in the semivalues module to compute any semi-value, in particular Shapley and Beta values, and Banzhaf indices.

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler--slicing-of-samplers","title":"Slicing of samplers","text":"

    The samplers can be sliced for parallel computation. For those which are embarrassingly parallel, this is done by slicing the set of \"outer\" indices and returning new samplers over those slices. This includes all truly powerset-based samplers, such as DeterministicUniformSampler and UniformSampler. In contrast, slicing a PermutationSampler creates a new sampler which iterates over the same indices.

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PowersetSampler","title":"PowersetSampler(indices, index_iteration=IndexIteration.Sequential, outer_indices=None)","text":"

    Bases: ABC, Iterable[SampleT], Generic[IndexT]

    Samplers are custom iterables over subsets of indices.

    Calling iter() on a sampler returns an iterator over tuples of the form \\((i, S)\\), where \\(i\\) is an index of interest, and \\(S \\subset I \\setminus \\{i\\}\\) is a subset of the complement of \\(i\\).

    This is done in two nested loops, where the outer loop iterates over the set of indices, and the inner loop iterates over subsets of the complement of the current index. The outer iteration can be either sequential or at random.

    Note

    Samplers are not iterators themselves, so that each call to iter() e.g. in a for loop creates a new iterator.

    Example
    >>>for idx, s in DeterministicUniformSampler(np.arange(2)):\n>>>    print(s, end=\"\")\n[][2,][][1,]\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PowersetSampler--methods-required-in-subclasses","title":"Methods required in subclasses","text":"

    Samplers must implement a weight() function to be used as a multiplier in Monte Carlo sums, so that the limit expectation coincides with the semi-value.

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PowersetSampler--slicing-of-samplers","title":"Slicing of samplers","text":"

    The samplers can be sliced for parallel computation. For those which are embarrassingly parallel, this is done by slicing the set of \"outer\" indices and returning new samplers over those slices.

    index_iteration: the order in which indices are iterated over\nouter_indices: The set of items (indices) over which to iterate\nwhen sampling. Subsets are taken from the complement of each index\nin succession. For embarrassingly parallel computations, this set\nis sliced and the samplers are used to iterate over the slices.\n
    Source code in src/pydvl/value/sampler.py
    def __init__(\nself,\nindices: NDArray[IndexT],\nindex_iteration: IndexIteration = IndexIteration.Sequential,\nouter_indices: NDArray[IndexT] | None = None,\n):\n\"\"\"\n    Args:\n        indices: The set of items (indices) to sample from.\n        index_iteration: the order in which indices are iterated over\n        outer_indices: The set of items (indices) over which to iterate\n            when sampling. Subsets are taken from the complement of each index\n            in succession. For embarrassingly parallel computations, this set\n            is sliced and the samplers are used to iterate over the slices.\n    \"\"\"\nself._indices = indices\nself._index_iteration = index_iteration\nself._outer_indices = outer_indices if outer_indices is not None else indices\nself._n = len(indices)\nself._n_samples = 0\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PowersetSampler.iterindices","title":"iterindices()","text":"

    Iterates over indices in the order specified at construction.

    this is probably not very useful, but I couldn't decide

    which method is better

    Source code in src/pydvl/value/sampler.py
    def iterindices(self) -> Iterator[IndexT]:\n\"\"\"Iterates over indices in the order specified at construction.\n    FIXME: this is probably not very useful, but I couldn't decide\n      which method is better\n    \"\"\"\nif self._index_iteration is PowersetSampler.IndexIteration.Sequential:\nfor idx in self._outer_indices:\nyield idx\nelif self._index_iteration is PowersetSampler.IndexIteration.Random:\nwhile True:\nyield np.random.choice(self._outer_indices, size=1).item()\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PowersetSampler.__len__","title":"__len__()","text":"

    Returns the number of outer indices over which the sampler iterates.

    Source code in src/pydvl/value/sampler.py
    def __len__(self) -> int:\n\"\"\"Returns the number of outer indices over which the sampler iterates.\"\"\"\nreturn len(self._outer_indices)\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PowersetSampler.weight","title":"weight(n, subset_len) abstractmethod classmethod","text":"

    Factor by which to multiply Monte Carlo samples, so that the mean converges to the desired expression.

    By the Law of Large Numbers, the sample mean of \\(\\delta_i(S_j)\\) converges to the expectation under the distribution from which \\(S_j\\) is sampled.

    \\[ \\frac{1}{m} \\sum_{j = 1}^m \\delta_i (S_j) c (S_j) \\longrightarrow \\underset{S \\sim \\mathcal{D}_{- i}}{\\mathbb{E}} [\\delta_i (S) c ( S)]\\]

    We add a factor \\(c(S_j)\\) in order to have this expectation coincide with the desired expression.

    Source code in src/pydvl/value/sampler.py
    @classmethod\n@abc.abstractmethod\ndef weight(cls, n: int, subset_len: int) -> float:\nr\"\"\"Factor by which to multiply Monte Carlo samples, so that the\n    mean converges to the desired expression.\n    By the Law of Large Numbers, the sample mean of $\\delta_i(S_j)$\n    converges to the expectation under the distribution from which $S_j$ is\n    sampled.\n    $$ \\frac{1}{m}  \\sum_{j = 1}^m \\delta_i (S_j) c (S_j) \\longrightarrow\n       \\underset{S \\sim \\mathcal{D}_{- i}}{\\mathbb{E}} [\\delta_i (S) c (\n       S)]$$\n    We add a factor $c(S_j)$ in order to have this expectation coincide with\n    the desired expression.\n    \"\"\"\n...\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.StochasticSamplerMixin","title":"StochasticSamplerMixin(*args, seed=None, **kwargs)","text":"

    Mixin class for samplers which use a random number generator.

    Source code in src/pydvl/value/sampler.py
    def __init__(self, *args, seed: Optional[Seed] = None, **kwargs):\nsuper().__init__(*args, **kwargs)\nself._rng = np.random.default_rng(seed)\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.DeterministicUniformSampler","title":"DeterministicUniformSampler(indices, *args, **kwargs)","text":"

    Bases: PowersetSampler[IndexT]

    For every index \\(i\\), each subset of the complement indices - {i} is returned.

    Note

    Indices are always iterated over sequentially, irrespective of the value of index_iteration upon construction.

    Example
    >>> for idx, s in DeterministicUniformSampler(np.arange(2)):\n>>>    print(f\"{idx} - {s}\", end=\", \")\n1 - [], 1 - [2], 2 - [], 2 - [1],\n
    PARAMETER DESCRIPTION indices

    The set of items (indices) to sample from.

    TYPE: NDArray[IndexT]

    Source code in src/pydvl/value/sampler.py
    def __init__(self, indices: NDArray[IndexT], *args, **kwargs):\n\"\"\"An iterator to perform uniform deterministic sampling of subsets.\n    For every index $i$, each subset of the complement `indices - {i}` is\n    returned.\n    !!! Note\n        Indices are always iterated over sequentially, irrespective of\n        the value of `index_iteration` upon construction.\n    ??? Example\n        ``` pycon\n        >>> for idx, s in DeterministicUniformSampler(np.arange(2)):\n        >>>    print(f\"{idx} - {s}\", end=\", \")\n        1 - [], 1 - [2], 2 - [], 2 - [1],\n        ```\n    Args:\n        indices: The set of items (indices) to sample from.\n    \"\"\"\n# Force sequential iteration\nkwargs.update({\"index_iteration\": PowersetSampler.IndexIteration.Sequential})\nsuper().__init__(indices, *args, **kwargs)\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.UniformSampler","title":"UniformSampler","text":"

    Bases: StochasticSamplerMixin, PowersetSampler[IndexT]

    An iterator to perform uniform random sampling of subsets.

    Iterating over every index \\(i\\), either in sequence or at random depending on the value of index_iteration, one subset of the complement indices - {i} is sampled with equal probability \\(1/2^{n-1}\\). The iterator never ends.

    Example

    The code

    for idx, s in UniformSampler(np.arange(5)):\n    print(f\"{idx} - {s}\", end=\", \")\n
    Produces the output:
    0 - [1 4], 1 - [2 3], 2 - [0 1 3], 3 - [], 4 - [2], 0 - [1 3 4], 1 - [0 2]\n(...)\n

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.UniformSampler.weight","title":"weight(n, subset_len) classmethod","text":"

    Correction coming from Monte Carlo integration so that the mean of the marginals converges to the value: the uniform distribution over the powerset of a set with n-1 elements has mass 1/2^{n-1} over each subset, so each sample is weighted by 2^{n-1}.

    Source code in src/pydvl/value/sampler.py
    @classmethod\ndef weight(cls, n: int, subset_len: int) -> float:\n\"\"\"Correction coming from Monte Carlo integration so that the mean of\n    the marginals converges to the value: the uniform distribution over the\n    powerset of a set with n-1 elements has mass 2^{n-1} over each subset.\"\"\"\nreturn float(2 ** (n - 1)) if n > 0 else 1.0\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.AntitheticSampler","title":"AntitheticSampler","text":"

    Bases: StochasticSamplerMixin, PowersetSampler[IndexT]

    An iterator to perform uniform random sampling of subsets, and their complements.

    Works as UniformSampler, but for every tuple \\((i,S)\\), it subsequently returns \\((i,S^c)\\), where \\(S^c\\) is the complement of the set \\(S\\), including the index \\(i\\) itself.

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PermutationSampler","title":"PermutationSampler","text":"

    Bases: StochasticSamplerMixin, PowersetSampler[IndexT]

    Sample permutations of indices and iterate through each returning increasing subsets, as required for the permutation definition of semi-values.

    This sampler does not implement the two loops described in PowersetSampler. Instead, for a permutation (3,1,4,2), it returns in sequence the tuples of index and sets: (3, {}), (1, {3}), (4, {3,1}) and (2, {3,1,4}).

    Note that the full index set is never returned.

    Warning

    This sampler requires caching to be enabled or computation will be doubled wrt. a \"direct\" implementation of permutation MC

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PermutationSampler.__getitem__","title":"__getitem__(key)","text":"

    Permutation samplers cannot be split across indices, so we return a copy of the full sampler.

    Source code in src/pydvl/value/sampler.py
    def __getitem__(self, key: slice | list[int]) -> PowersetSampler[IndexT]:\n\"\"\"Permutation samplers cannot be split across indices, so we return\n    a copy of the full sampler.\"\"\"\nreturn super().__getitem__(slice(None))\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.DeterministicPermutationSampler","title":"DeterministicPermutationSampler","text":"

    Bases: PermutationSampler[IndexT]

    Samples all n! permutations of the indices deterministically, and iterates through them, returning sets as required for the permutation-based definition of semi-values.

    Warning

    This sampler requires caching to be enabled or computation will be doubled wrt. a \"direct\" implementation of permutation MC

    Warning

    This sampler is not parallelizable, as it always iterates over the whole set of permutations in the same order. Different processes would always return the same values for all indices.

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.RandomHierarchicalSampler","title":"RandomHierarchicalSampler","text":"

    Bases: StochasticSamplerMixin, PowersetSampler[IndexT]

    For every index, sample a set size, then a set of that size.

    Todo

    This is unnecessary, but a step towards proper stratified sampling.

    "},{"location":"api/pydvl/value/semivalues/","title":"Semivalues","text":"

    This module provides the core functionality for the computation of generic semi-values. A semi-value is any valuation function with the form:

    \\[v_\\text{semi}(i) = \\sum_{k=1}^n w(k) \\sum_{S \\subset D_{-i}^{(k)}} [U(S_{+i})-U(S)],\\]

    where the coefficients \\(w(k)\\) satisfy the property:

    \\[\\sum_{k=1}^n w(k) = 1.\\]

    Note

    For implementation consistency, we slightly depart from the common definition of semi-values, which includes a factor \\(1/n\\) in the sum over subsets. Instead, we subsume this factor into the coefficient \\(w(k)\\).

    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues--main-components","title":"Main components","text":"

    The computation of a semi-value requires two components:

    1. A subset sampler that generates subsets of the set \\(D\\) of interest.
    2. A coefficient \\(w(k)\\) that assigns a weight to each subset size \\(k\\).

    Samplers can be found in sampler, and can be classified into two categories: powerset samplers and permutation samplers. Powerset samplers generate subsets of \\(D_{-i}\\), while the permutation sampler generates permutations of \\(D\\). The former conform to the above definition of semi-values, while the latter reformulates it as:

    \\[ v(i) = \\frac{1}{n!} \\sum_{\\sigma \\in \\Pi(n)} \\tilde{w}( | \\sigma_{:i} | )[U(\\sigma_{:i} \\cup \\{i\\}) - U(\\sigma_{:i})], \\]

    where \\(\\sigma_{:i}\\) denotes the set of indices in permutation \\(\\sigma\\) before the position where \\(i\\) appears (see Data valuation for details), and

    \\[ \\tilde{w} (k) = n \\binom{n - 1}{k} w (k) \\]

    is the weight correction due to the reformulation.

    Warning

    Both PermutationSampler and DeterministicPermutationSampler require caching to be enabled or computation will be doubled wrt. a 'direct' implementation of permutation MC.

    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues--computing-semi-values","title":"Computing semi-values","text":"

    Samplers and coefficients can be arbitrarily mixed by means of the main entry point of this module, compute_generic_semivalues. There are several pre-defined coefficients, including the Shapley value of (Ghorbani and Zou, 2019)1, the Banzhaf index of (Wang and Jia, 2022)3, and the Beta coefficient of (Kwon and Zou, 2022)2. For each of these methods, there is a convenience wrapper function. Respectively, these are: compute_shapley_semivalues, compute_banzhaf_semivalues, and compute_beta_shapley_semivalues.

    Parallelization and batching

    In order to ensure reproducibility and fine-grained control of parallelization, samples are generated in the main process and then distributed to worker processes for evaluation. For small sample sizes, this can lead to a significant overhead. To avoid this, we temporarily provide an additional argument batch_size to all methods which can improve performance with small models up to an order of magnitude. Note that this argument will be removed before version 1.0 in favour of a more general solution.

    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues--references","title":"References","text":"
    1. Ghorbani, A., Zou, J., 2019. Data Shapley: Equitable Valuation of Data for Machine Learning. In: Proceedings of the 36th International Conference on Machine Learning, PMLR, pp. 2242\u20132251.\u00a0\u21a9

    2. Kwon, Y. and Zou, J., 2022. Beta Shapley: A Unified and Noise-reduced Data Valuation Framework for Machine Learning. In: Proceedings of the 25th International Conference on Artificial Intelligence and Statistics (AISTATS) 2022, Vol. 151. PMLR, Valencia, Spain.\u00a0\u21a9

    3. Wang, J.T. and Jia, R., 2022. Data Banzhaf: A Robust Data Valuation Framework for Machine Learning. ArXiv preprint arXiv:2205.15466.\u00a0\u21a9

    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.SVCoefficient","title":"SVCoefficient","text":"

    Bases: Protocol

    The protocol that coefficients for the computation of semi-values must fulfill.

    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.SVCoefficient.__call__","title":"__call__(n, k)","text":"

    Computes the coefficient for a given subset size.

    PARAMETER DESCRIPTION n

    Total number of elements in the set.

    TYPE: int

    k

    Size of the subset for which the coefficient is being computed

    TYPE: int

    Source code in src/pydvl/value/semivalues.py
    def __call__(self, n: int, k: int) -> float:\n\"\"\"Computes the coefficient for a given subset size.\n    Args:\n        n: Total number of elements in the set.\n        k: Size of the subset for which the coefficient is being computed\n    \"\"\"\n...\n
    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.SemiValueMode","title":"SemiValueMode","text":"

    Bases: str, Enum

    Enumeration of semi-value modes.

    Deprecation notice

    This enum and the associated methods are deprecated and will be removed in 0.8.0.

    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.compute_generic_semivalues","title":"compute_generic_semivalues(sampler, u, coefficient, done, *, batch_size=1, n_jobs=1, config=ParallelConfig(), progress=False)","text":"

    Computes semi-values for a given utility function and subset sampler.

    PARAMETER DESCRIPTION sampler

    The subset sampler to use for utility computations.

    TYPE: PowersetSampler[IndexT]

    u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    coefficient

    The semi-value coefficient

    TYPE: SVCoefficient

    done

    Stopping criterion.

    TYPE: StoppingCriterion

    batch_size

    Number of marginal evaluations per single parallel job.

    TYPE: int DEFAULT: 1

    n_jobs

    Number of parallel jobs to use.

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display a progress bar.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Deprecation notice

    Parameter batch_size is for experimental use and will be removed in future versions.

    Source code in src/pydvl/value/semivalues.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.9.0\",\nargs_mapping={\"batch_size\": None},\ntemplate_mgs=\"batch_size is for experimental use and will be removed\"\n\"in future versions.\",\n)\ndef compute_generic_semivalues(\nsampler: PowersetSampler[IndexT],\nu: Utility,\ncoefficient: SVCoefficient,\ndone: StoppingCriterion,\n*,\nbatch_size: int = 1,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\n) -> ValuationResult:\n\"\"\"Computes semi-values for a given utility function and subset sampler.\n    Args:\n        sampler: The subset sampler to use for utility computations.\n        u: Utility object with model, data, and scoring function.\n        coefficient: The semi-value coefficient\n        done: Stopping criterion.\n        batch_size: Number of marginal evaluations per single parallel job.\n        n_jobs: Number of parallel jobs to use.\n        config: Object configuring parallel computation, with cluster\n            address, number of cpus, etc.\n        progress: Whether to display a progress bar.\n    Returns:\n        Object with the results.\n    !!! warning \"Deprecation notice\"\n        Parameter `batch_size` is for experimental use and will be removed in\n        future versions.\n    \"\"\"\nfrom concurrent.futures import FIRST_COMPLETED, Future, wait\nfrom pydvl.utils import effective_n_jobs, init_executor, init_parallel_backend\nif isinstance(sampler, PermutationSampler) and not u.enable_cache:\nlog.warning(\n\"PermutationSampler requires caching to be enabled or computation \"\n\"will be doubled wrt. 
a 'direct' implementation of permutation MC\"\n)\nif batch_size != 1:\nwarnings.warn(\n\"Parameter `batch_size` is for experimental use and will be\"\n\" removed in future versions\",\nDeprecationWarning,\n)\nresult = ValuationResult.zeros(\nalgorithm=f\"semivalue-{str(sampler)}-{coefficient.__name__}\",  # type: ignore\nindices=u.data.indices,\ndata_names=u.data.data_names,\n)\nparallel_backend = init_parallel_backend(config)\nu = parallel_backend.put(u)\ncorrection = parallel_backend.put(\nlambda n, k: coefficient(n, k) * sampler.weight(n, k)\n)\nmax_workers = effective_n_jobs(n_jobs, config)\nn_submitted_jobs = 2 * max_workers  # number of jobs in the queue\nsampler_it = iter(sampler)\npbar = tqdm(disable=not progress, total=100, unit=\"%\")\nwith init_executor(\nmax_workers=max_workers, config=config, cancel_futures=True\n) as executor:\npending: set[Future] = set()\nwhile True:\npbar.n = 100 * done.completion()\npbar.refresh()\ncompleted, pending = wait(pending, timeout=1, return_when=FIRST_COMPLETED)\nfor future in completed:\nfor idx, marginal in future.result():\nresult.update(idx, marginal)\nif done(result):\nreturn result\n# Ensure that we always have n_submitted_jobs running\ntry:\nfor _ in range(n_submitted_jobs - len(pending)):\nsamples = tuple(islice(sampler_it, batch_size))\nif len(samples) == 0:\nraise StopIteration\npending.add(\nexecutor.submit(\n_marginal, u=u, coefficient=correction, samples=samples\n)\n)\nexcept StopIteration:\nif len(pending) == 0:\nreturn result\n
    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.compute_shapley_semivalues","title":"compute_shapley_semivalues(u, *, done=MaxUpdates(100), sampler_t=PermutationSampler, batch_size=1, n_jobs=1, config=ParallelConfig(), progress=False, seed=None)","text":"

    Computes Shapley values for a given utility function.

    This is a convenience wrapper for compute_generic_semivalues with the Shapley coefficient. Use compute_shapley_values for a more flexible interface and additional methods, including TMCS.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    done

    Stopping criterion.

    TYPE: StoppingCriterion DEFAULT: MaxUpdates(100)

    sampler_t

    The sampler type to use. See the sampler module for a list.

    TYPE: Type[StochasticSampler] DEFAULT: PermutationSampler

    batch_size

    Number of marginal evaluations per single parallel job.

    TYPE: int DEFAULT: 1

    n_jobs

    Number of parallel jobs to use.

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    progress

    Whether to display a progress bar.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Deprecation notice

    Parameter batch_size is for experimental use and will be removed in future versions.

    Source code in src/pydvl/value/semivalues.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.9.0\",\nargs_mapping={\"batch_size\": None},\ntemplate_mgs=\"batch_size is for experimental use and will be removed\"\n\"in future versions.\",\n)\ndef compute_shapley_semivalues(\nu: Utility,\n*,\ndone: StoppingCriterion = MaxUpdates(100),\nsampler_t: Type[StochasticSampler] = PermutationSampler,\nbatch_size: int = 1,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Optional[Seed] = None,\n) -> ValuationResult:\n\"\"\"Computes Shapley values for a given utility function.\n    This is a convenience wrapper for\n    [compute_generic_semivalues][pydvl.value.semivalues.compute_generic_semivalues]\n    with the Shapley coefficient. Use\n    [compute_shapley_values][pydvl.value.shapley.common.compute_shapley_values]\n    for a more flexible interface and additional methods, including TMCS.\n    Args:\n        u: Utility object with model, data, and scoring function.\n        done: Stopping criterion.\n        sampler_t: The sampler type to use. See the\n            [sampler][pydvl.value.sampler] module for a list.\n        batch_size: Number of marginal evaluations per single parallel job.\n        n_jobs: Number of parallel jobs to use.\n        config: Object configuring parallel computation, with cluster\n            address, number of cpus, etc.\n        seed: Either an instance of a numpy random number generator or a seed\n            for it.\n        progress: Whether to display a progress bar.\n    Returns:\n        Object with the results.\n    !!! 
warning \"Deprecation notice\"\n        Parameter `batch_size` is for experimental use and will be removed in\n        future versions.\n    \"\"\"\n# HACK: cannot infer return type because of useless IndexT, NameT\nreturn compute_generic_semivalues(  # type: ignore\nsampler_t(u.data.indices, seed=seed),\nu,\nshapley_coefficient,\ndone,\nbatch_size=batch_size,\nn_jobs=n_jobs,\nconfig=config,\nprogress=progress,\n)\n
    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.compute_banzhaf_semivalues","title":"compute_banzhaf_semivalues(u, *, done=MaxUpdates(100), sampler_t=PermutationSampler, batch_size=1, n_jobs=1, config=ParallelConfig(), progress=False, seed=None)","text":"

    Computes Banzhaf values for a given utility function.

    This is a convenience wrapper for compute_generic_semivalues with the Banzhaf coefficient.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    done

    Stopping criterion.

    TYPE: StoppingCriterion DEFAULT: MaxUpdates(100)

    sampler_t

    The sampler type to use. See the sampler module for a list.

    TYPE: Type[StochasticSampler] DEFAULT: PermutationSampler

    batch_size

    Number of marginal evaluations per single parallel job.

    TYPE: int DEFAULT: 1

    n_jobs

    Number of parallel jobs to use.

    TYPE: int DEFAULT: 1

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display a progress bar.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Deprecation notice

    Parameter batch_size is for experimental use and will be removed in future versions.

    Source code in src/pydvl/value/semivalues.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.9.0\",\nargs_mapping={\"batch_size\": None},\ntemplate_mgs=\"batch_size is for experimental use and will be removed\"\n\"in future versions.\",\n)\ndef compute_banzhaf_semivalues(\nu: Utility,\n*,\ndone: StoppingCriterion = MaxUpdates(100),\nsampler_t: Type[StochasticSampler] = PermutationSampler,\nbatch_size: int = 1,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Optional[Seed] = None,\n) -> ValuationResult:\n\"\"\"Computes Banzhaf values for a given utility function.\n    This is a convenience wrapper for\n    [compute_generic_semivalues][pydvl.value.semivalues.compute_generic_semivalues]\n    with the Banzhaf coefficient.\n    Args:\n        u: Utility object with model, data, and scoring function.\n        done: Stopping criterion.\n        sampler_t: The sampler type to use. See the\n            [sampler][pydvl.value.sampler] module for a list.\n        batch_size: Number of marginal evaluations per single parallel job.\n        n_jobs: Number of parallel jobs to use.\n        seed: Either an instance of a numpy random number generator or a seed\n            for it.\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        progress: Whether to display a progress bar.\n    Returns:\n        Object with the results.\n    !!! warning \"Deprecation notice\"\n        Parameter `batch_size` is for experimental use and will be removed in\n        future versions.\n    \"\"\"\n# HACK: cannot infer return type because of useless IndexT, NameT\nreturn compute_generic_semivalues(  # type: ignore\nsampler_t(u.data.indices, seed=seed),\nu,\nbanzhaf_coefficient,\ndone,\nbatch_size=batch_size,\nn_jobs=n_jobs,\nconfig=config,\nprogress=progress,\n)\n
    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.compute_beta_shapley_semivalues","title":"compute_beta_shapley_semivalues(u, *, alpha=1, beta=1, done=MaxUpdates(100), sampler_t=PermutationSampler, batch_size=1, n_jobs=1, config=ParallelConfig(), progress=False, seed=None)","text":"

    Computes Beta Shapley values for a given utility function.

    This is a convenience wrapper for compute_generic_semivalues with the Beta Shapley coefficient.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    alpha

    Alpha parameter of the Beta distribution.

    TYPE: float DEFAULT: 1

    beta

    Beta parameter of the Beta distribution.

    TYPE: float DEFAULT: 1

    done

    Stopping criterion.

    TYPE: StoppingCriterion DEFAULT: MaxUpdates(100)

    sampler_t

    The sampler type to use. See the sampler module for a list.

    TYPE: Type[StochasticSampler] DEFAULT: PermutationSampler

    batch_size

    Number of marginal evaluations per (parallelized) task.

    TYPE: int DEFAULT: 1

    n_jobs

    Number of parallel jobs to use.

    TYPE: int DEFAULT: 1

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display a progress bar.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Deprecation notice

    Parameter batch_size is for experimental use and will be removed in future versions.

    Source code in src/pydvl/value/semivalues.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.9.0\",\nargs_mapping={\"batch_size\": None},\ntemplate_mgs=\"batch_size is for experimental use and will be removed\"\n\"in future versions.\",\n)\ndef compute_beta_shapley_semivalues(\nu: Utility,\n*,\nalpha: float = 1,\nbeta: float = 1,\ndone: StoppingCriterion = MaxUpdates(100),\nsampler_t: Type[StochasticSampler] = PermutationSampler,\nbatch_size: int = 1,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Optional[Seed] = None,\n) -> ValuationResult:\n\"\"\"Computes Beta Shapley values for a given utility function.\n    This is a convenience wrapper for\n    [compute_generic_semivalues][pydvl.value.semivalues.compute_generic_semivalues]\n    with the Beta Shapley coefficient.\n    Args:\n        u: Utility object with model, data, and scoring function.\n        alpha: Alpha parameter of the Beta distribution.\n        beta: Beta parameter of the Beta distribution.\n        done: Stopping criterion.\n        sampler_t: The sampler type to use. See the\n            [sampler][pydvl.value.sampler] module for a list.\n        batch_size: Number of marginal evaluations per (parallelized) task.\n        n_jobs: Number of parallel jobs to use.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n        config: Object configuring parallel computation, with cluster address, number of\n            cpus, etc.\n        progress: Whether to display a progress bar.\n    Returns:\n        Object with the results.\n    !!! 
warning \"Deprecation notice\"\n        Parameter `batch_size` is for experimental use and will be removed in\n        future versions.\n    \"\"\"\n# HACK: cannot infer return type because of useless IndexT, NameT\nreturn compute_generic_semivalues(  # type: ignore\nsampler_t(u.data.indices, seed=seed),\nu,\nbeta_coefficient(alpha, beta),\ndone,\nbatch_size=batch_size,\nn_jobs=n_jobs,\nconfig=config,\nprogress=progress,\n)\n
    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.compute_semivalues","title":"compute_semivalues(u, *, done=MaxUpdates(100), mode=SemiValueMode.Shapley, sampler_t=PermutationSampler[IndexT], batch_size=1, n_jobs=1, seed=None, **kwargs)","text":"

    Convenience entry point for most common semi-value computations.

    Deprecation warning

    This method is deprecated and will be replaced in 0.8.0 by the more general implementation of compute_generic_semivalues. Use compute_shapley_semivalues, compute_banzhaf_semivalues, or compute_beta_shapley_semivalues instead.

    The modes supported with this interface are the following. For greater flexibility use compute_generic_semivalues directly.

    • SemiValueMode.Shapley: Shapley values.
    • SemiValueMode.BetaShapley: Implements the Beta Shapley semi-value as introduced in (Kwon and Zou, 2022)1. Pass additional keyword arguments alpha and beta to set the parameters of the Beta distribution (both default to 1).
    • SemiValueMode.Banzhaf: Implements the Banzhaf semi-value as introduced in (Wang and Jia, 2022)1.

    See Data valuation for an overview of valuation.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    done

    Stopping criterion.

    TYPE: StoppingCriterion DEFAULT: MaxUpdates(100)

    mode

    The semi-value mode to use. See SemiValueMode for a list.

    TYPE: SemiValueMode DEFAULT: Shapley

    sampler_t

    The sampler type to use. See sampler for a list.

    TYPE: Type[StochasticSampler[IndexT]] DEFAULT: PermutationSampler[IndexT]

    batch_size

    Number of marginal evaluations per (parallelized) task.

    TYPE: int DEFAULT: 1

    n_jobs

    Number of parallel jobs to use.

    TYPE: int DEFAULT: 1

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    kwargs

    Additional keyword arguments passed to compute_generic_semivalues.

    DEFAULT: {}

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Deprecation notice

    Parameter batch_size is for experimental use and will be removed in future versions.

    Source code in src/pydvl/value/semivalues.py
    @deprecated(target=True, deprecated_in=\"0.7.0\", remove_in=\"0.8.0\")\ndef compute_semivalues(\nu: Utility,\n*,\ndone: StoppingCriterion = MaxUpdates(100),\nmode: SemiValueMode = SemiValueMode.Shapley,\nsampler_t: Type[StochasticSampler[IndexT]] = PermutationSampler[IndexT],\nbatch_size: int = 1,\nn_jobs: int = 1,\nseed: Optional[Seed] = None,\n**kwargs,\n) -> ValuationResult:\n\"\"\"Convenience entry point for most common semi-value computations.\n    !!! warning \"Deprecation warning\"\n        This method is deprecated and will be replaced in 0.8.0 by the more\n        general implementation of\n        [compute_generic_semivalues][pydvl.value.semivalues.compute_generic_semivalues].\n        Use\n        [compute_shapley_semivalues][pydvl.value.semivalues.compute_shapley_semivalues],\n        [compute_banzhaf_semivalues][pydvl.value.semivalues.compute_banzhaf_semivalues],\n        or\n        [compute_beta_shapley_semivalues][pydvl.value.semivalues.compute_beta_shapley_semivalues]\n        instead.\n    The modes supported with this interface are the following. 
For greater\n    flexibility use\n    [compute_generic_semivalues][pydvl.value.semivalues.compute_generic_semivalues]\n    directly.\n    - [SemiValueMode.Shapley][pydvl.value.semivalues.SemiValueMode]:\n      Shapley values.\n    - [SemiValueMode.BetaShapley][pydvl.value.semivalues.SemiValueMode.BetaShapley]:\n      Implements the Beta Shapley semi-value as introduced in\n      (Kwon and Zou, 2022)<sup><a href=\"#kwon_beta_2022\">1</a></sup>.\n      Pass additional keyword arguments `alpha` and `beta` to set the\n      parameters of the Beta distribution (both default to 1).\n    - [SemiValueMode.Banzhaf][SemiValueMode.Banzhaf]: Implements the Banzhaf\n      semi-value as introduced in (Wang and Jia, 2022)<sup><a href=\"#wang_data_2022\">1</a></sup>.\n    See [[data-valuation]] for an overview of valuation.\n    - [SemiValueMode.Banzhaf][pydvl.value.semivalues.SemiValueMode]: Implements\n      the Banzhaf semi-value as introduced in [@wang_data_2022].\n    Args:\n        u: Utility object with model, data, and scoring function.\n        done: Stopping criterion.\n        mode: The semi-value mode to use. See\n            [SemiValueMode][pydvl.value.semivalues.SemiValueMode] for a list.\n        sampler_t: The sampler type to use. See [sampler][pydvl.value.sampler]\n            for a list.\n        batch_size: Number of marginal evaluations per (parallelized) task.\n        n_jobs: Number of parallel jobs to use.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n        kwargs: Additional keyword arguments passed to\n            [compute_generic_semivalues][pydvl.value.semivalues.compute_generic_semivalues].\n    Returns:\n        Object with the results.\n    !!! 
warning \"Deprecation notice\"\n        Parameter `batch_size` is for experimental use and will be removed in\n        future versions.\n    \"\"\"\nif mode == SemiValueMode.Shapley:\ncoefficient = shapley_coefficient\nelif mode == SemiValueMode.BetaShapley:\nalpha = kwargs.pop(\"alpha\", 1)\nbeta = kwargs.pop(\"beta\", 1)\ncoefficient = beta_coefficient(alpha, beta)\nelif mode == SemiValueMode.Banzhaf:\ncoefficient = banzhaf_coefficient\nelse:\nraise ValueError(f\"Unknown mode {mode}\")\ncoefficient = cast(SVCoefficient, coefficient)\n# HACK: cannot infer return type because of useless IndexT, NameT\nreturn compute_generic_semivalues(  # type: ignore\nsampler_t(u.data.indices, seed=seed),\nu,\ncoefficient,\ndone,\nn_jobs=n_jobs,\nbatch_size=batch_size,\n**kwargs,\n)\n
    "},{"location":"api/pydvl/value/stopping/","title":"Stopping","text":"

    Stopping criteria for value computations.

    This module provides a basic set of stopping criteria, like MaxUpdates, MaxTime, or HistoryDeviation among others. These can behave in different ways depending on the context. For example, MaxUpdates limits the number of updates to values, which depending on the algorithm may mean a different number of utility evaluations or imply other computations like solving a linear or quadratic program.

    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping--creating-stopping-criteria","title":"Creating stopping criteria","text":"

    The easiest way is to declare a function implementing the interface StoppingCriterionCallable and wrap it with make_criterion(). This creates a StoppingCriterion object that can be composed with other stopping criteria.

    Alternatively, and in particular if reporting of completion is required, one can inherit from this class and implement the abstract methods _check and completion.

    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping--composing-stopping-criteria","title":"Composing stopping criteria","text":"

    Objects of type StoppingCriterion can be composed with the binary operators & (and), and | (or), following the truth tables of Status. The unary operator ~ (not) is also supported. See StoppingCriterion for details on how these operations affect the behavior of the stopping criteria.

    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping--references","title":"References","text":"
    1. Ghorbani, A., Zou, J., 2019. Data Shapley: Equitable Valuation of Data for Machine Learning. In: Proceedings of the 36th International Conference on Machine Learning, PMLR, pp. 2242\u20132251.\u00a0\u21a9

    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.StoppingCriterionCallable","title":"StoppingCriterionCallable","text":"

    Bases: Protocol

    Signature for a stopping criterion

    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.StoppingCriterion","title":"StoppingCriterion(modify_result=True)","text":"

    Bases: ABC

    A composable callable object to determine whether a computation must stop.

    A StoppingCriterion is a callable taking a ValuationResult and returning a Status. It also keeps track of individual convergence of values with converged, and reports the overall completion of the computation with completion.

    Instances of StoppingCriterion can be composed with the binary operators & (and), and | (or), following the truth tables of Status. The unary operator ~ (not) is also supported. These boolean operations act according to the following rules:

    • The results of _check are combined with the operator. See Status for the truth tables.
    • The results of converged are combined with the operator (returning another boolean array).
    • The completion method returns the min, max, or the complement to 1 of the completions of the operands, for AND, OR and NOT respectively. This is required for cases where one of the criteria does not keep track of the convergence of single values, e.g. MaxUpdates, because completion by default returns the mean of the boolean convergence array.
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.StoppingCriterion--subclassing","title":"Subclassing","text":"

    Subclassing this class requires implementing a _check method that returns a Status object based on a given ValuationResult. This method should update the attribute _converged, which is a boolean array indicating whether the value for each index has converged. When this does not make sense for a particular stopping criterion, completion should be overridden to provide an overall completion value, since its default implementation attempts to compute the mean of _converged.

    PARAMETER DESCRIPTION modify_result

    If True the status of the input ValuationResult is modified in place after the call.

    TYPE: bool DEFAULT: True

    Source code in src/pydvl/value/stopping.py
    def __init__(self, modify_result: bool = True):\nself.modify_result = modify_result\nself._converged = np.full(0, False)\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.StoppingCriterion.converged","title":"converged: NDArray[np.bool_] property","text":"

    Returns a boolean array indicating whether the values have converged for each data point.

    Inheriting classes must set the _converged attribute in their _check.

    RETURNS DESCRIPTION NDArray[bool_]

    A boolean array indicating whether the values have converged for each data point.

    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.StoppingCriterion.completion","title":"completion()","text":"

    Returns a value between 0 and 1 indicating the completion of the computation.

    Source code in src/pydvl/value/stopping.py
    def completion(self) -> float:\n\"\"\"Returns a value between 0 and 1 indicating the completion of the\n    computation.\n    \"\"\"\nif self.converged.size == 0:\nreturn 0.0\nreturn float(np.mean(self.converged).item())\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.StoppingCriterion.__call__","title":"__call__(result)","text":"

    Calls _check, maybe updating the result.

    Source code in src/pydvl/value/stopping.py
    def __call__(self, result: ValuationResult) -> Status:\n\"\"\"Calls [_check][pydvl.value.stopping.StoppingCriterion._check], maybe updating the result.\"\"\"\nif len(result) == 0:\nlogger.warning(\n\"At least one iteration finished but no results where generated. \"\n\"Please check that your scorer and utility return valid numbers.\"\n)\nstatus = self._check(result)\nif self.modify_result:  # FIXME: this is not nice\nresult._status = status\nreturn status\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.AbsoluteStandardError","title":"AbsoluteStandardError(threshold, fraction=1.0, burn_in=4, modify_result=True)","text":"

    Bases: StoppingCriterion

    Determine convergence based on the standard error of the values.

    If \\(s_i\\) is the standard error for datum \\(i\\) and \\(v_i\\) its value, then this criterion returns Converged if \\(s_i < \\epsilon\\) for all \\(i\\) and a threshold value \\(\\epsilon \\gt 0\\).

    PARAMETER DESCRIPTION threshold

    A value is considered to have converged if the standard error is below this value. A way of choosing it is to pick some percentage of the range of the values. For Shapley values this is the difference between the maximum and minimum of the utility function (to see this substitute the maximum and minimum values of the utility into the marginal contribution formula).

    TYPE: float

    fraction

    The fraction of values that must have converged for the criterion to return Converged.

    TYPE: float DEFAULT: 1.0

    burn_in

    The number of iterations to ignore before checking for convergence. This is required because computations typically start with zero variance, as a result of using empty(). The default is set to an arbitrary minimum which is usually enough but may need to be increased.

    TYPE: int DEFAULT: 4

    Source code in src/pydvl/value/stopping.py
    def __init__(\nself,\nthreshold: float,\nfraction: float = 1.0,\nburn_in: int = 4,\nmodify_result: bool = True,\n):\nsuper().__init__(modify_result=modify_result)\nself.threshold = threshold\nself.fraction = fraction\nself.burn_in = burn_in\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.MaxChecks","title":"MaxChecks(n_checks, modify_result=True)","text":"

    Bases: StoppingCriterion

    Terminate as soon as the number of checks exceeds the threshold.

    A \"check\" is one call to the criterion.

    PARAMETER DESCRIPTION n_checks

    Threshold: if None, no _check is performed, effectively creating a (never) stopping criterion that always returns Pending.

    TYPE: Optional[int]

    Source code in src/pydvl/value/stopping.py
    def __init__(self, n_checks: Optional[int], modify_result: bool = True):\nsuper().__init__(modify_result=modify_result)\nif n_checks is not None and n_checks < 1:\nraise ValueError(\"n_iterations must be at least 1 or None\")\nself.n_checks = n_checks\nself._count = 0\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.MaxUpdates","title":"MaxUpdates(n_updates, modify_result=True)","text":"

    Bases: StoppingCriterion

    Terminate if the number of updates of any value exceeds or equals the given threshold.

    Note

    If you want to ensure that all values have been updated, you probably want MinUpdates instead.

    This checks the counts field of a ValuationResult, i.e. the number of times that each index has been updated. For powerset samplers, the maximum of this number coincides with the maximum number of subsets sampled. For permutation samplers, it coincides with the number of permutations sampled.

    PARAMETER DESCRIPTION n_updates

    Threshold: if None, no _check is performed, effectively creating a (never) stopping criterion that always returns Pending.

    TYPE: Optional[int]

    Source code in src/pydvl/value/stopping.py
    def __init__(self, n_updates: Optional[int], modify_result: bool = True):\nsuper().__init__(modify_result=modify_result)\nif n_updates is not None and n_updates < 1:\nraise ValueError(\"n_updates must be at least 1 or None\")\nself.n_updates = n_updates\nself.last_max = 0\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.MinUpdates","title":"MinUpdates(n_updates, modify_result=True)","text":"

    Bases: StoppingCriterion

    Terminate as soon as all value updates exceed or equal the given threshold.

    This checks the counts field of a ValuationResult, i.e. the number of times that each index has been updated. For powerset samplers, the minimum of this number is a lower bound for the number of subsets sampled. For permutation samplers, it lower-bounds the number of permutations sampled.

    PARAMETER DESCRIPTION n_updates

    Threshold: if None, no _check is performed, effectively creating a (never) stopping criterion that always returns Pending.

    TYPE: Optional[int]

    Source code in src/pydvl/value/stopping.py
    def __init__(self, n_updates: Optional[int], modify_result: bool = True):\nsuper().__init__(modify_result=modify_result)\nself.n_updates = n_updates\nself.last_min = 0\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.MaxTime","title":"MaxTime(seconds, modify_result=True)","text":"

    Bases: StoppingCriterion

    Terminate if the computation time exceeds the given number of seconds.

    Checks the elapsed time since construction.

    PARAMETER DESCRIPTION seconds

    Threshold: The computation is terminated if the elapsed time between object construction and a _check exceeds this value. If None, no _check is performed, effectively creating a (never) stopping criterion that always returns Pending.

    TYPE: Optional[float]

    Source code in src/pydvl/value/stopping.py
    def __init__(self, seconds: Optional[float], modify_result: bool = True):\nsuper().__init__(modify_result=modify_result)\nself.max_seconds = seconds or np.inf\nif self.max_seconds <= 0:\nraise ValueError(\"Number of seconds for MaxTime must be positive or None\")\nself.start = time()\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.HistoryDeviation","title":"HistoryDeviation(n_steps, rtol, pin_converged=True, modify_result=True)","text":"

    Bases: StoppingCriterion

    A simple check for relative distance to a previous step in the computation.

    The method used by (Ghorbani and Zou, 2019)1 computes the relative distances between the current values \\(v_i^t\\) and the values at the previous checkpoint \\(v_i^{t-\\tau}\\). If the sum is below a given threshold, the computation is terminated.

    \\[\\sum_{i=1}^n \\frac{\\left| v_i^t - v_i^{t-\\tau} \\right|}{v_i^t} < \\epsilon.\\]

    When the denominator is zero, the summand is set to the value of \\(v_i^{ t-\\tau}\\).

    This implementation is slightly generalised to allow for different number of updates to individual indices, as happens with powerset samplers instead of permutations. Every subset of indices that is found to converge can be pinned to that state. Once all indices have converged the method has converged.

    Warning

    This criterion is meant for the reproduction of the results in the paper, but we do not recommend using it in practice.

    PARAMETER DESCRIPTION n_steps

    Checkpoint values every so many updates and use these saved values to compare.

    TYPE: int

    rtol

    Relative tolerance for convergence (\\(\\epsilon\\) in the formula).

    TYPE: float

    pin_converged

    If True, once an index has converged, it is pinned to that state.

    TYPE: bool DEFAULT: True

    Source code in src/pydvl/value/stopping.py
    def __init__(\nself,\nn_steps: int,\nrtol: float,\npin_converged: bool = True,\nmodify_result: bool = True,\n):\nsuper().__init__(modify_result=modify_result)\nif n_steps < 1:\nraise ValueError(\"n_steps must be at least 1\")\nif rtol <= 0 or rtol >= 1:\nraise ValueError(\"rtol must be in (0, 1)\")\nself.n_steps = n_steps\nself.rtol = rtol\nself.update_op = np.logical_or if pin_converged else np.logical_and\nself._memory = None  # type: ignore\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.make_criterion","title":"make_criterion(fun, converged=None, completion=None, name=None)","text":"

    Create a new StoppingCriterion from a function. Use this to enable simpler functions to be composed with bitwise operators.

    PARAMETER DESCRIPTION fun

    The callable to wrap.

    TYPE: StoppingCriterionCallable

    converged

    A callable that returns a boolean array indicating what values have converged.

    TYPE: Callable[[], NDArray[bool_]] | None DEFAULT: None

    completion

    A callable that returns a value between 0 and 1 indicating the rate of completion of the computation. If not provided, the fraction of converged values is used.

    TYPE: Callable[[], float] | None DEFAULT: None

    name

    The name of the new criterion. If None, the __name__ of the function is used.

    TYPE: str | None DEFAULT: None

    RETURNS DESCRIPTION Type[StoppingCriterion]

    A new subclass of StoppingCriterion.

    Source code in src/pydvl/value/stopping.py
    def make_criterion(\nfun: StoppingCriterionCallable,\nconverged: Callable[[], NDArray[np.bool_]] | None = None,\ncompletion: Callable[[], float] | None = None,\nname: str | None = None,\n) -> Type[StoppingCriterion]:\n\"\"\"Create a new [StoppingCriterion][pydvl.value.stopping.StoppingCriterion] from a function.\n    Use this to enable simpler functions to be composed with bitwise operators\n    Args:\n        fun: The callable to wrap.\n        converged: A callable that returns a boolean array indicating what\n            values have converged.\n        completion: A callable that returns a value between 0 and 1 indicating\n            the rate of completion of the computation. If not provided, the fraction\n            of converged values is used.\n        name: The name of the new criterion. If `None`, the `__name__` of\n            the function is used.\n    Returns:\n        A new subclass of [StoppingCriterion][pydvl.value.stopping.StoppingCriterion].\n    \"\"\"\nclass WrappedCriterion(StoppingCriterion):\ndef __init__(self, modify_result: bool = True):\nsuper().__init__(modify_result=modify_result)\nself._name = name or getattr(fun, \"__name__\", \"WrappedCriterion\")\ndef _check(self, result: ValuationResult) -> Status:\nreturn fun(result)\n@property\ndef converged(self) -> NDArray[np.bool_]:\nif converged is None:\nreturn super().converged\nreturn converged()\n@property\ndef name(self):\nreturn self._name\ndef completion(self) -> float:\nif completion is None:\nreturn super().completion()\nreturn completion()\nreturn WrappedCriterion\n
    "},{"location":"api/pydvl/value/least_core/","title":"Least core","text":"

    New in version 0.4.0

    This package holds all routines for the computation of Least Core data values.

    Please refer to Data valuation for an overview.

    In addition to the standard interface via compute_least_core_values(), because computing the Least Core values requires the solution of a linear and a quadratic problem after computing all the utility values, there is the possibility of performing each step separately. This is useful when running multiple experiments: use lc_prepare_problem() or mclc_prepare_problem() to prepare a list of problems to solve, then solve them in parallel with lc_solve_problems().

    Note that mclc_prepare_problem() is parallelized itself, so preparing the problems should be done in sequence in this case. The solution of the linear systems can then be done in parallel.

    "},{"location":"api/pydvl/value/least_core/#pydvl.value.least_core.LeastCoreMode","title":"LeastCoreMode","text":"

    Bases: Enum

    Available Least Core algorithms.

    "},{"location":"api/pydvl/value/least_core/#pydvl.value.least_core.compute_least_core_values","title":"compute_least_core_values(u, *, n_jobs=1, n_iterations=None, mode=LeastCoreMode.MonteCarlo, non_negative_subsidy=False, solver_options=None, **kwargs)","text":"

    Umbrella method to compute Least Core values with any of the available algorithms.

    See Data valuation for an overview.

    The following algorithms are available. Note that the exact method can only work with very small datasets and is thus intended only for testing.

    • exact: uses the complete powerset of the training set for the constraints. Implemented in exact_least_core().
    • montecarlo: uses the approximate Monte Carlo Least Core algorithm. Implemented in montecarlo_least_core().
    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    n_jobs

    Number of jobs to run in parallel. Only used for Monte Carlo Least Core.

    TYPE: int DEFAULT: 1

    n_iterations

    Number of subsets to sample and evaluate the utility on. Only used for Monte Carlo Least Core.

    TYPE: Optional[int] DEFAULT: None

    mode

    Algorithm to use. See LeastCoreMode for available options.

    TYPE: LeastCoreMode DEFAULT: MonteCarlo

    non_negative_subsidy

    If True, the least core subsidy \\(e\\) is constrained to be non-negative.

    TYPE: bool DEFAULT: False

    solver_options

    Optional dictionary of options passed to the solvers.

    TYPE: Optional[dict] DEFAULT: None

    RETURNS DESCRIPTION ValuationResult

    Object with the computed values.

    New in version 0.5.0

    Source code in src/pydvl/value/least_core/__init__.py
    def compute_least_core_values(\nu: Utility,\n*,\nn_jobs: int = 1,\nn_iterations: Optional[int] = None,\nmode: LeastCoreMode = LeastCoreMode.MonteCarlo,\nnon_negative_subsidy: bool = False,\nsolver_options: Optional[dict] = None,\n**kwargs,\n) -> ValuationResult:\n\"\"\"Umbrella method to compute Least Core values with any of the available\n    algorithms.\n    See [Data valuation][computing-data-values] for an overview.\n    The following algorithms are available. Note that the exact method can only\n    work with very small datasets and is thus intended only for testing.\n    - `exact`: uses the complete powerset of the training set for the constraints\n      [combinatorial_exact_shapley()][pydvl.value.shapley.naive.combinatorial_exact_shapley].\n    - `montecarlo`:  uses the approximate Monte Carlo Least Core algorithm.\n      Implemented in [montecarlo_least_core()][pydvl.value.least_core.montecarlo.montecarlo_least_core].\n    Args:\n        u: Utility object with model, data, and scoring function\n        n_jobs: Number of jobs to run in parallel. Only used for Monte Carlo\n            Least Core.\n        n_iterations: Number of subsets to sample and evaluate the utility on.\n            Only used for Monte Carlo Least Core.\n        mode: Algorithm to use. See\n            [LeastCoreMode][pydvl.value.least_core.LeastCoreMode] for available\n            options.\n        non_negative_subsidy: If True, the least core subsidy $e$ is constrained\n            to be non-negative.\n        solver_options: Optional dictionary of options passed to the solvers.\n    Returns:\n        Object with the computed values.\n    !!! tip \"New in version 0.5.0\"\n    \"\"\"\nprogress: bool = kwargs.pop(\"progress\", False)\n# TODO: remove this before releasing version 0.7.0\nif kwargs:\nwarnings.warn(\nDeprecationWarning(\n\"Passing solver options as kwargs was deprecated in 0.6.0, will \"\n\"be removed in 0.7.0. 
`Use solver_options` instead.\"\n)\n)\nif solver_options is None:\nsolver_options = kwargs\nelse:\nsolver_options.update(kwargs)\nif mode == LeastCoreMode.MonteCarlo:\n# TODO fix progress showing and maybe_progress in remote case\nprogress = False\nif n_iterations is None:\nraise ValueError(\"n_iterations cannot be None for Monte Carlo Least Core\")\nreturn montecarlo_least_core(\nu=u,\nn_iterations=n_iterations,\nn_jobs=n_jobs,\nprogress=progress,\nnon_negative_subsidy=non_negative_subsidy,\nsolver_options=solver_options,\n**kwargs,\n)\nelif mode == LeastCoreMode.Exact:\nreturn exact_least_core(\nu=u,\nprogress=progress,\nnon_negative_subsidy=non_negative_subsidy,\nsolver_options=solver_options,\n)\nraise ValueError(f\"Invalid value encountered in {mode=}\")\n
    "},{"location":"api/pydvl/value/least_core/common/","title":"Common","text":""},{"location":"api/pydvl/value/least_core/common/#pydvl.value.least_core.common.lc_solve_problem","title":"lc_solve_problem(problem, *, u, algorithm, non_negative_subsidy=False, solver_options=None, **options)","text":"

    Solves a linear problem as prepared by mclc_prepare_problem(). Useful for parallel execution of multiple experiments by running this as a remote task.

    See exact_least_core() or montecarlo_least_core() for argument descriptions.

    Source code in src/pydvl/value/least_core/common.py
    def lc_solve_problem(\nproblem: LeastCoreProblem,\n*,\nu: Utility,\nalgorithm: str,\nnon_negative_subsidy: bool = False,\nsolver_options: Optional[dict] = None,\n**options,\n) -> ValuationResult:\n\"\"\"Solves a linear problem as prepared by\n    [mclc_prepare_problem()][pydvl.value.least_core.montecarlo.mclc_prepare_problem].\n    Useful for parallel execution of multiple experiments by running this as a\n    remote task.\n    See [exact_least_core()][pydvl.value.least_core.naive.exact_least_core] or\n    [montecarlo_least_core()][pydvl.value.least_core.montecarlo.montecarlo_least_core] for\n    argument descriptions.\n    \"\"\"\nn = len(u.data)\nif np.any(np.isnan(problem.utility_values)):\nwarnings.warn(\nf\"Calculation returned \"\nf\"{np.sum(np.isnan(problem.utility_values))} NaN \"\nf\"values out of {problem.utility_values.size}\",\nRuntimeWarning,\n)\n# TODO: remove this before releasing version 0.7.0\nif options:\nwarnings.warn(\nDeprecationWarning(\n\"Passing solver options as kwargs was deprecated in \"\n\"0.6.0, will be removed in 0.7.0. 
`Use solver_options` \"\n\"instead.\"\n)\n)\nif solver_options is None:\nsolver_options = options\nelse:\nsolver_options.update(options)\nif solver_options is None:\nsolver_options = {}\nif \"solver\" not in solver_options:\nsolver_options[\"solver\"] = cp.SCS\nif \"max_iters\" not in solver_options and solver_options[\"solver\"] == cp.SCS:\nsolver_options[\"max_iters\"] = 10000\nlogger.debug(\"Removing possible duplicate values in lower bound array\")\nb_lb = problem.utility_values\nA_lb, unique_indices = np.unique(problem.A_lb, return_index=True, axis=0)\nb_lb = b_lb[unique_indices]\nlogger.debug(\"Building equality constraint\")\nA_eq = np.ones((1, n))\n# We might have already computed the total utility one or more times.\n# This is the index of the row(s) in A_lb with all ones.\ntotal_utility_indices = np.where(A_lb.sum(axis=1) == n)[0]\nif len(total_utility_indices) == 0:\nb_eq = np.array([u(u.data.indices)])\nelse:\nb_eq = b_lb[total_utility_indices]\n# Remove the row(s) corresponding to the total utility\n# from the lower bound constraints\n# because given the equality constraint\n# it is the same as using the constraint e >= 0\n# (i.e. setting non_negative_subsidy = True).\nmask: NDArray[np.bool_] = np.ones_like(b_lb, dtype=bool)\nmask[total_utility_indices] = False\nb_lb = b_lb[mask]\nA_lb = A_lb[mask]\n# Remove the row(s) corresponding to the empty subset\n# because, given u(\u2205) = (which is almost always the case,\n# it is the same as using the constraint e >= 0\n# (i.e. 
setting non_negative_subsidy = True).\nemptyset_utility_indices = np.where(A_lb.sum(axis=1) == 0)[0]\nif len(emptyset_utility_indices) > 0:\nmask = np.ones_like(b_lb, dtype=bool)\nmask[emptyset_utility_indices] = False\nb_lb = b_lb[mask]\nA_lb = A_lb[mask]\n_, subsidy = _solve_least_core_linear_program(\nA_eq=A_eq,\nb_eq=b_eq,\nA_lb=A_lb,\nb_lb=b_lb,\nnon_negative_subsidy=non_negative_subsidy,\nsolver_options=solver_options,\n)\nvalues: Optional[NDArray[np.float_]]\nif subsidy is None:\nlogger.debug(\"No values were found\")\nstatus = Status.Failed\nvalues = np.empty(n)\nvalues[:] = np.nan\nsubsidy = np.nan\nelse:\nvalues = _solve_egalitarian_least_core_quadratic_program(\nsubsidy,\nA_eq=A_eq,\nb_eq=b_eq,\nA_lb=A_lb,\nb_lb=b_lb,\nsolver_options=solver_options,\n)\nif values is None:\nlogger.debug(\"No values were found\")\nstatus = Status.Failed\nvalues = np.empty(n)\nvalues[:] = np.nan\nsubsidy = np.nan\nelse:\nstatus = Status.Converged\nreturn ValuationResult(\nalgorithm=algorithm,\nstatus=status,\nvalues=values,\nsubsidy=subsidy,\nstderr=None,\ndata_names=u.data.data_names,\n)\n
    "},{"location":"api/pydvl/value/least_core/common/#pydvl.value.least_core.common.lc_solve_problems","title":"lc_solve_problems(problems, u, algorithm, config=ParallelConfig(), n_jobs=1, non_negative_subsidy=True, solver_options=None, **options)","text":"

    Solves a list of linear problems in parallel.

    PARAMETER DESCRIPTION u

    Utility.

    TYPE: Utility

    problems

    Least Core problems to solve, as returned by mclc_prepare_problem().

    TYPE: Sequence[LeastCoreProblem]

    algorithm

    Name of the valuation algorithm.

    TYPE: str

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    n_jobs

    Number of parallel jobs to run.

    TYPE: int DEFAULT: 1

    non_negative_subsidy

    If True, the least core subsidy \\(e\\) is constrained to be non-negative.

    TYPE: bool DEFAULT: True

    solver_options

    Additional options to pass to the solver.

    TYPE: Optional[dict] DEFAULT: None

    RETURNS DESCRIPTION List[ValuationResult]

    List of solutions.

    Source code in src/pydvl/value/least_core/common.py
    def lc_solve_problems(\nproblems: Sequence[LeastCoreProblem],\nu: Utility,\nalgorithm: str,\nconfig: ParallelConfig = ParallelConfig(),\nn_jobs: int = 1,\nnon_negative_subsidy: bool = True,\nsolver_options: Optional[dict] = None,\n**options,\n) -> List[ValuationResult]:\n\"\"\"Solves a list of linear problems in parallel.\n    Args:\n        u: Utility.\n        problems: Least Core problems to solve, as returned by\n            [mclc_prepare_problem()][pydvl.value.least_core.montecarlo.mclc_prepare_problem].\n        algorithm: Name of the valuation algorithm.\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        n_jobs: Number of parallel jobs to run.\n        non_negative_subsidy: If True, the least core subsidy $e$ is constrained\n            to be non-negative.\n        solver_options: Additional options to pass to the solver.\n    Returns:\n        List of solutions.\n    \"\"\"\ndef _map_func(\nproblems: List[LeastCoreProblem], *args, **kwargs\n) -> List[ValuationResult]:\nreturn [lc_solve_problem(p, *args, **kwargs) for p in problems]\nmap_reduce_job: MapReduceJob[\n\"LeastCoreProblem\", \"List[ValuationResult]\"\n] = MapReduceJob(\ninputs=problems,\nmap_func=_map_func,\nmap_kwargs=dict(\nu=u,\nalgorithm=algorithm,\nnon_negative_subsidy=non_negative_subsidy,\nsolver_options=solver_options,\n**options,\n),\nreduce_func=lambda x: list(itertools.chain(*x)),\nconfig=config,\nn_jobs=n_jobs,\n)\nsolutions = map_reduce_job()\nreturn solutions\n
    "},{"location":"api/pydvl/value/least_core/montecarlo/","title":"Montecarlo","text":""},{"location":"api/pydvl/value/least_core/montecarlo/#pydvl.value.least_core.montecarlo.montecarlo_least_core","title":"montecarlo_least_core(u, n_iterations, *, n_jobs=1, config=ParallelConfig(), non_negative_subsidy=False, solver_options=None, options=None, progress=False)","text":"

    Computes approximate Least Core values using a Monte Carlo approach.

    \\[ \\begin{array}{lll} \\text{minimize} & \\displaystyle{e} & \\\\ \\text{subject to} & \\displaystyle\\sum_{i\\in N} x_{i} = v(N) & \\\\ & \\displaystyle\\sum_{i\\in S} x_{i} + e \\geq v(S) & , \\forall S \\in \\{S_1, S_2, \\dots, S_m \\overset{\\mathrm{iid}}{\\sim} U(2^N) \\} \\end{array} \\]

    Where:

    • \\(U(2^N)\\) is the uniform distribution over the powerset of \\(N\\).
    • \\(m\\) is the number of subsets that will be sampled and whose utility will be computed and used to compute the data values.
    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    n_iterations

    total number of iterations to use

    TYPE: int

    n_jobs

    number of jobs across which to distribute the computation

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    non_negative_subsidy

    If True, the least core subsidy \\(e\\) is constrained to be non-negative.

    TYPE: bool DEFAULT: False

    solver_options

    Dictionary of options that will be used to select a solver and to configure it. Refer to cvxpy's documentation for all possible options.

    TYPE: Optional[dict] DEFAULT: None

    options

    (Deprecated) Dictionary of solver options. Use solver_options instead.

    TYPE: Optional[dict] DEFAULT: None

    progress

    If True, shows a tqdm progress bar

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the data values and the least core value.

    Source code in src/pydvl/value/least_core/montecarlo.py
    def montecarlo_least_core(\nu: Utility,\nn_iterations: int,\n*,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nnon_negative_subsidy: bool = False,\nsolver_options: Optional[dict] = None,\noptions: Optional[dict] = None,\nprogress: bool = False,\n) -> ValuationResult:\nr\"\"\"Computes approximate Least Core values using a Monte Carlo approach.\n    $$\n    \\begin{array}{lll}\n    \\text{minimize} & \\displaystyle{e} & \\\\\n    \\text{subject to} & \\displaystyle\\sum_{i\\in N} x_{i} = v(N) & \\\\\n    & \\displaystyle\\sum_{i\\in S} x_{i} + e \\geq v(S) & ,\n    \\forall S \\in \\{S_1, S_2, \\dots, S_m \\overset{\\mathrm{iid}}{\\sim} U(2^N) \\}\n    \\end{array}\n    $$\n    Where:\n    * $U(2^N)$ is the uniform distribution over the powerset of $N$.\n    * $m$ is the number of subsets that will be sampled and whose utility will\n      be computed and used to compute the data values.\n    Args:\n        u: Utility object with model, data, and scoring function\n        n_iterations: total number of iterations to use\n        n_jobs: number of jobs across which to distribute the computation\n        config: Object configuring parallel computation, with cluster\n            address, number of cpus, etc.\n        non_negative_subsidy: If True, the least core subsidy $e$ is constrained\n            to be non-negative.\n        solver_options: Dictionary of options that will be used to select a solver\n            and to configure it. Refer to [cvxpy's\n            documentation](https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options)\n            for all possible options.\n        options: (Deprecated) Dictionary of solver options. 
Use solver_options\n            instead.\n        progress: If True, shows a tqdm progress bar\n    Returns:\n        Object with the data values and the least core value.\n    \"\"\"\n# TODO: remove this before releasing version 0.7.0\nif options:\nwarnings.warn(\nDeprecationWarning(\n\"Passing solver options as kwargs was deprecated in \"\n\"0.6.0, will be removed in 0.7.0. `Use solver_options` \"\n\"instead.\"\n)\n)\nif solver_options is None:\nsolver_options = options\nelse:\nsolver_options.update(options)\nproblem = mclc_prepare_problem(\nu, n_iterations, n_jobs=n_jobs, config=config, progress=progress\n)\nreturn lc_solve_problem(\nproblem,\nu=u,\nalgorithm=\"montecarlo_least_core\",\nnon_negative_subsidy=non_negative_subsidy,\nsolver_options=solver_options,\n)\n
    "},{"location":"api/pydvl/value/least_core/montecarlo/#pydvl.value.least_core.montecarlo.mclc_prepare_problem","title":"mclc_prepare_problem(u, n_iterations, *, n_jobs=1, config=ParallelConfig(), progress=False)","text":"

    Prepares a linear problem by sampling subsets of the data. Use this to separate the problem preparation from the solving with lc_solve_problem(). Useful for parallel execution of multiple experiments.

    See montecarlo_least_core for argument descriptions.

    Source code in src/pydvl/value/least_core/montecarlo.py
    def mclc_prepare_problem(\nu: Utility,\nn_iterations: int,\n*,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\n) -> LeastCoreProblem:\n\"\"\"Prepares a linear problem by sampling subsets of the data. Use this to\n    separate the problem preparation from the solving with\n    [lc_solve_problem()][pydvl.value.least_core.common.lc_solve_problem]. Useful\n    for parallel execution of multiple experiments.\n    See\n    [montecarlo_least_core][pydvl.value.least_core.montecarlo.montecarlo_least_core]\n    for argument descriptions.\n    \"\"\"\nn = len(u.data)\nif n_iterations < n:\nwarnings.warn(\nf\"Number of iterations '{n_iterations}' is smaller the size of the dataset '{n}'. \"\nf\"This is not optimal because in the worst case we need at least '{n}' constraints \"\n\"to satisfy the individual rationality condition.\"\n)\nif n_iterations > 2**n:\nwarnings.warn(\nf\"Passed n_iterations is greater than the number subsets! \"\nf\"Setting it to 2^{n}\",\nRuntimeWarning,\n)\nn_iterations = 2**n\niterations_per_job = max(1, n_iterations // effective_n_jobs(n_jobs, config))\nmap_reduce_job: MapReduceJob[\"Utility\", \"LeastCoreProblem\"] = MapReduceJob(\ninputs=u,\nmap_func=_montecarlo_least_core,\nreduce_func=_reduce_func,\nmap_kwargs=dict(n_iterations=iterations_per_job, progress=progress),\nn_jobs=n_jobs,\nconfig=config,\n)\nreturn map_reduce_job()\n
    "},{"location":"api/pydvl/value/least_core/naive/","title":"Naive","text":""},{"location":"api/pydvl/value/least_core/naive/#pydvl.value.least_core.naive.exact_least_core","title":"exact_least_core(u, *, non_negative_subsidy=False, solver_options=None, options=None, progress=True)","text":"

    Computes the exact Least Core values.

    Note

    If the training set contains more than 20 instances a warning is printed because the computation is very expensive. This method is mostly used for internal testing and simple use cases. Please refer to the Monte Carlo method for practical applications.

    The least core is the solution to the following Linear Programming problem:

    \\[ \\begin{array}{lll} \\text{minimize} & \\displaystyle{e} & \\\\ \\text{subject to} & \\displaystyle\\sum_{i\\in N} x_{i} = v(N) & \\\\ & \\displaystyle\\sum_{i\\in S} x_{i} + e \\geq v(S) &, \\forall S \\subseteq N \\\\ \\end{array} \\]

    Where \\(N = \\{1, 2, \\dots, n\\}\\) are the training set's indices.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    non_negative_subsidy

    If True, the least core subsidy \\(e\\) is constrained to be non-negative.

    TYPE: bool DEFAULT: False

    solver_options

    Dictionary of options that will be used to select a solver and to configure it. Refer to cvxpy's documentation for all possible options.

    TYPE: Optional[dict] DEFAULT: None

    options

    (Deprecated) Dictionary of solver options. Use solver_options instead.

    TYPE: Optional[dict] DEFAULT: None

    progress

    If True, shows a tqdm progress bar

    TYPE: bool DEFAULT: True

    RETURNS DESCRIPTION ValuationResult

    Object with the data values and the least core value.

    Source code in src/pydvl/value/least_core/naive.py
    def exact_least_core(\nu: Utility,\n*,\nnon_negative_subsidy: bool = False,\nsolver_options: Optional[dict] = None,\noptions: Optional[dict] = None,\nprogress: bool = True,\n) -> ValuationResult:\nr\"\"\"Computes the exact Least Core values.\n    !!! Note\n        If the training set contains more than 20 instances a warning is printed\n        because the computation is very expensive. This method is mostly used for\n        internal testing and simple use cases. Please refer to the\n        [Monte Carlo method][pydvl.value.least_core.montecarlo.montecarlo_least_core]\n        for practical applications.\n    The least core is the solution to the following Linear Programming problem:\n    $$\n    \\begin{array}{lll}\n    \\text{minimize} & \\displaystyle{e} & \\\\\n    \\text{subject to} & \\displaystyle\\sum_{i\\in N} x_{i} = v(N) & \\\\\n    & \\displaystyle\\sum_{i\\in S} x_{i} + e \\geq v(S) &, \\forall S \\subseteq N \\\\\n    \\end{array}\n    $$\n    Where $N = \\{1, 2, \\dots, n\\}$ are the training set's indices.\n    Args:\n        u: Utility object with model, data, and scoring function\n            non_negative_subsidy: If True, the least core subsidy $e$ is constrained\n            to be non-negative.\n        solver_options: Dictionary of options that will be used to select a solver\n            and to configure it. Refer to the [cvxpy's\n            documentation](https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options)\n            for all possible options.\n        options: (Deprecated) Dictionary of solver options. Use `solver_options`\n            instead.\n        progress: If True, shows a tqdm progress bar\n    Returns:\n        Object with the data values and the least core value.\n    \"\"\"\nn = len(u.data)\nif n > 20:  # Arbitrary choice, will depend on time required, caching, etc.\nwarnings.warn(f\"Large dataset! 
Computation requires 2^{n} calls to model.fit()\")\n# TODO: remove this before releasing version 0.7.0\nif options:\nwarnings.warn(\nDeprecationWarning(\n\"Passing solver options as kwargs was deprecated in \"\n\"0.6.0, will \"\n\"be removed in 0.7.0. `Use solver_options` instead.\"\n)\n)\nif solver_options is None:\nsolver_options = options\nelse:\nsolver_options.update(options)\nproblem = lc_prepare_problem(u, progress=progress)\nreturn lc_solve_problem(\nproblem=problem,\nu=u,\nalgorithm=\"exact_least_core\",\nnon_negative_subsidy=non_negative_subsidy,\nsolver_options=solver_options,\n)\n
    "},{"location":"api/pydvl/value/least_core/naive/#pydvl.value.least_core.naive.lc_prepare_problem","title":"lc_prepare_problem(u, progress=False)","text":"

    Prepares a linear problem with all subsets of the data. Use this to separate the problem preparation from the solving with lc_solve_problem(). Useful for parallel execution of multiple experiments.

    See exact_least_core() for argument descriptions.

    Source code in src/pydvl/value/least_core/naive.py
    def lc_prepare_problem(u: Utility, progress: bool = False) -> LeastCoreProblem:\n\"\"\"Prepares a linear problem with all subsets of the data\n    Use this to separate the problem preparation from the solving with\n    [lc_solve_problem()][pydvl.value.least_core.common.lc_solve_problem]. Useful for\n    parallel execution of multiple experiments.\n    See [exact_least_core()][pydvl.value.least_core.naive.exact_least_core] for argument\n    descriptions.\n    \"\"\"\nn = len(u.data)\nlogger.debug(\"Building vectors and matrices for linear programming problem\")\npowerset_size = 2**n\nA_lb = np.zeros((powerset_size, n))\nlogger.debug(\"Iterating over all subsets\")\nutility_values = np.zeros(powerset_size)\nfor i, subset in enumerate(\nmaybe_progress(\npowerset(u.data.indices), progress, total=powerset_size - 1, position=0\n)\n):\nindices: NDArray[np.bool_] = np.zeros(n, dtype=bool)\nindices[list(subset)] = True\nA_lb[i, indices] = 1\nutility_values[i] = u(subset)\nreturn LeastCoreProblem(utility_values, A_lb)\n
    "},{"location":"api/pydvl/value/loo/","title":"Loo","text":""},{"location":"api/pydvl/value/loo/loo/","title":"Loo","text":""},{"location":"api/pydvl/value/loo/loo/#pydvl.value.loo.loo.compute_loo","title":"compute_loo(u, *, n_jobs=1, config=ParallelConfig(), progress=True)","text":"

    Computes leave one out value:

    \\[v(i) = u(D) - u(D \\setminus \\{i\\}) \\] PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    progress

    If True, display a progress bar

    TYPE: bool DEFAULT: True

    n_jobs

    Number of parallel jobs to use

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    If True, display a progress bar

    TYPE: bool DEFAULT: True

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    New in version 0.7.0

    Renamed from naive_loo and added parallel computation.

    Source code in src/pydvl/value/loo/loo.py
    def compute_loo(\nu: Utility,\n*,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = True,\n) -> ValuationResult:\nr\"\"\"Computes leave one out value:\n    $$v(i) = u(D) - u(D \\setminus \\{i\\}) $$\n    Args:\n        u: Utility object with model, data, and scoring function\n        progress: If True, display a progress bar\n        n_jobs: Number of parallel jobs to use\n        config: Object configuring parallel computation, with cluster\n            address, number of cpus, etc.\n        progress: If True, display a progress bar\n    Returns:\n        Object with the data values.\n    !!! tip \"New in version 0.7.0\"\n        Renamed from `naive_loo` and added parallel computation.\n    \"\"\"\nif len(u.data) < 3:\nraise ValueError(\"Dataset must have at least 2 elements\")\nresult = ValuationResult.zeros(\nalgorithm=\"loo\",\nindices=u.data.indices,\ndata_names=u.data.data_names,\n)\nall_indices = set(u.data.indices)\ntotal_utility = u(u.data.indices)\ndef fun(idx: int) -> tuple[int, float]:\nreturn idx, total_utility - u(all_indices.difference({idx}))\nmax_workers = effective_n_jobs(n_jobs, config)\nn_submitted_jobs = 2 * max_workers  # number of jobs in the queue\n# NOTE: this could be done with a simple executor.map(), but we want to\n# display a progress bar\nwith init_executor(\nmax_workers=max_workers, config=config, cancel_futures=True\n) as executor:\npending: set[Future] = set()\nindex_it = iter(u.data.indices)\npbar = tqdm(disable=not progress, total=100, unit=\"%\")\nwhile True:\npbar.n = 100 * sum(result.counts) / len(u.data)\npbar.refresh()\ncompleted, pending = wait(pending, timeout=0.1, return_when=FIRST_COMPLETED)\nfor future in completed:\nidx, marginal = future.result()\nresult.update(idx, marginal)\n# Ensure that we always have n_submitted_jobs running\ntry:\nfor _ in range(n_submitted_jobs - len(pending)):\npending.add(executor.submit(fun, next(index_it)))\nexcept StopIteration:\nif len(pending) == 
0:\nreturn result\n
    "},{"location":"api/pydvl/value/loo/naive/","title":"Naive","text":""},{"location":"api/pydvl/value/loo/naive/#pydvl.value.loo.naive.naive_loo","title":"naive_loo(u, *, progress=True, **kwargs)","text":"

    Deprecated. Use compute_loo instead.

    Source code in src/pydvl/value/loo/naive.py
    @deprecated(\ntarget=compute_loo,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.8.0\",\nargs_extra=dict(n_jobs=1),\n)\ndef naive_loo(u: Utility, *, progress: bool = True, **kwargs) -> ValuationResult:\n\"\"\"Deprecated. Use [compute_loo][pydvl.value.loo.compute_loo] instead.\"\"\"\npass  # type: ignore\n
    "},{"location":"api/pydvl/value/oob/","title":"Oob","text":""},{"location":"api/pydvl/value/oob/oob/","title":"Oob","text":""},{"location":"api/pydvl/value/oob/oob/#pydvl.value.oob.oob--references","title":"References","text":"
    1. Kwon et al. Data-OOB: Out-of-bag Estimate as a Simple and Efficient Data Value. In: ICML 2023.\u00a0\u21a9

    "},{"location":"api/pydvl/value/oob/oob/#pydvl.value.oob.oob.compute_data_oob","title":"compute_data_oob(u, n_est=10, max_samples=0.8, n_jobs=None, loss=None, *, progress=False)","text":"

    Computes Data out of bag values

    This implements the method described in (Kwon and Zou, 2023) 1. It fits several base estimators provided through u.model through a bagging process. The point value corresponds to the average loss of estimators which were not fit on it.

    \\(w_{bj}\\in Z\\) is the number of times the j-th datum \\((x_j, y_j)\\) is selected in the b-th bootstrap dataset.

    \\[\\psi((x_i,y_i),\\Theta_B):=\\frac{\\sum_{b=1}^{B}\\mathbb{1}(w_{bi}=0)T(y_i, \\hat{f}_b(x_i))}{\\sum_{b=1}^{B} \\mathbb{1} (w_{bi}=0)}\\]

    With:

    \\[ T: Y \\times Y \\rightarrow \\mathbb{R} \\]

    T is a score function that represents the goodness of a weak learner \\(\\hat{f}_b\\) at the i-th datum \\((x_i, y_i)\\).

    n_est and max_samples need to be tuned jointly to ensure that every sample is out-of-bag (oob) at least once, otherwise the result could include a nan value for that datum.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    n_est

    Number of estimators used in the bagging procedure.

    TYPE: int DEFAULT: 10

    max_samples

    The fraction of samples to draw to train each base estimator.

    TYPE: float DEFAULT: 0.8

    n_jobs

    The number of jobs to run in parallel used in the bagging procedure for both fit and predict.

    TYPE: int DEFAULT: None

    loss

    A function taking as parameters the model predictions and the corresponding data labels (preds, y) and returning an array of point-wise errors.

    TYPE: Callable DEFAULT: None

    progress

    If True, display a progress bar.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    Source code in src/pydvl/value/oob/oob.py
    def compute_data_oob(\nu: Utility,\nn_est: int = 10,\nmax_samples: float = 0.8,\nn_jobs: int = None,\nloss: Callable = None,\n*,\nprogress: bool = False,\n) -> ValuationResult:\nr\"\"\"Computes Data out of bag values\n    This implements the method described in (Kwon and Zou, 2023) <sup><a href=\"kwon_data_2023\">1</a></sup>.\n    It fits several base estimators provided through u.model through a bagging process. The point value corresponds to the average loss of estimators which were not fit on it.\n    $w_{bj}\\in Z$ is the number of times the j-th datum $(x_j, y_j)$ is selected in the b-th bootstrap dataset.\n    $$\\psi((x_i,y_i),\\Theta_B):=\\frac{\\sum_{b=1}^{B}\\mathbb{1}(w_{bi}=0)T(y_i, \\hat{f}_b(x_i))}{\\sum_{b=1}^{B}\n    \\mathbb{1}\n    (w_{bi}=0)}$$\n    With:\n    $$\n    T: Y \\times Y\n    \\rightarrow \\mathbb{R}\n    $$\n    T is a score function that represents the goodness of a weak learner $\\hat{f}_b$ at the i-th datum $(x_i, y_i)$.\n    There is a need to tune n_est and max_samples jointly to ensure all samples are at least 1 time oob, otherwise the result could include a nan value for that datum.\n    Args:\n        u: Utility object with model, data, and scoring function.\n        n_est: Number of estimator used in the bagging procedure.\n        max_samples: The fraction of samples to draw to train each base estimator.\n        n_jobs: The number of jobs to run in parallel used in the bagging\n            procedure for both fit and predict.\n        loss: A function taking as parameters model prediction and corresponding\n            data labels(preds, y) and returning an array of point-wise errors.\n        progress: If True, display a progress bar.\n    Returns:\n        Object with the data values.\n    \"\"\"\nresult: ValuationResult[np.int_, np.object_] = ValuationResult.empty(\nalgorithm=\"data_oob\", indices=u.data.indices, data_names=u.data.data_names\n)\nif is_classifier(u.model):\nbag = BaggingClassifier(\nu.model, 
n_estimators=n_est, max_samples=max_samples, n_jobs=n_jobs\n)\nif loss is None:\nloss = point_wise_accuracy\nelif is_regressor(u.model):\nbag = BaggingRegressor(\nu.model, n_estimators=n_est, max_samples=max_samples, n_jobs=n_jobs\n)\nif loss is None:\nloss = neg_l2_distance\nelse:\nraise Exception(\n\"Model has to be a classifier or a regressor in sklearn format.\"\n)\nbag.fit(u.data.x_train, u.data.y_train)\nfor est, samples in maybe_progress(\nzip(bag.estimators_, bag.estimators_samples_), progress, total=n_est\n):  # The bottleneck is the bag fitting not this part so TQDM is not very useful here\noob_idx = np.setxor1d(u.data.indices, np.unique(samples))\narray_loss = loss(\npreds=est.predict(u.data.x_train[oob_idx]), y=u.data.y_train[oob_idx]\n)\nresult += ValuationResult(\nalgorithm=\"data_oob\",\nindices=oob_idx,\nvalues=array_loss,\ncounts=np.ones_like(array_loss, dtype=u.data.indices.dtype),\n)\nreturn result\n
    "},{"location":"api/pydvl/value/oob/oob/#pydvl.value.oob.oob.point_wise_accuracy","title":"point_wise_accuracy(preds, y)","text":"

    Computes point wise accuracy

    PARAMETER DESCRIPTION preds

    Model predictions

    TYPE: NDArray

    y

    data labels corresponding to the model predictions

    TYPE: NDArray

    RETURNS DESCRIPTION NDArray

    Array of point wise accuracy

    Source code in src/pydvl/value/oob/oob.py
    def point_wise_accuracy(preds: NDArray, y: NDArray) -> NDArray:\nr\"\"\"Computes point wise accuracy\n    Args:\n        preds: Model prediction on\n        y:  data labels corresponding to the model predictions\n    Returns:\n        Array of point wise accuracy\n    \"\"\"\nreturn np.array(preds == y, dtype=np.int_)\n
    "},{"location":"api/pydvl/value/oob/oob/#pydvl.value.oob.oob.neg_l2_distance","title":"neg_l2_distance(preds, y)","text":"

    Computes negative l2 distance between label and model prediction

    PARAMETER DESCRIPTION preds

    Model predictions

    TYPE: NDArray[T]

    y

    data labels corresponding to the model predictions

    TYPE: NDArray[T]

    RETURNS DESCRIPTION NDArray[T]

    Array with point wise negative l2 distance between label and model prediction

    Source code in src/pydvl/value/oob/oob.py
    def neg_l2_distance(preds: NDArray[T], y: NDArray[T]) -> NDArray[T]:\nr\"\"\"Computes negative l2 distance between label and model prediction\n    Args:\n        preds: Model prediction on\n        y:  data labels corresponding to the model predictions\n    Returns:\n        Array with point wise negative l2 distance between label and model prediction\n    \"\"\"\nreturn -np.square(\nnp.array(\npreds - y,\ndtype=np.float64,\n)\n)\n
    "},{"location":"api/pydvl/value/shapley/","title":"Shapley","text":"

    This package holds all routines for the computation of Shapley Data value. Users will want to use compute_shapley_values or compute_semivalues as interfaces to most methods defined in the modules.

    Please refer to the guide on data valuation for an overview of all methods.

    "},{"location":"api/pydvl/value/shapley/common/","title":"Common","text":""},{"location":"api/pydvl/value/shapley/common/#pydvl.value.shapley.common.compute_shapley_values","title":"compute_shapley_values(u, *, done=MaxUpdates(100), mode=ShapleyMode.TruncatedMontecarlo, n_jobs=1, seed=None, **kwargs)","text":"

    Umbrella method to compute Shapley values with any of the available algorithms.

    See the guide on data valuation for an overview.

    The following algorithms are available. Note that the exact methods can only work with very small datasets and are thus intended only for testing. Some algorithms also accept additional arguments, please refer to the documentation of each particular method.

    • combinatorial_exact: uses the combinatorial implementation of data Shapley. Implemented in combinatorial_exact_shapley().
    • combinatorial_montecarlo: uses the approximate Monte Carlo implementation of combinatorial data Shapley. Implemented in combinatorial_montecarlo_shapley().
    • permutation_exact: uses the permutation-based implementation of data Shapley. Computation is not parallelized. Implemented in permutation_exact_shapley().
    • permutation_montecarlo: uses the approximate Monte Carlo implementation of permutation data Shapley. Accepts a TruncationPolicy to stop computing marginals. Implemented in permutation_montecarlo_shapley().
    • owen_sampling: Uses the Owen continuous extension of the utility function to the unit cube. Implemented in owen_sampling_shapley(). This method does not take a StoppingCriterion but instead requires a parameter max_q for the number of subdivisions of the unit interval to use for integration, and another parameter n_samples for the number of subsets to sample for each \\(q\\).
    • owen_halved: Same as 'owen_sampling' but uses correlated samples in the expectation. Implemented in owen_sampling_shapley(). This method requires an additional parameter max_q for the number of subdivisions of the interval [0,0.5] to use for integration, and another parameter n_samples for the number of subsets to sample for each \\(q\\).
    • group_testing: estimates differences of Shapley values and solves a constraint satisfaction problem. High sample complexity, not recommended. Implemented in group_testing_shapley(). This method does not take a StoppingCriterion but instead requires a parameter n_samples for the number of iterations to run.

    Additionally, one can use model-specific methods:

    • knn: Exact method for K-Nearest neighbour models. Implemented in knn_shapley().
    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    done

    Object used to determine when to stop the computation for Monte Carlo methods. The default is to stop after 100 iterations. See the available criteria in stopping. It is possible to combine several of them using boolean operators. Some methods ignore this argument, others require specific subtypes.

    TYPE: StoppingCriterion DEFAULT: MaxUpdates(100)

    n_jobs

    Number of parallel jobs (available only to some methods)

    TYPE: int DEFAULT: 1

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    mode

    Choose which Shapley algorithm to use. See ShapleyMode for a list of allowed values.

    TYPE: ShapleyMode DEFAULT: TruncatedMontecarlo

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Source code in src/pydvl/value/shapley/common.py
    def compute_shapley_values(\nu: Utility,\n*,\ndone: StoppingCriterion = MaxUpdates(100),\nmode: ShapleyMode = ShapleyMode.TruncatedMontecarlo,\nn_jobs: int = 1,\nseed: Optional[Seed] = None,\n**kwargs,\n) -> ValuationResult:\n\"\"\"Umbrella method to compute Shapley values with any of the available\n    algorithms.\n    See [[data-valuation]] for an overview.\n    The following algorithms are available. Note that the exact methods can only\n    work with very small datasets and are thus intended only for testing. Some\n    algorithms also accept additional arguments, please refer to the\n    documentation of each particular method.\n    - `combinatorial_exact`: uses the combinatorial implementation of data\n      Shapley. Implemented in\n      [combinatorial_exact_shapley()][pydvl.value.shapley.naive.combinatorial_exact_shapley].\n    - `combinatorial_montecarlo`:  uses the approximate Monte Carlo\n      implementation of combinatorial data Shapley. Implemented in\n      [combinatorial_montecarlo_shapley()][pydvl.value.shapley.montecarlo.combinatorial_montecarlo_shapley].\n    - `permutation_exact`: uses the permutation-based implementation of data\n      Shapley. Computation is **not parallelized**. Implemented in\n      [permutation_exact_shapley()][pydvl.value.shapley.naive.permutation_exact_shapley].\n    - `permutation_montecarlo`: uses the approximate Monte Carlo\n      implementation of permutation data Shapley. Accepts a\n      [TruncationPolicy][pydvl.value.shapley.truncated.TruncationPolicy] to stop\n      computing marginals. Implemented in\n      [permutation_montecarlo_shapley()][pydvl.value.shapley.montecarlo.permutation_montecarlo_shapley].\n    - `owen_sampling`: Uses the Owen continuous extension of the utility\n      function to the unit cube. Implemented in\n      [owen_sampling_shapley()][pydvl.value.shapley.owen.owen_sampling_shapley]. 
This\n      method does not take a [StoppingCriterion][pydvl.value.stopping.StoppingCriterion]\n      but instead requires a parameter `q_max` for the number of subdivisions\n      of the unit interval to use for integration, and another parameter\n      `n_samples` for the number of subsets to sample for each $q$.\n    - `owen_halved`: Same as 'owen_sampling' but uses correlated samples in the\n      expectation. Implemented in\n      [owen_sampling_shapley()][pydvl.value.shapley.owen.owen_sampling_shapley].\n      This method  requires an additional parameter `q_max` for the number of\n      subdivisions of the interval [0,0.5] to use for integration, and another\n      parameter `n_samples` for the number of subsets to sample for each $q$.\n    - `group_testing`: estimates differences of Shapley values and solves a\n      constraint satisfaction problem. High sample complexity, not recommended.\n      Implemented in [group_testing_shapley()][pydvl.value.shapley.gt.group_testing_shapley]. This\n      method does not take a [StoppingCriterion][pydvl.value.stopping.StoppingCriterion]\n      but instead requires a parameter `n_samples` for the number of\n      iterations to run.\n    Additionally, one can use model-specific methods:\n    - `knn`: Exact method for K-Nearest neighbour models. Implemented in\n      [knn_shapley()][pydvl.value.shapley.knn.knn_shapley].\n    Args:\n        u: [Utility][pydvl.utils.utility.Utility] object with model, data, and\n            scoring function.\n        done: Object used to determine when to stop the computation for Monte\n            Carlo methods. The default is to stop after 100 iterations. See the\n            available criteria in [stopping][pydvl.value.stopping]. It is\n            possible to combine several of them using boolean operators. 
Some\n            methods ignore this argument, others require specific subtypes.\n        n_jobs: Number of parallel jobs (available only to some methods)\n        seed: Either an instance of a numpy random number generator or a seed\n            for it.\n        mode: Choose which shapley algorithm to use. See\n            [ShapleyMode][pydvl.value.shapley.ShapleyMode] for a list of allowed\n            value.\n    Returns:\n        Object with the results.\n    \"\"\"\nprogress: bool = kwargs.pop(\"progress\", False)\nif mode not in list(ShapleyMode):\nraise ValueError(f\"Invalid value encountered in {mode=}\")\nif mode in (\nShapleyMode.PermutationMontecarlo,\nShapleyMode.ApproShapley,\nShapleyMode.TruncatedMontecarlo,\n):\ntruncation = kwargs.pop(\"truncation\", NoTruncation())\nreturn permutation_montecarlo_shapley(  # type: ignore\nu=u, done=done, truncation=truncation, n_jobs=n_jobs, seed=seed, **kwargs\n)\nelif mode == ShapleyMode.CombinatorialMontecarlo:\nreturn combinatorial_montecarlo_shapley(\nu, done=done, n_jobs=n_jobs, seed=seed, progress=progress\n)\nelif mode == ShapleyMode.CombinatorialExact:\nreturn combinatorial_exact_shapley(u, n_jobs=n_jobs, progress=progress)\nelif mode == ShapleyMode.PermutationExact:\nreturn permutation_exact_shapley(u, progress=progress)\nelif mode == ShapleyMode.Owen or mode == ShapleyMode.OwenAntithetic:\nif kwargs.get(\"n_samples\") is None:\nraise ValueError(\"n_samples cannot be None for Owen methods\")\nif kwargs.get(\"max_q\") is None:\nraise ValueError(\"Owen Sampling requires max_q for the outer integral\")\nmethod = (\nOwenAlgorithm.Standard\nif mode == ShapleyMode.Owen\nelse OwenAlgorithm.Antithetic\n)\nreturn owen_sampling_shapley(\nu,\nn_samples=int(kwargs.get(\"n_samples\", -1)),\nmax_q=int(kwargs.get(\"max_q\", -1)),\nmethod=method,\nn_jobs=n_jobs,\nseed=seed,\n)\nelif mode == ShapleyMode.KNN:\nreturn knn_shapley(u, progress=progress)\nelif mode == ShapleyMode.GroupTesting:\nn_samples = 
kwargs.pop(\"n_samples\")\nif n_samples is None:\nraise ValueError(\"n_samples cannot be None for Group Testing\")\nepsilon = kwargs.pop(\"epsilon\")\nif epsilon is None:\nraise ValueError(\"Group Testing requires error bound epsilon\")\ndelta = kwargs.pop(\"delta\", 0.05)\nreturn group_testing_shapley(\nu,\nepsilon=float(epsilon),\ndelta=delta,\nn_samples=int(n_samples),\nn_jobs=n_jobs,\nprogress=progress,\nseed=seed,\n**kwargs,\n)\nelse:\nraise ValueError(f\"Invalid value encountered in {mode=}\")\n
    "},{"location":"api/pydvl/value/shapley/gt/","title":"Gt","text":"

    This module implements Group Testing for the approximation of Shapley values, as introduced in (Jia, R. et al., 2019)1. The sampling of index subsets is done in such a way that an approximation to the true Shapley values can be computed with guarantees.

    Warning

    This method is very inefficient. Potential improvements to the implementation notwithstanding, convergence seems to be very slow (in terms of evaluations of the utility required). We recommend other Monte Carlo methods instead.

    You can read more in the documentation.

    New in version 0.4.0

    "},{"location":"api/pydvl/value/shapley/gt/#pydvl.value.shapley.gt--references","title":"References","text":"
    1. Jia, R. et al., 2019. Towards Efficient Data Valuation Based on the Shapley Value. In: Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics, pp. 1167\u20131176. PMLR.\u00a0\u21a9

    "},{"location":"api/pydvl/value/shapley/gt/#pydvl.value.shapley.gt.num_samples_eps_delta","title":"num_samples_eps_delta(eps, delta, n, utility_range)","text":"

    Implements the formula in Theorem 3 of (Jia, R. et al., 2019)1 which gives a lower bound on the number of samples required to obtain an (\u03b5/\u221an, \u03b4/(N(N-1)))-approximation to all pair-wise differences of Shapley values, with respect to the \\(\\ell_2\\) norm.

    PARAMETER DESCRIPTION eps

    \u03b5

    TYPE: float

    delta

    \u03b4

    TYPE: float

    n

    Number of data points

    TYPE: int

    utility_range

    Range of the Utility function

    TYPE: float

    Returns: Number of samples from \\(2^{[n]}\\) guaranteeing \u03b5/\u221an-correct Shapley pair-wise differences of values with probability 1-\u03b4/(N(N-1)).

    New in version 0.4.0

    Source code in src/pydvl/value/shapley/gt.py
    def num_samples_eps_delta(\neps: float, delta: float, n: int, utility_range: float\n) -> int:\nr\"\"\"Implements the formula in Theorem 3 of (Jia, R. et al., 2019)<sup><a href=\"#jia_efficient_2019\">1</a></sup>\n    which gives a lower bound on the number of samples required to obtain an\n    (\u03b5/\u221an,\u03b4/(N(N-1))-approximation to all pair-wise differences of Shapley\n    values, wrt. $\\ell_2$ norm.\n    Args:\n        eps: \u03b5\n        delta: \u03b4\n        n: Number of data points\n        utility_range: Range of the [Utility][pydvl.utils.utility.Utility] function\n    Returns:\n        Number of samples from $2^{[n]}$ guaranteeing \u03b5/\u221an-correct Shapley\n            pair-wise differences of values with probability 1-\u03b4/(N(N-1)).\n    !!! tip \"New in version 0.4.0\"\n    \"\"\"\nconstants = _constants(n=n, epsilon=eps, delta=delta, utility_range=utility_range)\nreturn int(constants.T)\n
    "},{"location":"api/pydvl/value/shapley/gt/#pydvl.value.shapley.gt.group_testing_shapley","title":"group_testing_shapley(u, n_samples, epsilon, delta, *, n_jobs=1, config=ParallelConfig(), progress=False, seed=None, **options)","text":"

    Implements group testing for approximation of Shapley values as described in (Jia, R. et al., 2019)1.

    Warning

    This method is very inefficient. It requires several orders of magnitude more evaluations of the utility than others in montecarlo. It also uses several intermediate objects like the results from the runners and the constraint matrices which can become rather large.

    By picking a specific distribution over subsets, the differences in Shapley values can be approximated with a Monte Carlo sum. These are then used to solve for the individual values in a feasibility problem.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    n_samples

    Number of tests to perform. Use num_samples_eps_delta to estimate this.

    TYPE: int

    epsilon

    From the (\u03b5,\u03b4) sample bound. Use the same value that was passed to num_samples_eps_delta when estimating n_samples.

    TYPE: float

    delta

    From the (\u03b5,\u03b4) sample bound. Use the same value that was passed to num_samples_eps_delta when estimating n_samples.

    TYPE: float

    n_jobs

    Number of parallel jobs to use. Each worker performs a chunk of all tests (i.e. utility evaluations).

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display progress bars for each job.

    TYPE: bool DEFAULT: False

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    options

    Additional options to pass to cvxpy.Problem.solve(). E.g. to change the solver (which defaults to cvxpy.SCS) pass solver=cvxpy.CVXOPT.

    DEFAULT: {}

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    New in version 0.4.0

    Changed in version 0.5.0

    Changed the solver to cvxpy instead of scipy's linprog. Added the ability to pass arbitrary options to it.

    Source code in src/pydvl/value/shapley/gt.py
    def group_testing_shapley(\nu: Utility,\nn_samples: int,\nepsilon: float,\ndelta: float,\n*,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Optional[Seed] = None,\n**options,\n) -> ValuationResult:\n\"\"\"Implements group testing for approximation of Shapley values as described\n    in (Jia, R. et al., 2019)<sup><a href=\"#jia_efficient_2019\">1</a></sup>.\n    !!! Warning\n        This method is very inefficient. It requires several orders of magnitude\n        more evaluations of the utility than others in\n        [montecarlo][pydvl.value.shapley.montecarlo]. It also uses several intermediate\n        objects like the results from the runners and the constraint matrices\n        which can become rather large.\n    By picking a specific distribution over subsets, the differences in Shapley\n    values can be approximated with a Monte Carlo sum. These are then used to\n    solve for the individual values in a feasibility problem.\n    Args:\n        u: Utility object with model, data, and scoring function\n        n_samples: Number of tests to perform. Use\n            [num_samples_eps_delta][pydvl.value.shapley.gt.num_samples_eps_delta]\n            to estimate this.\n        epsilon: From the (\u03b5,\u03b4) sample bound. Use the same as for the\n            estimation of `n_iterations`.\n        delta: From the (\u03b5,\u03b4) sample bound. Use the same as for the\n            estimation of `n_iterations`.\n        n_jobs: Number of parallel jobs to use. Each worker performs a chunk\n            of all tests (i.e. 
utility evaluations).\n        config: Object configuring parallel computation, with cluster\n            address, number of cpus, etc.\n        progress: Whether to display progress bars for each job.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n        options: Additional options to pass to\n            [cvxpy.Problem.solve()](https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options).\n            E.g. to change the solver (which defaults to `cvxpy.SCS`) pass\n            `solver=cvxpy.CVXOPT`.\n    Returns:\n        Object with the data values.\n    !!! tip \"New in version 0.4.0\"\n    !!! tip \"Changed in version 0.5.0\"\n        Changed the solver to cvxpy instead of scipy's linprog. Added the ability\n        to pass arbitrary options to it.\n    \"\"\"\nn = len(u.data.indices)\nconst = _constants(\nn=n,\nepsilon=epsilon,\ndelta=delta,\nutility_range=u.score_range.max() - u.score_range.min(),\n)\nT = n_samples\nif T < const.T:\nlog.warning(\nf\"n_samples of {T} are below the required {const.T} for the \"\nf\"\u03b5={epsilon:.02f} guarantee at \u03b4={1 - delta:.02f} probability\"\n)\nsamples_per_job = max(1, n_samples // effective_n_jobs(n_jobs, config))\ndef reducer(\nresults_it: Iterable[Tuple[NDArray, NDArray]]\n) -> Tuple[NDArray, NDArray]:\nreturn np.concatenate(list(x[0] for x in results_it)).astype(\nnp.float_\n), np.concatenate(list(x[1] for x in results_it)).astype(np.int_)\nseed_sequence = ensure_seed_sequence(seed)\nmap_reduce_seed_sequence, cvxpy_seed = tuple(seed_sequence.spawn(2))\nmap_reduce_job: MapReduceJob[Utility, Tuple[NDArray, NDArray]] = MapReduceJob(\nu,\nmap_func=_group_testing_shapley,\nreduce_func=reducer,\nmap_kwargs=dict(n_samples=samples_per_job, progress=progress),\nconfig=config,\nn_jobs=n_jobs,\n)\nuu, betas = map_reduce_job(seed=map_reduce_seed_sequence)\n# Matrix of estimated differences. See Eqs. 
(3) and (4) in the paper.\nC = np.zeros(shape=(n, n))\nfor i in range(n):\nfor j in range(i + 1, n):\nC[i, j] = np.dot(uu, betas[:, i] - betas[:, j])\nC *= const.Z / T\ntotal_utility = u(u.data.indices)\n###########################################################################\n# Solution of the constraint problem with cvxpy\nv = cp.Variable(n)\nconstraints = [cp.sum(v) == total_utility]\nfor i in range(n):\nfor j in range(i + 1, n):\nconstraints.append(v[i] - v[j] <= epsilon + C[i, j])\nconstraints.append(v[j] - v[i] <= epsilon - C[i, j])\nproblem = cp.Problem(cp.Minimize(0), constraints)\nsolver = options.pop(\"solver\", cp.SCS)\nproblem.solve(solver=solver, **options)\nif problem.status != \"optimal\":\nlog.warning(f\"cvxpy returned status {problem.status}\")\nvalues = (\nnp.nan * np.ones_like(u.data.indices)\nif not hasattr(v.value, \"__len__\")\nelse v.value\n)\nstatus = Status.Failed\nelse:\nvalues = v.value\nstatus = Status.Converged\nreturn ValuationResult(\nalgorithm=\"group_testing_shapley\",\nstatus=status,\nvalues=values,\ndata_names=u.data.data_names,\nsolver_status=problem.status,\n)\n
    "},{"location":"api/pydvl/value/shapley/knn/","title":"Knn","text":"

    This module contains Shapley computations for K-Nearest Neighbours.

    Todo

    Implement approximate KNN computation for sublinear complexity

    "},{"location":"api/pydvl/value/shapley/knn/#pydvl.value.shapley.knn--references","title":"References","text":"
    1. Jia, R. et al., 2019. Efficient Task-Specific Data Valuation for Nearest Neighbor Algorithms. In: Proceedings of the VLDB Endowment, Vol. 12, No. 11, pp. 1610\u20131623.\u00a0\u21a9

    "},{"location":"api/pydvl/value/shapley/knn/#pydvl.value.shapley.knn.knn_shapley","title":"knn_shapley(u, *, progress=True)","text":"

    Computes exact Shapley values for a KNN classifier.

    This implements the method described in (Jia, R. et al., 2019)1. It exploits the local structure of K-Nearest Neighbours to reduce the number of calls to the utility function to a constant number per index, thus reducing computation time to \\(O(n)\\).

    PARAMETER DESCRIPTION u

    Utility with a KNN model to extract parameters from. The object will not be modified nor used other than to call get_params()

    TYPE: Utility

    progress

    Whether to display a progress bar.

    TYPE: bool DEFAULT: True

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    RAISES DESCRIPTION TypeError

    If the model in the utility is not a sklearn.neighbors.KNeighborsClassifier.

    New in version 0.1.0

    Source code in src/pydvl/value/shapley/knn.py
    def knn_shapley(u: Utility, *, progress: bool = True) -> ValuationResult:\n\"\"\"Computes exact Shapley values for a KNN classifier.\n    This implements the method described in (Jia, R. et al., 2019)<sup><a href=\"#jia_efficient_2019a\">1</a></sup>.\n    It exploits the local structure of K-Nearest Neighbours to reduce the number\n    of calls to the utility function to a constant number per index, thus\n    reducing computation time to $O(n)$.\n    Args:\n        u: Utility with a KNN model to extract parameters from. The object\n            will not be modified nor used other than to call [get_params()](\n            <https://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html#sklearn.base.BaseEstimator.get_params>)\n        progress: Whether to display a progress bar.\n    Returns:\n        Object with the data values.\n    Raises:\n        TypeError: If the model in the utility is not a\n            [sklearn.neighbors.KNeighborsClassifier][].\n    !!! tip \"New in version 0.1.0\"\n    \"\"\"\nif not isinstance(u.model, KNeighborsClassifier):\nraise TypeError(\"KNN Shapley requires a K-Nearest Neighbours model\")\ndefaults: Dict[str, Union[int, str]] = {\n\"algorithm\": \"ball_tree\" if u.data.dim >= 20 else \"kd_tree\",\n\"metric\": \"minkowski\",\n\"p\": 2,\n}\ndefaults.update(u.model.get_params())\n# HACK: NearestNeighbors doesn't support this. 
There will be more...\ndel defaults[\"weights\"]\nn_neighbors: int = int(defaults[\"n_neighbors\"])\ndefaults[\"n_neighbors\"] = len(u.data)  # We want all training points sorted\nassert n_neighbors < len(u.data)\n# assert data.target_dim == 1\nnns = NearestNeighbors(**defaults).fit(u.data.x_train)\n# closest to farthest\n_, indices = nns.kneighbors(u.data.x_test)\nvalues: NDArray[np.float_] = np.zeros_like(u.data.indices, dtype=np.float_)\nn = len(u.data)\nyt = u.data.y_train\niterator = enumerate(zip(u.data.y_test, indices), start=1)\nfor j, (y, ii) in maybe_progress(iterator, progress):\nvalue_at_x = int(yt[ii[-1]] == y) / n\nvalues[ii[-1]] += (value_at_x - values[ii[-1]]) / j\nfor i in range(n - 2, n_neighbors, -1):  # farthest to closest\nvalue_at_x = (\nvalues[ii[i + 1]] + (int(yt[ii[i]] == y) - int(yt[ii[i + 1]] == y)) / i\n)\nvalues[ii[i]] += (value_at_x - values[ii[i]]) / j\nfor i in range(n_neighbors, -1, -1):  # farthest to closest\nvalue_at_x = (\nvalues[ii[i + 1]]\n+ (int(yt[ii[i]] == y) - int(yt[ii[i + 1]] == y)) / n_neighbors\n)\nvalues[ii[i]] += (value_at_x - values[ii[i]]) / j\nreturn ValuationResult(\nalgorithm=\"knn_shapley\",\nstatus=Status.Converged,\nvalues=values,\ndata_names=u.data.data_names,\n)\n
    "},{"location":"api/pydvl/value/shapley/montecarlo/","title":"Montecarlo","text":"

    Monte Carlo approximations to Shapley Data values.

    Warning

    You probably want to use the common interface provided by compute_shapley_values() instead of directly using the functions in this module.

    Because exact computation of Shapley values requires \\(\\mathcal{O}(2^n)\\) re-trainings of the model, several Monte Carlo approximations are available. The first two sample from the powerset of the training data directly: combinatorial_montecarlo_shapley() and owen_sampling_shapley(). The latter uses a reformulation in terms of a continuous extension of the utility.

    Alternatively, employing another reformulation of the expression above as a sum over permutations, one has the implementation in permutation_montecarlo_shapley(), or, using an early stopping strategy to reduce computation, truncated_montecarlo_shapley().

    Also see

    It is also possible to use group_testing_shapley() to reduce the number of evaluations of the utility. The method is however typically outperformed by others in this module.

    Also see

    Additionally, you can consider grouping your data points using GroupedDataset and computing the values of the groups instead. This is not to be confused with \"group testing\" as implemented in group_testing_shapley(): any of the algorithms mentioned above, including Group Testing, can work to valuate groups of samples as units.

    "},{"location":"api/pydvl/value/shapley/montecarlo/#pydvl.value.shapley.montecarlo--references","title":"References","text":"
    1. Ghorbani, A., Zou, J., 2019. Data Shapley: Equitable Valuation of Data for Machine Learning. In: Proceedings of the 36th International Conference on Machine Learning, PMLR, pp. 2242\u20132251.\u00a0\u21a9

    "},{"location":"api/pydvl/value/shapley/montecarlo/#pydvl.value.shapley.montecarlo.permutation_montecarlo_shapley","title":"permutation_montecarlo_shapley(u, done, *, truncation=NoTruncation(), n_jobs=1, config=ParallelConfig(), progress=False, seed=None)","text":"

    Computes an approximate Shapley value by sampling independent permutations of the index set, approximating the sum:

    \\[ v_u(x_i) = \\frac{1}{n!} \\sum_{\\sigma \\in \\Pi(n)} \\tilde{w}( | \\sigma_{:i} | )[u(\\sigma_{:i} \\cup \\{i\\}) \u2212 u(\\sigma_{:i})], \\]

    where \\(\\sigma_{:i}\\) denotes the set of indices in permutation sigma before the position where \\(i\\) appears (see Data valuation for details).

    This implements the method described in (Ghorbani and Zou, 2019)1 with a double stopping criterion.

    Todo: Think of how to add Gelman-Rubin or some other more principled stopping criterion.

    Instead of naively implementing the expectation, we sequentially add points to coalitions from a permutation and incrementally compute marginal utilities. We stop computing marginals for a given permutation based on a TruncationPolicy. (Ghorbani and Zou, 2019)1 mention two policies: one that stops after a certain fraction of marginals are computed, implemented in FixedTruncation, and one that stops if the last computed utility (\"score\") is close to the total utility using the standard deviation of the utility as a measure of proximity, implemented in BootstrapTruncation.

    We keep sampling permutations and updating all shapley values until the StoppingCriterion returns True.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    done

    function checking whether computation must stop.

    TYPE: StoppingCriterion

    truncation

    An optional callable which decides whether to interrupt processing a permutation and set all subsequent marginals to zero. Typically used to stop computation when the marginal is small.

    TYPE: TruncationPolicy DEFAULT: NoTruncation()

    n_jobs

    number of jobs across which to distribute the computation.

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display a progress bar.

    TYPE: bool DEFAULT: False

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Seed DEFAULT: None

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    Source code in src/pydvl/value/shapley/montecarlo.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.8.0\",\nargs_mapping=dict(\ncoordinator_update_period=None, worker_update_period=None, progress=None\n),\n)\ndef permutation_montecarlo_shapley(\nu: Utility,\ndone: StoppingCriterion,\n*,\ntruncation: TruncationPolicy = NoTruncation(),\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Seed = None,\n) -> ValuationResult:\nr\"\"\"Computes an approximate Shapley value by sampling independent\n    permutations of the index set, approximating the sum:\n    $$\n    v_u(x_i) = \\frac{1}{n!} \\sum_{\\sigma \\in \\Pi(n)}\n    \\tilde{w}( | \\sigma_{:i} | )[u(\\sigma_{:i} \\cup \\{i\\}) \u2212 u(\\sigma_{:i})],\n    $$\n    where $\\sigma_{:i}$ denotes the set of indices in permutation sigma before\n    the position where $i$ appears (see [[data-valuation]] for details).\n    This implements the method described in (Ghorbani and Zou, 2019)<sup><a href=\"#ghorbani_data_2019\">1</a></sup>\n    with a double stopping criterion.\n    .. 
todo::\n       Think of how to add Robin-Gelman or some other more principled stopping\n       criterion.\n    Instead of naively implementing the expectation, we sequentially add points\n    to coalitions from a permutation and incrementally compute marginal utilities.\n    We stop computing marginals for a given permutation based on a\n    [TruncationPolicy][pydvl.value.shapley.truncated.TruncationPolicy].\n    (Ghorbani and Zou, 2019)<sup><a href=\"#ghorbani_data_2019\">1</a></sup>\n    mention two policies: one that stops after a certain\n    fraction of marginals are computed, implemented in\n    [FixedTruncation][pydvl.value.shapley.truncated.FixedTruncation],\n    and one that stops if the last computed utility (\"score\") is close to the\n    total utility using the standard deviation of the utility as a measure of\n    proximity, implemented in\n    [BootstrapTruncation][pydvl.value.shapley.truncated.BootstrapTruncation].\n    We keep sampling permutations and updating all shapley values\n    until the [StoppingCriterion][pydvl.value.stopping.StoppingCriterion] returns\n    `True`.\n    Args:\n        u: Utility object with model, data, and scoring function.\n        done: function checking whether computation must stop.\n        truncation: An optional callable which decides whether to interrupt\n            processing a permutation and set all subsequent marginals to\n            zero. 
Typically used to stop computation when the marginal is small.\n        n_jobs: number of jobs across which to distribute the computation.\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        progress: Whether to display a progress bar.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n    Returns:\n        Object with the data values.\n    \"\"\"\nalgorithm = \"permutation_montecarlo_shapley\"\nparallel_backend = init_parallel_backend(config)\nu = parallel_backend.put(u)\nmax_workers = effective_n_jobs(n_jobs, config)\nn_submitted_jobs = 2 * max_workers  # number of jobs in the executor's queue\nseed_sequence = ensure_seed_sequence(seed)\nresult = ValuationResult.zeros(\nalgorithm=algorithm, indices=u.data.indices, data_names=u.data.data_names\n)\npbar = tqdm(disable=not progress, total=100, unit=\"%\")\nwith init_executor(\nmax_workers=max_workers, config=config, cancel_futures=CancellationPolicy.ALL\n) as executor:\npending: set[Future] = set()\nwhile True:\npbar.n = 100 * done.completion()\npbar.refresh()\ncompleted, pending = wait(\npending, timeout=config.wait_timeout, return_when=FIRST_COMPLETED\n)\nfor future in completed:\nresult += future.result()\n# we could check outside the loop, but that means more\n# submissions if the stopping criterion is unstable\nif done(result):\nreturn result\n# Ensure that we always have n_submitted_jobs in the queue or running\nn_remaining_slots = n_submitted_jobs - len(pending)\nseeds = seed_sequence.spawn(n_remaining_slots)\nfor i in range(n_remaining_slots):\nfuture = executor.submit(\n_permutation_montecarlo_one_step,\nu,\ntruncation,\nalgorithm,\nseed=seeds[i],\n)\npending.add(future)\n
    "},{"location":"api/pydvl/value/shapley/montecarlo/#pydvl.value.shapley.montecarlo.combinatorial_montecarlo_shapley","title":"combinatorial_montecarlo_shapley(u, done, *, n_jobs=1, config=ParallelConfig(), progress=False, seed=None)","text":"

    Computes an approximate Shapley value using the combinatorial definition:

    \\[v_u(i) = \\frac{1}{n} \\sum_{S \\subseteq N \\setminus \\{i\\}} \\binom{n-1}{ | S | }^{-1} [u(S \\cup \\{i\\}) \u2212 u(S)]\\]

    This consists of randomly sampling subsets of the power set of the training indices in u.data, and computing their marginal utilities. See Data valuation for details.

    Note that because sampling is done with replacement, the approximation is poor even for \\(2^{m}\\) subsets with \\(m>n\\), even though there are \\(2^{n-1}\\) subsets for each \\(i\\). Prefer permutation_montecarlo_shapley().

    Parallelization is done by splitting the set of indices across processes and computing the sum over subsets \\(S \\subseteq N \\setminus \\{i\\}\\) separately.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    done

    Stopping criterion for the computation.

    TYPE: StoppingCriterion

    n_jobs

    number of parallel jobs across which to distribute the computation. Each worker receives a chunk of indices

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display progress bars for each job.

    TYPE: bool DEFAULT: False

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    Source code in src/pydvl/value/shapley/montecarlo.py
    def combinatorial_montecarlo_shapley(\nu: Utility,\ndone: StoppingCriterion,\n*,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Optional[Seed] = None,\n) -> ValuationResult:\nr\"\"\"Computes an approximate Shapley value using the combinatorial\n    definition:\n    $$v_u(i) = \\frac{1}{n} \\sum_{S \\subseteq N \\setminus \\{i\\}}\n    \\binom{n-1}{ | S | }^{-1} [u(S \\cup \\{i\\}) \u2212 u(S)]$$\n    This consists of randomly sampling subsets of the power set of the training\n    indices in [u.data][pydvl.utils.utility.Utility], and computing their\n    marginal utilities. See [Data valuation][computing-data-values] for details.\n    Note that because sampling is done with replacement, the approximation is\n    poor even for $2^{m}$ subsets with $m>n$, even though there are $2^{n-1}$\n    subsets for each $i$. Prefer\n    [permutation_montecarlo_shapley()][pydvl.value.shapley.montecarlo.permutation_montecarlo_shapley].\n    Parallelization is done by splitting the set of indices across processes and\n    computing the sum over subsets $S \\subseteq N \\setminus \\{i\\}$ separately.\n    Args:\n        u: Utility object with model, data, and scoring function\n        done: Stopping criterion for the computation.\n        n_jobs: number of parallel jobs across which to distribute the\n            computation. 
Each worker receives a chunk of\n            [indices][pydvl.utils.dataset.Dataset.indices]\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        progress: Whether to display progress bars for each job.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n    Returns:\n        Object with the data values.\n    \"\"\"\nmap_reduce_job: MapReduceJob[NDArray, ValuationResult] = MapReduceJob(\nu.data.indices,\nmap_func=_combinatorial_montecarlo_shapley,\nreduce_func=lambda results: reduce(operator.add, results),\nmap_kwargs=dict(u=u, done=done, progress=progress),\nn_jobs=n_jobs,\nconfig=config,\n)\nreturn map_reduce_job(seed=seed)\n
    "},{"location":"api/pydvl/value/shapley/naive/","title":"Naive","text":""},{"location":"api/pydvl/value/shapley/naive/#pydvl.value.shapley.naive.permutation_exact_shapley","title":"permutation_exact_shapley(u, *, progress=True)","text":"

    Computes the exact Shapley value using the formulation with permutations:

    \\[v_u(x_i) = \\frac{1}{n!} \\sum_{\\sigma \\in \\Pi(n)} [u(\\sigma_{:i} \\cup \\{i\\}) \u2212 u(\\sigma_{:i})].\\]

    See Data valuation for details.

    When the length of the training set is > 10 this prints a warning since the computation becomes too expensive. Used mostly for internal testing and simple use cases. Please refer to the Monte Carlo approximations for practical applications.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    progress

    Whether to display progress bars for each job.

    TYPE: bool DEFAULT: True

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    Source code in src/pydvl/value/shapley/naive.py
    def permutation_exact_shapley(u: Utility, *, progress: bool = True) -> ValuationResult:\nr\"\"\"Computes the exact Shapley value using the formulation with permutations:\n    $$v_u(x_i) = \\frac{1}{n!} \\sum_{\\sigma \\in \\Pi(n)} [u(\\sigma_{i-1} \\cup {i}) \u2212 u(\\sigma_{i})].$$\n    See [Data valuation][computing-data-values] for details.\n    When the length of the training set is > 10 this prints a warning since the\n    computation becomes too expensive. Used mostly for internal testing and\n    simple use cases. Please refer to the [Monte Carlo\n    approximations][pydvl.value.shapley.montecarlo] for practical applications.\n    Args:\n        u: Utility object with model, data, and scoring function\n        progress: Whether to display progress bars for each job.\n    Returns:\n        Object with the data values.\n    \"\"\"\nn = len(u.data)\n# Note that the cache in utility saves most of the refitting because we\n# use frozenset for the input.\nif n > 10:\nwarnings.warn(\nf\"Large dataset! Computation requires {n}! calls to utility()\",\nRuntimeWarning,\n)\nvalues = np.zeros(n)\nfor p in maybe_progress(\npermutations(u.data.indices),\nprogress,\ndesc=\"Permutation\",\ntotal=math.factorial(n),\n):\nfor i, idx in enumerate(p):\nvalues[idx] += u(p[: i + 1]) - u(p[:i])\nvalues /= math.factorial(n)\nreturn ValuationResult(\nalgorithm=\"permutation_exact_shapley\",\nstatus=Status.Converged,\nvalues=values,\ndata_names=u.data.data_names,\n)\n
    "},{"location":"api/pydvl/value/shapley/naive/#pydvl.value.shapley.naive.combinatorial_exact_shapley","title":"combinatorial_exact_shapley(u, *, n_jobs=1, config=ParallelConfig(), progress=False)","text":"

    Computes the exact Shapley value using the combinatorial definition.

    \\[v_u(i) = \\frac{1}{n} \\sum_{S \\subseteq N \\setminus \\{i\\}} \\binom{n-1}{ | S | }^{-1} [u(S \\cup \\{i\\}) \u2212 u(S)].\\]

    See Data valuation for details.

    Note

    If the length of the training set is > n_jobs*20 this prints a warning because the computation is very expensive. Used mostly for internal testing and simple use cases. Please refer to the Monte Carlo approximations for practical applications.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    n_jobs

    Number of parallel jobs to use

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display progress bars for each job.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    Source code in src/pydvl/value/shapley/naive.py
    def combinatorial_exact_shapley(\nu: Utility,\n*,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\n) -> ValuationResult:\nr\"\"\"Computes the exact Shapley value using the combinatorial definition.\n    $$v_u(i) = \\frac{1}{n} \\sum_{S \\subseteq N \\setminus \\{i\\}} \\binom{n-1}{ | S | }^{-1} [u(S \\cup \\{i\\}) \u2212 u(S)].$$\n    See [Data valuation][computing-data-values] for details.\n    !!! Note\n        If the length of the training set is > n_jobs*20 this prints a warning\n        because the computation is very expensive. Used mostly for internal testing\n        and simple use cases. Please refer to the\n        [Monte Carlo][pydvl.value.shapley.montecarlo] approximations for practical\n        applications.\n    Args:\n        u: Utility object with model, data, and scoring function\n        n_jobs: Number of parallel jobs to use\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        progress: Whether to display progress bars for each job.\n    Returns:\n        Object with the data values.\n    \"\"\"\n# Arbitrary choice, will depend on time required, caching, etc.\nif len(u.data) // n_jobs > 20:\nwarnings.warn(\nf\"Large dataset! Computation requires 2^{len(u.data)} calls to model.fit()\"\n)\ndef reduce_fun(results: List[NDArray]) -> NDArray:\nreturn np.array(results).sum(axis=0)  # type: ignore\nmap_reduce_job: MapReduceJob[NDArray, NDArray] = MapReduceJob(\nu.data.indices,\nmap_func=_combinatorial_exact_shapley,\nmap_kwargs=dict(u=u, progress=progress),\nreduce_func=reduce_fun,\nn_jobs=n_jobs,\nconfig=config,\n)\nvalues = map_reduce_job()\nreturn ValuationResult(\nalgorithm=\"combinatorial_exact_shapley\",\nstatus=Status.Converged,\nvalues=values,\ndata_names=u.data.data_names,\n)\n
    "},{"location":"api/pydvl/value/shapley/owen/","title":"Owen","text":""},{"location":"api/pydvl/value/shapley/owen/#pydvl.value.shapley.owen--references","title":"References","text":"
    1. Okhrati, R., Lipani, A., 2021. A Multilinear Sampling Algorithm to Estimate Shapley Values. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 7992\u20137999. IEEE.\u00a0\u21a9

    "},{"location":"api/pydvl/value/shapley/owen/#pydvl.value.shapley.owen.owen_sampling_shapley","title":"owen_sampling_shapley(u, n_samples, max_q, *, method=OwenAlgorithm.Standard, n_jobs=1, config=ParallelConfig(), progress=False, seed=None)","text":"

    Owen sampling of Shapley values as described in (Okhrati and Lipani, 2021)1.

    This function computes a Monte Carlo approximation to

    \\[v_u(i) = \\int_0^1 \\mathbb{E}_{S \\sim P_q(D_{\\backslash \\{i\\}})} [u(S \\cup \\{i\\}) - u(S)] \\, dq\\]

    using one of two methods. The first one, selected with the argument method = OwenAlgorithm.Standard, approximates the integral with:

    \\[\\hat{v}_u(i) = \\frac{1}{Q M} \\sum_{j=0}^Q \\sum_{m=1}^M [u(S^{(q_j)}_m \\cup \\{i\\}) - u(S^{(q_j)}_m)],\\]

    where \\(q_j = \\frac{j}{Q} \\in [0,1]\\) and the sets \\(S^{(q_j)}\\) are such that a sample \\(x \\in S^{(q_j)}\\) if a draw from a \\(Ber(q_j)\\) distribution is 1.

    The second method, selected with the argument method = OwenAlgorithm.Antithetic, uses correlated samples in the inner sum to reduce the variance:

    \\[\\hat{v}_u(i) = \\frac{1}{2 Q M} \\sum_{j=0}^Q \\sum_{m=1}^M [u(S^{(q_j)}_m \\cup \\{i\\}) - u(S^{(q_j)}_m) + u((S^{(q_j)}_m)^c \\cup \\{i\\}) - u((S^{( q_j)}_m)^c)],\\]

    where now \\(q_j = \\frac{j}{2Q} \\in [0,\\frac{1}{2}]\\), and \\(S^c\\) is the complement of \\(S\\).

    Note

    The outer integration could be done instead with a quadrature rule.

    PARAMETER DESCRIPTION u

    Utility object holding data, model and scoring function.

    TYPE: Utility

    n_samples

    Number of sets to sample for each value of q

    TYPE: int

    max_q

    Number of subdivisions for q \u2208 [0,1] (the element sampling probability) used to approximate the outer integral.

    TYPE: int

    method

    Selects the algorithm to use, see the description. Either OwenAlgorithm.Standard for \\(q \\in [0,1]\\) or OwenAlgorithm.Antithetic for \\(q \\in [0,0.5]\\) and correlated samples.

    TYPE: OwenAlgorithm DEFAULT: Standard

    n_jobs

    Number of parallel jobs to use. Each worker receives a chunk of the total of max_q values for q.

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display progress bars for each job.

    TYPE: bool DEFAULT: False

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    New in version 0.3.0

    Changed in version 0.5.0

    Support for parallel computation and enable antithetic sampling.

    Source code in src/pydvl/value/shapley/owen.py
    def owen_sampling_shapley(\nu: Utility,\nn_samples: int,\nmax_q: int,\n*,\nmethod: OwenAlgorithm = OwenAlgorithm.Standard,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Optional[Seed] = None\n) -> ValuationResult:\nr\"\"\"Owen sampling of Shapley values as described in\n    (Okhrati and Lipani, 2021)<sup><a href=\"#okhrati_multilinear_2021\">1</a></sup>.\n    This function computes a Monte Carlo approximation to\n    $$v_u(i) = \\int_0^1 \\mathbb{E}_{S \\sim P_q(D_{\\backslash \\{i\\}})}\n    [u(S \\cup \\{i\\}) - u(S)]$$\n    using one of two methods. The first one, selected with the argument ``mode =\n    OwenAlgorithm.Standard``, approximates the integral with:\n    $$\\hat{v}_u(i) = \\frac{1}{Q M} \\sum_{j=0}^Q \\sum_{m=1}^M [u(S^{(q_j)}_m\n    \\cup \\{i\\}) - u(S^{(q_j)}_m)],$$\n    where $q_j = \\frac{j}{Q} \\in [0,1]$ and the sets $S^{(q_j)}$ are such that a\n    sample $x \\in S^{(q_j)}$ if a draw from a $Ber(q_j)$ distribution is 1.\n    The second method, selected with the argument ``mode =\n    OwenAlgorithm.Antithetic``, uses correlated samples in the inner sum to\n    reduce the variance:\n    $$\\hat{v}_u(i) = \\frac{1}{2 Q M} \\sum_{j=0}^Q \\sum_{m=1}^M [u(S^{(q_j)}_m\n    \\cup \\{i\\}) - u(S^{(q_j)}_m) + u((S^{(q_j)}_m)^c \\cup \\{i\\}) - u((S^{(\n    q_j)}_m)^c)],$$\n    where now $q_j = \\frac{j}{2Q} \\in [0,\\frac{1}{2}]$, and $S^c$ is the\n    complement of $S$.\n    !!! Note\n        The outer integration could be done instead with a quadrature rule.\n    Args:\n        u: [Utility][pydvl.utils.utility.Utility] object holding data, model\n            and scoring function.\n        n_samples: Numer of sets to sample for each value of q\n        max_q: Number of subdivisions for q \u2208 [0,1] (the element sampling\n            probability) used to approximate the outer integral.\n        method: Selects the algorithm to use, see the description. 
Either\n            [OwenAlgorithm.Full][pydvl.value.shapley.owen.OwenAlgorithm] for\n            $q \\in [0,1]$ or\n            [OwenAlgorithm.Halved][pydvl.value.shapley.owen.OwenAlgorithm] for\n            $q \\in [0,0.5]$ and correlated samples\n        n_jobs: Number of parallel jobs to use. Each worker receives a chunk\n            of the total of `max_q` values for q.\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        progress: Whether to display progress bars for each job.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n    Returns:\n        Object with the data values.\n    !!! tip \"New in version 0.3.0\"\n    !!! tip \"Changed in version 0.5.0\"\n        Support for parallel computation and enable antithetic sampling.\n    \"\"\"\nmap_reduce_job: MapReduceJob[NDArray, ValuationResult] = MapReduceJob(\nu.data.indices,\nmap_func=_owen_sampling_shapley,\nreduce_func=lambda results: reduce(operator.add, results),\nmap_kwargs=dict(\nu=u,\nmethod=OwenAlgorithm(method),\nn_samples=n_samples,\nmax_q=max_q,\nprogress=progress,\n),\nn_jobs=n_jobs,\nconfig=config,\n)\nreturn map_reduce_job(seed=seed)\n
    "},{"location":"api/pydvl/value/shapley/truncated/","title":"Truncated","text":""},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated--references","title":"References","text":"
    1. Ghorbani, A., Zou, J., 2019. Data Shapley: Equitable Valuation of Data for Machine Learning. In: Proceedings of the 36th International Conference on Machine Learning, PMLR, pp. 2242\u20132251.\u00a0\u21a9

    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.TruncationPolicy","title":"TruncationPolicy()","text":"

    Bases: ABC

    A policy for deciding whether to stop computing marginals in a permutation.

    Statistics are kept on the number of calls and truncations as n_calls and n_truncations respectively.

    ATTRIBUTE DESCRIPTION n_calls

    Number of calls to the policy.

    TYPE: int

    n_truncations

    Number of truncations made by the policy.

    TYPE: int

    Todo

    Because the policy objects are copied to the workers, the statistics are not accessible from the coordinating process. We need to add methods for this.

    Source code in src/pydvl/value/shapley/truncated.py
    def __init__(self):\nself.n_calls: int = 0\nself.n_truncations: int = 0\n
    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.TruncationPolicy.reset","title":"reset() abstractmethod","text":"

    Reset the policy to a state ready for a new permutation.

    Source code in src/pydvl/value/shapley/truncated.py
    @abc.abstractmethod\ndef reset(self):\n\"\"\"Reset the policy to a state ready for a new permutation.\"\"\"\n...\n
    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.TruncationPolicy.__call__","title":"__call__(idx, score)","text":"

    Check whether the computation should be interrupted.

    PARAMETER DESCRIPTION idx

    Position in the permutation currently being computed.

    TYPE: int

    score

    Last utility computed.

    TYPE: float

    RETURNS DESCRIPTION bool

    True if the computation should be interrupted.

    Source code in src/pydvl/value/shapley/truncated.py
    def __call__(self, idx: int, score: float) -> bool:\n\"\"\"Check whether the computation should be interrupted.\n    Args:\n        idx: Position in the permutation currently being computed.\n        score: Last utility computed.\n    Returns:\n        `True` if the computation should be interrupted.\n    \"\"\"\nret = self._check(idx, score)\nself.n_calls += 1\nself.n_truncations += 1 if ret else 0\nreturn ret\n
    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.NoTruncation","title":"NoTruncation","text":"

    Bases: TruncationPolicy

    A policy which never interrupts the computation.

    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.FixedTruncation","title":"FixedTruncation(u, fraction)","text":"

    Bases: TruncationPolicy

    Break a permutation after computing a fixed number of marginals.

    The experiments in Appendix B of (Ghorbani and Zou, 2019)1 show that when the training set size is large enough, one can simply truncate the iteration over permutations after a fixed number of steps. This happens because beyond a certain number of samples in a training set, the model becomes insensitive to new ones. Alas, this strongly depends on the data distribution and the model and there is no automatic way of estimating this number.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    fraction

    Fraction of marginals in a permutation to compute before stopping (e.g. 0.5 to compute half of the marginals).

    TYPE: float

    Source code in src/pydvl/value/shapley/truncated.py
    def __init__(self, u: Utility, fraction: float):\nsuper().__init__()\nif fraction <= 0 or fraction > 1:\nraise ValueError(\"fraction must be in (0, 1]\")\nself.max_marginals = len(u.data) * fraction\nself.count = 0\n
    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.RelativeTruncation","title":"RelativeTruncation(u, rtol)","text":"

    Bases: TruncationPolicy

    Break a permutation if the marginal utility is too low.

    This is called \"performance tolerance\" in (Ghorbani and Zou, 2019)1.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    rtol

    Relative tolerance. The permutation is broken if the last computed utility is less than total_utility * rtol.

    TYPE: float

    Source code in src/pydvl/value/shapley/truncated.py
    def __init__(self, u: Utility, rtol: float):\nsuper().__init__()\nself.rtol = rtol\nlogger.info(\"Computing total utility for permutation truncation.\")\nself.total_utility = u(u.data.indices)\n
    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.BootstrapTruncation","title":"BootstrapTruncation(u, n_samples, sigmas=1)","text":"

    Bases: TruncationPolicy

    Break a permutation if the last computed utility is close to the total utility, measured as a multiple of the standard deviation of the utilities.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    n_samples

    Number of bootstrap samples to use to compute the variance of the utilities.

    TYPE: int

    sigmas

    Number of standard deviations to use as a threshold.

    TYPE: float DEFAULT: 1

    Source code in src/pydvl/value/shapley/truncated.py
    def __init__(self, u: Utility, n_samples: int, sigmas: float = 1):\nsuper().__init__()\nself.n_samples = n_samples\nlogger.info(\"Computing total utility for permutation truncation.\")\nself.total_utility = u(u.data.indices)\nself.count: int = 0\nself.variance: float = 0\nself.mean: float = 0\nself.sigmas: float = sigmas\n
    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.truncated_montecarlo_shapley","title":"truncated_montecarlo_shapley(u, *, done, truncation, config=ParallelConfig(), n_jobs=1, coordinator_update_period=10, worker_update_period=5)","text":"

    Warning

    This method is deprecated and only a wrapper for permutation_montecarlo_shapley.

    Todo

    Think of how to add Gelman-Rubin or some other more principled stopping criterion.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    done

    Check on the results which decides when to stop sampling permutations.

    TYPE: StoppingCriterion

    truncation

    callable that decides whether to stop computing marginals for a given permutation.

    TYPE: TruncationPolicy

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    n_jobs

    Number of permutation monte carlo jobs to run concurrently.

    TYPE: int DEFAULT: 1

    Returns: Object with the data values.

    Source code in src/pydvl/value/shapley/truncated.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.8.0\",\nargs_mapping=dict(coordinator_update_period=None, worker_update_period=None),\n)\ndef truncated_montecarlo_shapley(\nu: Utility,\n*,\ndone: StoppingCriterion,\ntruncation: TruncationPolicy,\nconfig: ParallelConfig = ParallelConfig(),\nn_jobs: int = 1,\ncoordinator_update_period: int = 10,\nworker_update_period: int = 5,\n) -> ValuationResult:\n\"\"\"\n    !!! Warning\n        This method is deprecated and only a wrapper for\n        [permutation_montecarlo_shapley][pydvl.value.shapley.montecarlo.permutation_montecarlo_shapley].\n    !!! Todo\n        Think of how to add Robin-Gelman or some other more principled stopping\n        criterion.\n    Args:\n        u: Utility object with model, data, and scoring function\n        done: Check on the results which decides when to stop sampling\n            permutations.\n        truncation: callable that decides whether to stop computing marginals\n            for a given permutation.\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        n_jobs: Number of permutation monte carlo jobs to run concurrently.\n    Returns:\n        Object with the data values.\n    \"\"\"\nfrom pydvl.value.shapley.montecarlo import permutation_montecarlo_shapley\nreturn cast(\nValuationResult,\npermutation_montecarlo_shapley(\nu, done=done, truncation=truncation, config=config, n_jobs=n_jobs\n),\n)\n
    "},{"location":"api/pydvl/value/shapley/types/","title":"Types","text":""},{"location":"api/pydvl/value/shapley/types/#pydvl.value.shapley.types.ShapleyMode","title":"ShapleyMode","text":"

    Bases: str, Enum

    Supported algorithms for the computation of Shapley values.

    Todo

    Make algorithms register themselves here.

    "},{"location":"examples/data_oob/","title":"Data OOB","text":"

    This notebook introduces the Data-OOB method, an implementation based on a publication from Kwon and Zou \"Data-OOB: Out-of-bag Estimate as a Simple and Efficient Data Value\" ICML 2023, using pyDVL.

    The objective of this paper is mainly to overcome the computational bottleneck of Shapley-based data valuation methods, which require fitting a significant number of models to accurately estimate marginal contributions. The algorithm computes data values from out-of-bag estimates using a bagging model.

    The value can be interpreted as a partition of the OOB estimate, which is originally introduced to estimate the prediction error. This OOB estimate is given as:

    \\[ \\sum_{i=1}^n\\frac{\\sum_{b=1}^{B}\\mathbb{1}(w_{bi}=0)T(y_i, \\hat{f}_b(x_i))}{\\sum_{b=1}^{B} \\mathbb{1} (w_{bi}=0)} \\]
    \n   age  fnlwgt  education-num  capital-gain  capital-loss  hours-per-week  \\\n0   39   77516             13          2174             0              40   \n1   50   83311             13             0             0              13   \n2   38  215646              9             0             0              40   \n3   53  234721              7             0             0              40   \n4   28  338409             13             0             0              40   \n\n  income  \n0  <=50K  \n1  <=50K  \n2  <=50K  \n3  <=50K  \n4  <=50K  \n\n
    oob_values = compute_data_oob(utility, n_est=1000, max_samples=0.95)\n
    "},{"location":"examples/data_oob/#bagging-for-data-valuation","title":"Bagging for data valuation","text":""},{"location":"examples/data_oob/#setup","title":"Setup","text":"

    We begin by importing the main libraries and setting some defaults.

    If you are reading this in the documentation, some boilerplate (including most plotting code) has been omitted for convenience."},{"location":"examples/data_oob/#variance","title":"Variance","text":"

    The variance is the weak learner variance. It is computed with Welford's online algorithm.

    "},{"location":"examples/data_oob/#point-removal-experiments","title":"Point removal experiments","text":"

    The standard procedure for the evaluation of data valuation schemes is the point removal experiment. The objective is to measure the evolution of performance when the best/worst points are removed from the training set.

    "},{"location":"examples/influence_imagenet/","title":"For CNNs","text":"If you are reading this in the documentation, some boilerplate has been omitted for convenience.
    %load_ext autoreload\n
    %autoreload\n%matplotlib inline\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport os\nimport pandas as pd\nimport torch\nfrom torch import nn\nfrom notebook_support import (\nplot_sample_images,\nplot_lowest_highest_influence_images,\nplot_losses,\ncorrupt_imagenet,\nload_preprocess_imagenet,\nplot_corrupted_influences_distribution,\ncompute_mean_corrupted_influences,\nTrainingManager,\nMODEL_PATH,\nnew_resnet_model,\n)\ndefault_figsize = (7, 7)\nplt.rcParams[\"figure.figsize\"] = default_figsize\nplt.rcParams[\"font.size\"] = 12\nplt.rcParams[\"xtick.labelsize\"] = 12\nplt.rcParams[\"ytick.labelsize\"] = 10\nhessian_reg = 1e4 if os.environ.get(\"CI\") else 1e-3\nrandom_state = 42\nnp.random.seed(random_state)\nDEVICE = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n
    from pydvl.influence.general import compute_influences\nfrom pydvl.reporting.plots import plot_influence_distribution_by_label\nfrom sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score\n
    label_names = {90: \"tables\", 100: \"boats\"}\ntrain_ds, val_ds, test_ds = load_preprocess_imagenet(\ntrain_size=0.8,\ntest_size=0.1,\nkeep_labels=label_names,\ndownsampling_ratio=1,\n)\nprint(\"Normalised image dtype:\", train_ds[\"normalized_images\"][0].dtype)\nprint(\"Label type:\", type(train_ds[\"labels\"][0]))\nprint(\"Image type:\", type(train_ds[\"images\"][0]))\ntrain_ds.info()\n
    \nNormalised image dtype: torch.float32\nLabel type: <class 'str'>\nImage type: <class 'PIL.JpegImagePlugin.JpegImageFile'>\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 707 entries, 0 to 706\nData columns (total 3 columns):\n #   Column             Non-Null Count  Dtype \n---  ------             --------------  ----- \n 0   normalized_images  707 non-null    object\n 1   labels             707 non-null    object\n 2   images             707 non-null    object\ndtypes: object(3)\nmemory usage: 16.7+ KB\n\n

    Let's take a closer look at a few image samples

    plot_sample_images(train_ds, n_images_per_class=3)\n

    Let's now further pre-process the data and prepare for model training. The helper function process_io converts the normalized images into tensors and the labels to the indices 0 and 1 to train the classifier.

    from typing import Tuple\ndef process_io(df: pd.DataFrame, labels: dict) -&gt; Tuple[torch.Tensor, torch.Tensor]:\nx = df[\"normalized_images\"]\ny = df[\"labels\"]\nds_label_to_model_label = {\nds_label: idx for idx, ds_label in enumerate(labels.values())\n}\nx_nn = torch.stack(x.tolist()).to(DEVICE)\ny_nn = torch.tensor([ds_label_to_model_label[yi] for yi in y], device=DEVICE)\nreturn x_nn, y_nn\ntrain_x, train_y = process_io(train_ds, label_names)\nval_x, val_y = process_io(val_ds, label_names)\ntest_x, test_y = process_io(test_ds, label_names)\n
    model_ft = new_resnet_model(output_size=len(label_names))\nmgr = TrainingManager(\n\"model_ft\",\nmodel_ft,\nnn.CrossEntropyLoss(),\ntrain_x,\ntrain_y,\nval_x,\nval_y,\nMODEL_PATH,\n)\n# Set use_cache=False to retrain the model\ntrain_loss, val_loss = mgr.train(n_epochs=50, use_cache=True)\n
    \nCached model found, loading...\n\n
    plot_losses(train_loss, val_loss)\n

    The confusion matrix and \\(F_1\\) score look good, especially considering the low resolution of the images and their complexity (they contain different objects)

    pred_y_test = np.argmax(model_ft(test_x).detach(), axis=1)\nmodel_score = f1_score(test_y, pred_y_test, average=\"weighted\")\ncm = confusion_matrix(test_y, pred_y_test)\ndisp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_names.values())\nprint(\"f1_score of model:\", model_score)\ndisp.plot();\n
    \nf1_score of model: 0.8468272032336833\n\n
    influences = compute_influences(\nmodel=mgr.model,\nloss=mgr.loss,\nx=train_x,\ny=train_y,\nx_test=test_x,\ny_test=test_y,\nhessian_regularization=hessian_reg,\ninversion_method=\"cg\",\ninfluence_type=\"up\",\nprogress=True,\n)\n
    test_image_idx = 42\nmodel_label_to_ds_label = {\nidx: ds_label for idx, ds_label in enumerate(label_names.values())\n}\npredicted_label = model_label_to_ds_label[\nnp.argmax(model_ft(test_x[test_image_idx].unsqueeze(0)).detach(), axis=1).item()\n]\ntrue_label = test_ds[\"labels\"][test_image_idx]\nplt.rcParams[\"figure.figsize\"] = (3, 3)\nplt.imshow(test_ds[\"images\"][test_image_idx])\nplt.axis(\"off\")\nplt.title(f\"Predicted: {predicted_label} - True: {true_label}\")\nplt.show()\n

    Now we plot the histogram of the influence that all training images have on the image selected above, separated by their label.

    plt.rcParams[\"figure.figsize\"] = default_figsize\nplot_influence_distribution_by_label(\ninfluences[test_image_idx],\ntrain_ds[\"labels\"].values,\ntitle_extra=f\"over index {test_image_idx}\",\n)\n

    Rather unsurprisingly, the training samples that have the same label as the test image have, on average, a higher influence on the classifier's output for it. Let's then take them and visualize those with the highest and lowest influence:

    images_with_same_label = train_ds[\"labels\"] == test_ds[\"labels\"][test_image_idx]\ninfluence_values_with_same_label = influences[test_image_idx][images_with_same_label]\nimages_same_label = train_ds[\"images\"][images_with_same_label].values\nplot_lowest_highest_influence_images(\ninfluence_values_with_same_label, subset_images=images_same_label, num_to_plot=3\n)\n

    Looking at the images, it is difficult to explain why those on the right are more influential than those on the left. At first sight, the choice seems to be random (or at the very least noisy). Let's dig in a bit more by looking at average influences:

    avg_influences = np.mean(influences, axis=0)\n

    Once again, let's plot the histogram of influence values by label.

    plot_influence_distribution_by_label(\navg_influences, train_ds[\"labels\"].values, \"over all test samples\"\n)\n

    Next, for each class (you can select a different one by changing the value of label) we can have a look at the top and bottom images by average influence, i.e. we can show the images that have the highest and lowest average influence over all test images.

    label = \"tables\"\nimg_with_selected_label = train_ds[\"labels\"] == label\nif_selected_label = avg_influences[img_with_selected_label]\nimges_same_label = train_ds[\"images\"][img_with_selected_label].values\nplot_lowest_highest_influence_images(if_selected_label, imges_same_label, num_to_plot=3)\n

    Once again, it is not easy to explain why the images on the left have a lower influence than the ones on the right. One could argue that in order to predict that there is a dining table in the image it is beneficial to clearly see both the chairs and the table itself, a feature missing in some samples on the left. Also, color seems to be a discriminant: houses with a blue painting could get confused with the water around a boat. Of course, this is debatable and different people could come up with other explanations a posteriori.

    corrupted_model = new_resnet_model(output_size=len(label_names))\ncorrupted_dataset, corrupted_indices = corrupt_imagenet(\ndataset=train_ds,\nfraction_to_corrupt=0.1,\navg_influences=avg_influences,\n)\ncorrupted_train_x, corrupted_train_y = process_io(corrupted_dataset, label_names)\nmgr = TrainingManager(\n\"corrupted_model\",\ncorrupted_model,\nnn.CrossEntropyLoss(),\ncorrupted_train_x,\ncorrupted_train_y,\nval_x,\nval_y,\nMODEL_PATH,\n)\ntraining_loss, validation_loss = mgr.train(n_epochs=50, use_cache=True)\n
    \nCached model found, loading...\n\n
    plot_losses(training_loss, validation_loss)\n
    pred_y_test = np.argmax(corrupted_model(test_x).detach(), axis=1)\nmodel_score = f1_score(test_y, pred_y_test, average=\"weighted\")\nprint(\"F1 score of model with corrupted data:\", model_score)\n
    \nF1 score of model with corrupted data: 0.8164795918367347\n\n

    Interestingly, despite being trained on a corrupted dataset, the model has a fairly high \\(F_1\\) score. Let's now calculate the influence of the corrupted training data points over the test data points.

    influences = compute_influences(\nmodel=mgr.model,\nloss=mgr.loss,\nx=corrupted_train_x,\ny=corrupted_train_y,\nx_test=test_x,\ny_test=test_y,\nhessian_regularization=hessian_reg,\ninversion_method=\"cg\",\ninfluence_type=\"up\",\nprogress=True,\n)\n
    \nSplit Gradient:   0%|          | 0/98 [00:00<?, ?it/s]\n
    \nConjugate gradient:   0%|          | 0/98 [00:00<?, ?it/s]\n
    \nSplit Gradient:   0%|          | 0/707 [00:00<?, ?it/s]\n

    As before, since we are interested in the average influence on the test dataset, we take the average of influences across rows, and then plot the highest and lowest influences for a chosen label

    avg_corrupted_influences = np.mean(influences, axis=0)\n
    label = \"boats\"\nimg_with_selected_label = corrupted_dataset[\"labels\"] == label\nif_selected_label = avg_corrupted_influences[img_with_selected_label]\nimges_same_label = corrupted_dataset[\"images\"][img_with_selected_label].values\nplot_lowest_highest_influence_images(if_selected_label, imges_same_label, num_to_plot=3)\n

    As expected, the samples with lowest (negative) influence for the label \"boats\" are those that have been corrupted: all the images on the left are tables! We can compare the average influence of corrupted data with non-corrupted ones

    plot_corrupted_influences_distribution(\ncorrupted_dataset, corrupted_indices, avg_corrupted_influences\n)\n
    compute_mean_corrupted_influences(\ncorrupted_dataset, corrupted_indices, avg_corrupted_influences\n)\n
    label avg_non_corrupted_infl avg_corrupted_infl score_diff 0 boats 0.945390 -0.890972 1.836362 1 tables -1.092637 -2.757206 1.664569

    And indeed corrupted data have a more negative influence on average than clean ones!

    Despite this being a useful property, influence functions are known to be unreliable for tasks of data valuation, especially in deep learning where the fundamental assumption of the theory (convexity) is grossly violated. A lot of factors (e.g. the size of the network, the training process or the Hessian regularization term) can interfere with the computation, to the point that often the results that we obtain cannot be trusted. This has been extensively studied in the recent paper:

    Basu, S., P. Pope, and S. Feizi. Influence Functions in Deep Learning Are Fragile. International Conference on Learning Representations (ICLR). 2021.

    Nevertheless, influence functions offer a relatively quick and mathematically rigorous way to evaluate (at first order) the importance of a training point for a model's prediction.

    "},{"location":"examples/influence_imagenet/#influence-functions-for-neural-networks","title":"Influence functions for neural networks","text":"

    This notebook explores the use of influence functions for convolutional neural networks. In the first part we will investigate the usefulness, or lack thereof, of influence functions for the interpretation of a classifier's outputs.

    For our study we choose a pre-trained ResNet18, fine-tuned on the tiny-imagenet dataset. This dataset was created for a Stanford course on Deep Learning for Computer Vision, and is a subset of the famous ImageNet with 200 classes instead of 1000, and images down-sampled to a lower resolution of 64x64 pixels.

    After tuning the last layers of the network, we will use pyDVL to find the most and the least influential training images for the test set. This can sometimes be used to explain inference errors, or to direct efforts during data collection, although we will face inconclusive results with our model and data. This illustrates well-known issues of influence functions for neural networks.

    However, in the final part of the notebook we will see that influence functions are an effective tool for finding anomalous or corrupted data points.

    We conclude with an appendix with some basic theoretical concepts used.

    "},{"location":"examples/influence_imagenet/#imports-and-setup","title":"Imports and setup","text":""},{"location":"examples/influence_imagenet/#loading-and-preprocessing-the-dataset","title":"Loading and preprocessing the dataset","text":"

    We pick two classes arbitrarily to work with: 90 and 100, corresponding respectively to dining tables, and boats in Venice (you can of course select any other two classes, or more of them, although that would imply longer training times and some modifications in the notebook below). The dataset is loaded with load_preprocess_imagenet(), which returns three pandas DataFrames with training, validation and test sets respectively. Each dataframe has three columns: normalized images, labels and the original images. Note that you can load a subset of the data by decreasing downsampling_ratio.

    "},{"location":"examples/influence_imagenet/#model-definition-and-training","title":"Model definition and training","text":"

    We use a ResNet18 from torchvision with final layers modified for binary classification.

    Training for influence computation is facilitated by pydvl.influence.model_wrappers.torch_wrappers.TorchModel, a convenience wrapper around torch models which is part of pyDVL. We wrap this with a simple class TrainingManager which transparently handles persistence after training. The latter is not part of the main pyDVL package but just a way to reduce clutter in this notebook.

    We train the model for 50 epochs and save the results. Then we plot the train and validation loss curves.

    "},{"location":"examples/influence_imagenet/#influence-computation","title":"Influence computation","text":"

    Let's now calculate influences! The main function is pydvl.influence.general.compute_influences, which takes a trained nn.Module, the training loss, some input dataset with labels (which typically is the training data, or a subset of it) and some test data.

    Another important parameter is the Hessian regularization term, which should be chosen as small as possible for the computation to converge (further details on why this is important can be found in the Appendix).

    Since Resnet18 is quite big, we pick conjugate gradient (cg) as the method for inversion of the Hessian. A naive computation would require a lot of memory. Finally, the influence type will be up. The other option, perturbation, is beyond the scope of this notebook, but more info can be found in the notebook using the Wine dataset or in the documentation for pyDVL.

    The output of compute_influences is a matrix of size test_set_length x training_set_length. Each row represents a test data point, and each column a training data point, so that entry \\((i,j)\\) represents the influence of training point \\(j\\) on test point \\(i\\).

    "},{"location":"examples/influence_imagenet/#analysing-influences","title":"Analysing influences","text":"

    With the computed influences we can study single images or all of them together:

    "},{"location":"examples/influence_imagenet/#influence-on-a-single-test-image","title":"Influence on a single test image","text":"

    Let's take any image in the test set:

    "},{"location":"examples/influence_imagenet/#analysing-the-average-influence-on-test-samples","title":"Analysing the average influence on test samples","text":"

    By averaging across the rows of the influence matrix, we obtain the average influence of each training sample on the whole test set:

    "},{"location":"examples/influence_imagenet/#detecting-corrupted-data","title":"Detecting corrupted data","text":"

    After facing the shortcomings of influence functions for explaining decisions, we move to an application with clear-cut results. Influences can be successfully used to detect corrupted or mislabeled samples, making them an effective tool to \"debug\" training data.

    We begin by training a new model (with the same architecture as before) on a dataset with some corrupted labels. The method get_corrupted_imagenet will take the training dataset and corrupt a certain fraction of the labels by flipping them. We use the same number of epochs and optimizer as before.

    "},{"location":"examples/influence_imagenet/#theory-of-influence-functions-for-neural-networks","title":"Theory of influence functions for neural networks","text":"

    In this appendix we will briefly go through the basic ideas of influence functions adapted for neural networks as introduced in Koh, Pang Wei, and Percy Liang. \"Understanding Black-box Predictions via Influence Functions\" International conference on machine learning. PMLR, 2017.

    Note however that this paper departs from the standard and established theory and notation for influence functions. For a rigorous introduction to the topic we recommend classical texts like Hampel, Frank R., Elvezio M. Ronchetti, Peter J. Rousseeuw, and Werner A. Stahel. Robust Statistics: The Approach Based on Influence Functions. 1st edition. Wiley Series in Probability and Statistics. New York: Wiley-Interscience, 2005. https://doi.org/10.1002/9781118186435.

    "},{"location":"examples/influence_imagenet/#upweighting-points","title":"Upweighting points","text":"

    Let's start by considering some input space \\(\\mathcal{X}\\) to a model (e.g. images) and an output space \\(\\mathcal{Y}\\) (e.g. labels). Let's take \\(z_i = (x_i, y_i)\\) to be the \\(i\\)-th training point, and \\(\\theta\\) to be the (potentially highly) multi-dimensional parameters of the neural network (i.e. \\(\\theta\\) is a big array with very many parameters). We will indicate with \\(L(z, \\theta)\\) the loss of the model for point \\(z\\) and parameters \\(\\theta\\). When training the model we minimize the loss over all points, i.e. the optimal parameters are calculated through gradient descent on the following formula: $$ \\hat{\\theta} = \\arg \\min_\\theta \\frac{1}{n}\\sum_{i=1}^n L(z_i, \\theta) $$ where \\(n\\) is the total number of training data points.

    For notational convenience, let's define $$ \\hat{\\theta}_{-z} = \\arg \\min_\\theta \\frac{1}{n}\\sum_{z_i \\ne z} L(z_i, \\theta) \\ , $$ i.e. \\(\\hat{\\theta}_{-z}\\) are the model parameters that minimize the total loss when \\(z\\) is not in the training dataset.

    In order to check the impact of each training point on the model, we would need to calculate \\(\\hat{\\theta}_{-z}\\) for each \\(z\\) in the training dataset, thus re-training the model at least ~\\(n\\) times (more if model training is noisy). This is computationally very expensive, especially for big neural networks. To circumvent this problem, we can just calculate a first order approximation of \\(\\hat{\\theta}\\). This can be done through single backpropagation and without re-training the full model.

    Let's define $$ \\hat{\\theta}_{\\epsilon, z} = \\arg \\min_\\theta \\frac{1}{n}\\sum_{i=1}^n L(z_i, \\theta) + \\epsilon L(z, \\theta) \\ , $$ which is the optimal \\(\\hat{\\theta}\\) if we were to up-weigh \\(z\\) by an amount \\(\\epsilon\\).

    From a classical result (a simple derivation is available in Appendix A of Koh and Liang's paper), we know that: $$ \\frac{d \\ \\hat{\\theta}_{\\epsilon, z}}{d \\epsilon} \\Big|_{\\epsilon=0} = -H_{\\hat{\\theta}}^{-1} \\nabla_\\theta L(z, \\hat{\\theta}) $$ where \\(H_{\\hat{\\theta}} = \\frac{1}{n} \\sum_{i=1}^n \\nabla_\\theta^2 L(z_i, \\hat{\\theta})\\) is the Hessian of \\(L\\). Importantly, notice that this expression is only valid when \\(\\hat{\\theta}\\) is a minimum of \\(L\\), or otherwise \\(H_{\\hat{\\theta}}\\) cannot be inverted!

    "},{"location":"examples/influence_imagenet/#approximating-the-influence-of-a-point","title":"Approximating the influence of a point","text":"

    We will define the influence of training point \\(z\\) on test point \\(z_{\\text{test}}\\) as \\(\\mathcal{I}(z, z_{\\text{test}}) = L(z_{\\text{test}}, \\hat{\\theta}_{-z}) - L(z_{\\text{test}}, \\hat{\\theta})\\) (notice that it is higher for points \\(z\\) which positively impact the model score, since if they are excluded, the loss is higher). In practice, however, we will always use the infinitesimal approximation \\(\\mathcal{I}_{up}(z, z_{\\text{test}})\\), defined as $$ \\mathcal{I}_{up}(z, z_{\\text{test}}) = - \\frac{d L(z_{\\text{test}}, \\hat{\\theta}_{\\epsilon, z})}{d \\epsilon} \\Big|_{\\epsilon=0} $$

    Using the chain rule and the results calculated above, we thus have:

    \\[ \\mathcal{I}_{up}(z, z_{\\text{test}}) = - \\nabla_\\theta L(z_{\\text{test}}, \\hat{\\theta})^\\top \\ \\frac{d \\hat{\\theta}_{\\epsilon, z}}{d \\epsilon} \\Big|_{\\epsilon=0} = \\nabla_\\theta L(z_{\\text{test}}, \\hat{\\theta})^\\top \\ H_{\\hat{\\theta}}^{-1} \\ \\nabla_\\theta L(z, \\hat{\\theta}) \\]

    In order to calculate this expression we need the gradient and the Hessian of the loss wrt. the model parameters \\(\\hat{\\theta}\\). This can be easily done through a single backpropagation pass.

    "},{"location":"examples/influence_imagenet/#regularizing-the-hessian","title":"Regularizing the Hessian","text":"

    One very important assumption that we make when approximating influence is that \\(\\hat{\\theta}\\) is at least a local minimum of the loss. However, we clearly cannot guarantee this except for convex models, and despite good apparent convergence, \\(\\hat{\\theta}\\) might be located in a region with flat curvature or close to a saddle point. In particular, the Hessian might have vanishing eigenvalues making its direct inversion impossible.

    To circumvent this problem, instead of inverting the true Hessian \\(H_{\\hat{\\theta}}\\), one can invert a small perturbation thereof: \\(H_{\\hat{\\theta}} + \\lambda \\mathbb{I}\\), with \\(\\mathbb{I}\\) being the identity matrix. This standard trick ensures that the eigenvalues of \\(H_{\\hat{\\theta}} + \\lambda \\mathbb{I}\\) are bounded away from zero and therefore the matrix is invertible. In order for this regularization not to corrupt the outcome too much, the parameter \\(\\lambda\\) should be as small as possible while still allowing a reliable inversion of \\(H_{\\hat{\\theta}} + \\lambda \\mathbb{I}\\).

    "},{"location":"examples/influence_synthetic/","title":"For mislabeled data","text":"
    %load_ext autoreload\n
    %autoreload\n%matplotlib inline\nimport os\nimport random\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\nimport matplotlib.pyplot as plt\nfrom pydvl.influence import compute_influences, TorchTwiceDifferentiable\nfrom support.shapley import (\nsynthetic_classification_dataset,\ndecision_boundary_fixed_variance_2d,\n)\nfrom support.common import (\nplot_gaussian_blobs,\nplot_losses,\nplot_influences,\n)\nfrom support.torch import (\nfit_torch_model,\nTorchLogisticRegression,\n)\nfrom sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\nfrom torch.optim import AdamW, lr_scheduler\nfrom torch.utils.data import DataLoader\n
    \n/Users/fabio/miniconda3/envs/pydvl_env/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n\n
    plt.rcParams[\"figure.figsize\"] = (16, 8)\nplt.rcParams[\"font.size\"] = 12\nplt.rcParams[\"xtick.labelsize\"] = 12\nplt.rcParams[\"ytick.labelsize\"] = 10\n
    random_state = 24\nis_CI = os.environ.get(\"CI\")\n
    num_samples = 10000\nnum_features = 2\nsigma = 0.2\nmeans = np.asarray([[0.0, 0.0], [1.0, 1.0]])\n
    random.seed(random_state)\nnp.random.seed(random_state)\n

    The following code snippet generates the aforementioned dataset.

    train_data, val_data, test_data = synthetic_classification_dataset(\nmeans, sigma, num_samples, train_size=0.7, test_size=0.2\n)\n# In CI we only use a subset of the training set\nif is_CI:\ntrain_data = (train_data[0][:10], train_data[1][:10])\n

    Given the simplicity of the dataset, we can calculate exactly the optimal decision boundary (that which maximizes our accuracy). The following code maps a continuous line of z values to a 2-dimensional vector in feature space (more details are in the appendix to this notebook).

    decision_boundary_fn = decision_boundary_fixed_variance_2d(means[0], means[1])\ndecision_boundary = decision_boundary_fn(np.linspace(-1.5, 1.5, 100))\n
    plot_gaussian_blobs(\ntrain_data,\ntest_data,\nxlabel=\"$x_0$\",\nylabel=\"$x_1$\",\nlegend_title=\"$y - labels$\",\nline=decision_boundary,\ns=10,\nsuptitle=\"Plot of train-test data\",\n)\n

    Note that there are samples which go across the optimal decision boundary and will be wrongly labelled. The optimal decision boundary can not discriminate these as the mislabelling is a consequence of the presence of random noise.

    model = TorchLogisticRegression(num_features)\ndevice = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\nmodel.to(device)\nnum_epochs = 50\nlr = 0.05\nweight_decay = 0.05\nbatch_size = 256\ntrain_data_loader = DataLoader(\nlist(zip(train_data[0], train_data[1].astype(float))),\nbatch_size=batch_size,\nshuffle=True,\n)\nval_data_loader = DataLoader(\nlist(zip(val_data[0], val_data[1].astype(float))),\nbatch_size=batch_size,\nshuffle=True,\n)\noptimizer = AdamW(params=model.parameters(), lr=lr, weight_decay=weight_decay)\nscheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)\nlosses = fit_torch_model(\nmodel=model,\ntraining_data=train_data_loader,\nval_data=val_data_loader,\nloss=F.binary_cross_entropy,\noptimizer=optimizer,\nscheduler=scheduler,\nnum_epochs=num_epochs,\n)\n
    \nModel fitting:   0%|          | 0/50 [00:00<?, ?it/s]Model fitting: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 50/50 [00:02<00:00, 19.41it/s]\n\n

    And let's check that the model is not overfitting

    plot_losses(losses)\n

    A look at the confusion matrix also shows good results

    model.eval()\npred_probabilities = model(test_data[0]).detach()\npred_y_test = [1 if prob &gt; 0.5 else 0 for prob in pred_probabilities]\ncm = confusion_matrix(test_data[1], pred_y_test)\ndisp = ConfusionMatrixDisplay(confusion_matrix=cm)\ndisp.plot();\n

    It is important that the model converges to a point near the optimum, since the influence values assume that we are at a minimum (or close) in the loss landscape. The function

    \\[I(x_1, y_1, x_2, y_2) \\colon \\mathbb{R}^d \\times \\mathbb{R}^d \\to \\mathbb{R}\\]

    measures the influence of the data point \\(x_1\\) onto \\(x_2\\) conditioned on the training targets \\(y_1\\) and \\(y_2\\) through some model parameters \\(\\theta\\). If the loss function L is differentiable, we can take \\(I\\) to be

    $$ I(x_1, x_2) = \\nabla_\\theta\\; L(x_1, y_1) ^\\mathsf{T} \\; H_\\theta^{-1} \\; \\nabla_\\theta \\; L(x_2, y_2) $$ See \"Understanding Black-box Predictions via Influence Functions\" for a detailed derivation of this formula

    Let's take a subset of the training data points, which we will calculate the influence values of.

    x = train_data[0][:100]\ny = train_data[1][:100]\n

    In pyDVL, the influence of the training points on the test points can be calculated with the following

    train_data_loader = DataLoader(list(zip(x, y.astype(float))), batch_size=batch_size)\ntest_data_loader = DataLoader(\nlist(zip(test_data[0], test_data[1].astype(float))), batch_size=batch_size\n)\ninfluence_values = compute_influences(\ndifferentiable_model=TorchTwiceDifferentiable(model, F.binary_cross_entropy),\ntraining_data=train_data_loader,\ntest_data=test_data_loader,\ninfluence_type=\"up\",\ninversion_method=\"direct\",  # use 'cg' for big models\n)\n

    The above explicitly constructs the Hessian. This can often be computationally expensive and conjugate gradient approximate calculation should be used for bigger models.

    With the influence type 'up', training influences have shape [NxM] where N is the number of test samples and M is the number of training samples. They therefore associate to each training sample its influence on each test sample. Influence type 'perturbation', instead, returns an array of shape [NxMxF], where F is the number of features in input, i.e. the length of x.

    In our case, in order to have a value of the total average influence of a training point we can just average across test samples.

    mean_train_influences = np.mean(influence_values.numpy(), axis=0)\n

    Let's plot the results (adjust colorbar_limits for better color gradient)

    plot_influences(\nx,\nmean_train_influences,\nline=decision_boundary,\nxlabel=\"$x_0$\",\nylabel=\"$x_1$\",\nsuptitle=\"Influences of input points\",\nlegend_title=\"influence values\",\n# colorbar_limits=(-0.3,),\n);\n

    We can see that, as we approach the separation line, the influences tend to move away from zero, i.e. the points become more decisive for model training, some in a positive way, some negative.

    As a further test, let's introduce some labelling errors into \\(y\\) and see how the distribution of the influences changes. Let's flip the first 10 labels and calculate influences

    y_corrupted = np.copy(y)\ny_corrupted[:10] = [1 - yi for yi in y[:10]]\ntrain_corrupted_data_loader = DataLoader(\nlist(zip(x, y_corrupted.astype(float))), batch_size=batch_size\n)\ninfluence_values = compute_influences(\ndifferentiable_model=TorchTwiceDifferentiable(model, F.binary_cross_entropy),\ntraining_data=train_corrupted_data_loader,\ntest_data=test_data_loader,\ninfluence_type=\"up\",\ninversion_method=\"direct\",\n)\nmean_train_influences = np.mean(influence_values.numpy(), axis=0)\n
    print(\"Average mislabelled data influence:\", np.mean(mean_train_influences[:10]))\nprint(\"Average correct data influence:\", np.mean(mean_train_influences[10:]))\n
    \nAverage mislabelled data influence: -0.8225848370029777\nAverage correct data influence: 0.011277048916970962\n\n
    plot_influences(\nx,\nmean_train_influences,\ncorrupted_indices=np.array(range(10)),\nline=decision_boundary,\nxlabel=\"$x_0$\",\nylabel=\"$x_1$\",\nsuptitle=\"Influences of input points with corrupted data\",\nlegend_title=\"influence values\",\n# colorbar_limits=(-0.3,),\n);\n

    Red circles indicate the points which have been corrupted. We can see that the mislabelled data have a more negative average influence on the model, especially those that are farther away from the decision boundary.

    The \"direct\" method that we have used above involves the inversion of the Hessian matrix of the model. If a model has \\(n\\) training points and \\(\\theta \\in \\mathbb{R}^p\\) parameters, this requires \\(O(n \\ p^2 + p^3)\\) operations, which for larger models, like neural networks, becomes quickly unfeasible. Conjugate gradient avoids the explicit computation of the Hessian via a technique called implicit Hessian-vector products (HVPs), which typically takes \\(O(n \\ p)\\) operations.

    In the next cell we will use conjugate gradient to compute the influence factors. Since logistic regression is a very simple model, \"cg\" actually slows computation with respect to the direct method, which in this case is a much better choice. Nevertheless, we are able to verify that the influences calculated with \"cg\" are the same (to a minor error) as those calculated directly.

    influence_values = compute_influences(\ndifferentiable_model=TorchTwiceDifferentiable(model, F.binary_cross_entropy),\ntraining_data=train_corrupted_data_loader,\ntest_data=test_data_loader,\ninfluence_type=\"up\",\ninversion_method=\"cg\",\nprogress=True,\n)\nmean_train_influences = np.mean(influence_values.numpy(), axis=0)\nprint(\"Average mislabelled data influence:\", np.mean(mean_train_influences[:10]))\nprint(\"Average correct data influence:\", np.mean(mean_train_influences[10:]))\n
    \nBatch Test Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 8/8 [00:00<00:00, 17.89it/s]\nBatch Train Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 308.47it/s]\nConjugate gradient: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [00:16<00:00, 118.24it/s]\nBatch Split Input Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 44.89it/s]\n
    \nAverage mislabelled data influence: -0.82248804123547\nAverage correct data influence: 0.01127580743952819\n\n
    \n\n\n

    The averages are very similar to the ones calculated through the direct method. The same is true for the plot:

    plot_influences(\nx,\nmean_train_influences,\ncorrupted_indices=np.array(range(10)),\nline=decision_boundary,\nxlabel=\"$x_0$\",\nylabel=\"$x_1$\",\nsuptitle=\"Influences of input points with corrupted data\",\nlegend_title=\"influence values\",\n# colorbar_limits=(-0.1, 0.1),\n);\n
    "},{"location":"examples/influence_synthetic/#influence-functions-for-data-mislabeling","title":"Influence functions for data mislabeling","text":"

    In this notebook, we will take a closer look at the theory of influence functions with the help of a synthetic dataset. Data mislabeling occurs whenever some examples from a usually big dataset are wrongly-labeled. In real-life this happens fairly often, e.g. as a consequence of human error, or noise in the data.

    Let's consider a classification problem with the following notation:

    \\[ \\begin{align*} x_i &\\in \\mathbb{R}^d \\\\ y_i &\\in \\{0, 1\\} \\\\ \\forall i &\\in [ N ] \\end{align*} \\]

    In other words, we have a dataset containing \\(N\\) samples, each with label 1 or 0. As a typical example you can think of \\(y\\) indicating whether a patient has a disease based on some feature representation \\(x\\).

    Let's now introduce a toy model that will help us delve into the theory and practical utility of influence functions. We will assume that \\(y\\) is a Bernoulli binary random variable while the input \\(x\\) follows a d-dimensional Gaussian distribution which depends on the label \\(y\\). More precisely:

    \\[ y_i \\sim \\text{Ber}\\left (0.5 \\right) \\\\ x_i \\sim \\mathcal{N}\\left ((1 - y_i) \\mu_1 + y_i \\mu_2, \\sigma^2 I \\right), \\]

    with fixed means and diagonal covariance. Implementing the sampling scheme in python is straightforward and can be achieved by first sampling \\(y\\) and afterward \\(x\\).

    "},{"location":"examples/influence_synthetic/#imports","title":"Imports","text":""},{"location":"examples/influence_synthetic/#constants","title":"Constants","text":""},{"location":"examples/influence_synthetic/#dataset","title":"Dataset","text":""},{"location":"examples/influence_synthetic/#plotting-the-dataset","title":"Plotting the dataset","text":"

    Let's plot the dataset with the respective labels and the optimal decision line.

    "},{"location":"examples/influence_synthetic/#training-the-model","title":"Training the model","text":"

    We will now train a logistic regression model on the training data. This can be done with the following

    "},{"location":"examples/influence_synthetic/#calculating-influences","title":"Calculating influences","text":""},{"location":"examples/influence_synthetic/#inversion-through-conjugate-gradient","title":"Inversion through conjugate gradient","text":""},{"location":"examples/influence_synthetic/#appendix-calculating-the-decision-boundary","title":"Appendix: Calculating the decision boundary","text":"

    For obtaining the optimal discriminator one has to solve the equation

    \\[p(x|y=0)=p(x|y=1)\\]

    and determine the solution set \\(X\\). Let's take the following probabilities

    \\[ \\begin{align*} p(x|y=0)&=\\mathcal{N}\\left (\\mu_1, \\sigma^2 I \\right) \\\\ p(x|y=1)&=\\mathcal{N}\\left (\\mu_2, \\sigma^2 I \\right) \\end{align*} \\]

    For a single fixed diagonal variance parameterized by \\(\\sigma\\), the optimal discriminator lies at points which are equidistant from the means of the two distributions, i.e.

    \\[ \\begin{align*} \\| x - \\mu_1 \\|^2 &= \\| x - \\mu_2 \\|^2 \\\\ \\| \\mu_1 \\|^2 -2 x^\\mathsf{T} \\mu_1 &= \\| \\mu_2 \\|^2 -2 x^\\mathsf{T} \\mu_2 \\\\ \\implies 0 &= 2 (\\mu_2 - \\mu_1)^\\mathsf{T} x + \\| \\mu_1 \\|^2 - \\| \\mu_2 \\|^2 \\\\ 0 &= \\mu_1^\\mathsf{T}x - \\mu_2^\\mathsf{T}x - \\frac{1}{2} \\mu_1^\\mathsf{T} \\mu_1 + \\frac{1}{2} \\mu_2^\\mathsf{T} \\mu_2 \\end{align*} \\]

    This is just the implicit description of the line. Solving for the explicit form can be achieved by enforcing a functional form \\(f(z) = x = a z + b\\) with \\(z \\in \\mathbb{R}\\) onto \\(x\\). After the term is inserted in the previous equation

    \\[ 0 = (\\mu_2 - \\mu_1)^\\mathsf{T} (az + b) + \\frac{1}{2} \\left( \\| \\mu_1 \\|^2 - \\| \\mu_2 \\|^2 \\right) \\]

    We can write \\(a\\) explicitly since, by symmetry, it is expected to be orthogonal to \\(\\mu_2 - \\mu_1\\). Then, solving for \\(b\\), the solution can be found to be

    \\[ f(z) = \\underbrace{\\begin{bmatrix} 0 & 1 \\\\ -1 & 0 \\end{bmatrix} (\\mu_2 - \\mu_1)}_a z + \\underbrace{\\frac{\\mu_1 + \\mu_2}{2}}_b \\]"},{"location":"examples/influence_wine/","title":"For outlier detection","text":"

    Let's start by loading the imports, the dataset and splitting it into train, validation and test sets. We will use a large test set to have a less noisy estimate of the average influence.

    %load_ext autoreload\n
    %autoreload\n%matplotlib inline\nimport os\nimport random\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\nfrom support.common import plot_losses\nfrom support.torch import TorchMLP, fit_torch_model\nfrom pydvl.influence import compute_influences, TorchTwiceDifferentiable\nfrom support.shapley import load_wine_dataset\nfrom sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score\nfrom torch.optim import Adam, lr_scheduler\nfrom torch.utils.data import DataLoader, TensorDataset\n
    plt.rcParams[\"figure.figsize\"] = (16, 8)\nplt.rcParams[\"font.size\"] = 12\nplt.rcParams[\"xtick.labelsize\"] = 12\nplt.rcParams[\"ytick.labelsize\"] = 10\n
    random_state = 24\nis_CI = os.environ.get(\"CI\")\n
    random.seed(random_state)\nnp.random.seed(random_state)\n
    training_data, val_data, test_data, feature_names = load_wine_dataset(\ntrain_size=0.3, test_size=0.6\n)\n# In CI we only use a subset of the training set\nif is_CI:\ntrain_data = (training_data[0][:10], training_data[1][:10])\n

    We will corrupt some of the training points by flipping their labels

    num_corrupted_idxs = 10\ntraining_data[1][:num_corrupted_idxs] = torch.tensor(\n[(val + 1) % 3 for val in training_data[1][:num_corrupted_idxs]]\n)\n

    and let's wrap it in a pytorch data loader

    training_data_loader = DataLoader(\nTensorDataset(*training_data), batch_size=32, shuffle=False\n)\nval_data_loader = DataLoader(TensorDataset(*val_data), batch_size=32, shuffle=False)\ntest_data_loader = DataLoader(TensorDataset(*test_data), batch_size=32, shuffle=False)\n
    feature_dimension = 13\nnum_classes = 3\nnetwork_size = [16, 16]\nlayers_size = [feature_dimension, *network_size, num_classes]\nnum_epochs = 300\nlr = 0.005\nweight_decay = 0.01\ndevice = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\nnn_model = TorchMLP(layers_size)\nnn_model.to(device)\noptimizer = Adam(params=nn_model.parameters(), lr=lr, weight_decay=weight_decay)\nscheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)\nlosses = fit_torch_model(\nmodel=nn_model,\ntraining_data=training_data_loader,\nval_data=val_data_loader,\nloss=F.cross_entropy,\noptimizer=optimizer,\nscheduler=scheduler,\nnum_epochs=num_epochs,\n)\n
    \nModel fitting: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 300/300 [00:00<00:00, 307.77it/s]\n\n

    Let's check that the training has found a stable minimum by plotting the training and validation loss

    plot_losses(losses)\n

    Since it is a classification problem, let's also take a look at the confusion matrix on the test set

    nn_model.eval()\npred_y_test = np.argmax(nn_model(test_data[0]).detach(), axis=1)\ncm = confusion_matrix(test_data[1], pred_y_test)\ndisp = ConfusionMatrixDisplay(confusion_matrix=cm)\ndisp.plot();\n

    And let's compute the f1 score of the model

    f1_score(test_data[1], pred_y_test, average=\"weighted\")\n
    \n0.9906846833902615\n

    Let's now move to calculating influences of each point on the total score.

    train_influences = compute_influences(\nTorchTwiceDifferentiable(nn_model, F.cross_entropy),\ntraining_data=training_data_loader,\ntest_data=test_data_loader,\ninfluence_type=\"up\",\ninversion_method=\"direct\",\nhessian_regularization=0.1,\nprogress=True,\n)\n
    \nBatch Test Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4/4 [00:00<00:00, 67.10it/s]\nMVP: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 547/547 [00:00<00:00, 742.01it/s] \nBatch Split Input Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2/2 [00:00<00:00, 85.02it/s]\n\n

    The returned matrix, train_influences, has a number of columns equal to the number of points in the training set, and a number of rows equal to the number of points in the test set. At each element \\(a_{i,j}\\) it stores the influence that training point \\(j\\) has on the classification of test point \\(i\\).

    If we take the average across every column of the influences matrix, we obtain an estimate of the overall influence of a training point on the total accuracy of the network.

    mean_train_influences = np.mean(train_influences.numpy(), axis=0)\n

    The following histogram shows that there are big differences in score within the training set (notice the log-scale on the y axis).

    _, ax = plt.subplots()\nax.hist(mean_train_influences[num_corrupted_idxs:], label=\"normal\")\nax.hist(mean_train_influences[:num_corrupted_idxs], label=\"corrupted\", bins=5)\nax.set_title(\"Influece scores distribution\")\nax.set_xlabel(\"influece score\")\nax.set_ylabel(\"number of points\")\nax.legend()\nplt.show()\n

    We can see that the corrupted points tend to have a negative effect on the model, as expected

    print(\n\"Average influence of corrupted points: \",\nnp.mean(mean_train_influences[:num_corrupted_idxs]),\n)\nprint(\n\"Average influence of other points: \",\nnp.mean(mean_train_influences[num_corrupted_idxs:]),\n)\n
    \nAverage influence of corrupted points:  -0.05317057\nAverage influence of other points:  0.034408495\n\n

    We have seen how to calculate the influence of single training points on each test point using influence_type 'up'. Using influence_type 'perturbation' we can also calculate the influence of the input features of each point. In the next cell we will calculate the average influence of each feature on training and test points, and ultimately assess which are the most relevant to model performance.

    feature_influences = compute_influences(\nTorchTwiceDifferentiable(nn_model, F.cross_entropy),\ntraining_data=training_data_loader,\ntest_data=test_data_loader,\ninfluence_type=\"perturbation\",\ninversion_method=\"direct\",\nhessian_regularization=1,\nprogress=True,\n)\n
    \nBatch Test Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4/4 [00:00<00:00, 61.20it/s]\nMVP: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 547/547 [00:00<00:00, 1265.72it/s]\nBatch Influence Perturbation: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2/2 [00:03<00:00,  1.66s/it]\n\n
    mean_feature_influences = np.mean(feature_influences.numpy(), axis=(0, 1))\n_, ax = plt.subplots()\nax.bar(feature_names, mean_feature_influences)\nax.set_xlabel(\"training features\")\nax.set_ylabel(\"influence values\")\nax.set_title(\"Average feature influence\")\nplt.xticks(rotation=60)\nplt.show()\n

    The calculation of the Hessian matrix (necessary to calculate the influences) can be quite numerically challenging, but there are some techniques to speed up its calculation. PyDVL allows using the full method (\"direct\") or the conjugate gradient method (\"cg\"). The first one should be used only for very small networks (like our current example), while for bigger ones \"cg\" is advisable.

    cg_train_influences = compute_influences(\nTorchTwiceDifferentiable(nn_model, F.cross_entropy),\ntraining_data=training_data_loader,\ntest_data=test_data_loader,\ninfluence_type=\"up\",\ninversion_method=\"cg\",\nhessian_regularization=0.1,\nprogress=True,\n)\nmean_cg_train_influences = np.mean(cg_train_influences.numpy(), axis=0)\n
    \nBatch Test Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4/4 [00:00<00:00, 81.02it/s]\nBatch Train Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2/2 [00:00<00:00, 535.33it/s]\nConjugate gradient: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 107/107 [00:04<00:00, 22.66it/s]\nBatch Split Input Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2/2 [00:00<00:00, 98.91it/s]\n\n

    Let's compare the results obtained through conjugate gradient with those from the direct method

    print(\nf\"Percentage error of cg over direct method:{np.mean(np.abs(mean_cg_train_influences - mean_train_influences)/np.abs(mean_train_influences))*100} %\"\n)\n
    \nPercentage error of cg over direct method:1.5124550145628746e-05 %\n\n

    This was a quick introduction to the pyDVL interface for influence functions. Despite their speed and simplicity, influence functions are known to be a very noisy estimator of data quality, as pointed out in the paper \"Influence functions in deep learning are fragile\". The size of the network, the weight decay, the inversion method used for calculating influences, the size of the test set: they all add up to the total amount of noise. Experiments may therefore give quantitatively and qualitatively different results if not averaged across several realisations. Shapley values, on the contrary, have been shown to be more robust, but this comes at the cost of high computational requirements. PyDVL employs several parallelization and caching techniques to optimize such calculations.

    "},{"location":"examples/influence_wine/#influence-functions-for-outlier-detection","title":"Influence functions for outlier detection","text":"

    This notebook shows how to calculate influences on a NN model using pyDVL for an arbitrary dataset, and how this can be used to find anomalous or corrupted data points.

    It uses the wine dataset from sklearn: given a set of 13 different input parameters regarding a particular bottle, each related to some physical property (e.g. concentration of magnesium, malic acidity, alcoholic percentage, etc.), the model will need to predict to which of 3 classes the wine belongs. For more details, please refer to the sklearn documentation.

    "},{"location":"examples/influence_wine/#imports","title":"Imports","text":""},{"location":"examples/influence_wine/#constants","title":"Constants","text":""},{"location":"examples/influence_wine/#dataset","title":"Dataset","text":""},{"location":"examples/influence_wine/#fit-a-neural-network-to-the-data","title":"Fit a neural network to the data","text":"

    We will train a 2-layer neural network. PyDVL has some convenience wrappers to initialize a pytorch NN. If you already have a model loaded and trained, you can skip this section.

    "},{"location":"examples/influence_wine/#calculating-influences-for-small-neural-networks","title":"Calculating influences for small neural networks","text":"

    The following cell calculates the influences of each training data point on the neural network. Neural networks typically have a very bumpy parameter space, which, during training, is explored until the configuration that minimises the loss is found. There is an important assumption in influence functions that the model lies at a (at least local) minimum of such loss, and if this is not fulfilled many issues can arise. In order to avoid this scenario, a regularisation term should be used whenever dealing with big and noisy models.

    "},{"location":"examples/influence_wine/#influence-of-training-features","title":"Influence of training features","text":""},{"location":"examples/influence_wine/#speeding-up-influences-for-big-models","title":"Speeding up influences for big models","text":""},{"location":"examples/least_core_basic/","title":"Least Core","text":"

    We will be using the following functions and classes from pyDVL.

    %autoreload\nfrom pydvl.utils import (\nDataset,\nUtility,\n)\nfrom pydvl.value import compute_least_core_values, LeastCoreMode, ValuationResult\nfrom pydvl.reporting.plots import shaded_mean_std\nfrom pydvl.reporting.scores import compute_removal_score\n
    X, y = make_classification(\nn_samples=200,\nn_features=50,\nn_informative=25,\nn_classes=3,\nrandom_state=random_state,\n)\n
    full_dataset = Dataset.from_arrays(\nX, y, stratify_by_target=True, random_state=random_state\n)\nsmall_dataset = Dataset.from_arrays(\nX,\ny,\nstratify_by_target=True,\ntrain_size=10,\nrandom_state=random_state,\n)\n
    model = LogisticRegression(max_iter=500, solver=\"liblinear\")\n
    model.fit(full_dataset.x_train, full_dataset.y_train)\nprint(\nf\"Training accuracy: {100 * model.score(full_dataset.x_train, full_dataset.y_train):0.2f}%\"\n)\nprint(\nf\"Testing accuracy: {100 * model.score(full_dataset.x_test, full_dataset.y_test):0.2f}%\"\n)\n
    \nTraining accuracy: 86.25%\nTesting accuracy: 70.00%\n\n
    model.fit(small_dataset.x_train, small_dataset.y_train)\nprint(\nf\"Training accuracy: {100 * model.score(small_dataset.x_train, small_dataset.y_train):0.2f}%\"\n)\nprint(\nf\"Testing accuracy: {100 * model.score(small_dataset.x_test, small_dataset.y_test):0.2f}%\"\n)\n
    \nTraining accuracy: 100.00%\nTesting accuracy: 47.89%\n\n
    utility = Utility(model=model, data=small_dataset)\n
    exact_values = compute_least_core_values(\nu=utility,\nmode=LeastCoreMode.Exact,\nprogress=True,\n)\n
    \n  0%|          | 0/1023 [00:00<?, ?it/s]\n
    exact_values_df = exact_values.to_dataframe(column=\"exact_value\").T\nexact_values_df = exact_values_df[sorted(exact_values_df.columns)]\n
    budget_array = np.linspace(200, 2 ** len(small_dataset), num=10, dtype=int)\nall_estimated_values_df = []\nall_errors = {budget: [] for budget in budget_array}\nfor budget in tqdm(budget_array):\ndfs = []\nerrors = []\ncolumn_name = f\"estimated_value_{budget}\"\nfor i in range(20):\nvalues = compute_least_core_values(\nu=utility,\nmode=LeastCoreMode.MonteCarlo,\nn_iterations=budget,\nn_jobs=n_jobs,\n)\ndf = (\nvalues.to_dataframe(column=column_name)\n.drop(columns=[f\"{column_name}_stderr\"])\n.T\n)\ndf = df[sorted(df.columns)]\nerror = mean_squared_error(\nexact_values_df.loc[\"exact_value\"].values, df.values.ravel()\n)\nall_errors[budget].append(error)\ndf[\"budget\"] = budget\ndfs.append(df)\nestimated_values_df = pd.concat(dfs)\nall_estimated_values_df.append(estimated_values_df)\nvalues_df = pd.concat(all_estimated_values_df)\nerrors_df = pd.DataFrame(all_errors)\n
    \n  0%|          | 0/10 [00:00<?, ?it/s]\n

    We can see that the approximation error decreases, on average, as we increase the budget.

    Still, the decrease may not always necessarily happen when we increase the number of iterations because of the fact that we sample the subsets with replacement in the Monte Carlo method, i.e. there may be repeated subsets.

    utility = Utility(model=model, data=full_dataset)\n
    method_names = [\"Random\", \"Least Core\"]\nremoval_percentages = np.arange(0, 0.41, 0.05)\n
    all_scores = []\nfor i in trange(5):\nfor method_name in method_names:\nif method_name == \"Random\":\nvalues = ValuationResult.from_random(size=len(utility.data))\nelse:\nvalues = compute_least_core_values(\nu=utility,\nmode=LeastCoreMode.MonteCarlo,\nn_iterations=25000,\nn_jobs=n_jobs,\n)\nscores = compute_removal_score(\nu=utility,\nvalues=values,\npercentages=removal_percentages,\nremove_best=True,\n)\nscores[\"method_name\"] = method_name\nall_scores.append(scores)\nscores_df = pd.DataFrame(all_scores)\n
    \n  0%|          | 0/5 [00:00<?, ?it/s]\n

    We can clearly see that removing the most valuable data points, as given by the Least Core method, leads to, on average, a decrease in the model's performance and that the method outperforms random removal of data points.

    all_scores = []\nfor i in trange(5):\nfor method_name in method_names:\nif method_name == \"Random\":\nvalues = ValuationResult.from_random(size=len(utility.data))\nelse:\nvalues = compute_least_core_values(\nu=utility,\nmode=LeastCoreMode.MonteCarlo,\nn_iterations=25000,\nn_jobs=n_jobs,\n)\nscores = compute_removal_score(\nu=utility,\nvalues=values,\npercentages=removal_percentages,\n)\nscores[\"method_name\"] = method_name\nall_scores.append(scores)\nscores_df = pd.DataFrame(all_scores)\n
    \n  0%|          | 0/5 [00:00<?, ?it/s]\n

    We can clearly see that removing the least valuable data points, as given by the Least Core method, leads to, on average, an increase in the model's performance and that the method outperforms the random removal of data points.

    "},{"location":"examples/least_core_basic/#least-core-for-data-valuation","title":"Least Core for Data Valuation","text":"

    This notebook introduces Least Core methods for the computation of data values using pyDVL.

    Shapley values define a fair way of distributing the worth of the whole training set when every data point is part of it. But they do not consider the question of stability of subsets: Could some data points obtain a higher payoff if they formed smaller subsets? It is argued that this might be relevant if data providers are paid based on data value, since Shapley values can incentivise them not to contribute their data to the \"grand coalition\", but instead try to form smaller ones. Whether this is of actual practical relevance is debatable, but in any case, the least core is an alternative tool available for any task of Data Valuation.

    The Core is another approach to compute data values originating in cooperative game theory that attempts to answer those questions. It is the set of feasible payoffs that cannot be improved upon by a coalition of the participants.

    Its use for Data Valuation was first described in the paper If You Like Shapley Then You\u2019ll Love the Core by Tom Yan and Ariel D. Procaccia.

    The Least Core value \\(v\\) of the \\(i\\)-th sample in dataset \\(D\\) wrt. utility \\(u\\) is computed by solving the following Linear Program:

    \\[ \\begin{array}{lll} \\text{minimize} & \\displaystyle{e} & \\\\ \\text{subject to} & \\displaystyle\\sum_{x_i\\in D} v_u(x_i) = u(D) & \\\\ & \\displaystyle\\sum_{x_i\\in S} v_u(x_i) + e \\geq u(S) &, \\forall S \\subset D, S \\neq \\emptyset \\\\ \\end{array} \\]

    To illustrate this method we will use a synthetic dataset. We will first use a subset of 10 data points to compute the exact values and use them to assess the Monte Carlo approximation. Afterwards, we will conduct the data removal experiments as described by Ghorbani and Zou in their paper Data Shapley: Equitable Valuation of Data for Machine Learning: We compute the data valuation given different computation budgets and incrementally remove a percentage of the best, respectively worst, data points and observe how that affects the utility.

    "},{"location":"examples/least_core_basic/#setup","title":"Setup","text":"

    We begin by importing the main libraries and setting some defaults.

    If you are reading this in the documentation, some boilerplate (including most plotting code) has been omitted for convenience."},{"location":"examples/least_core_basic/#dataset","title":"Dataset","text":"

    We generate a synthetic dataset using the make_classification function from scikit-learn.

    We sample 200 data points from a 50-dimensional Gaussian distribution with 25 informative features and 25 non-informative features (generated as random linear combinations of the informative features).

    The 200 samples are uniformly distributed across 3 classes with a small percentage of noise added to the labels to make the task a bit more difficult.

    "},{"location":"examples/least_core_basic/#estimating-least-core-values","title":"Estimating Least Core Values","text":"

    In this first section we will use a smaller subset of the dataset containing 10 samples in order to be able to compute exact values in a reasonable amount of time. Afterwards, we will use the Monte Carlo method with a limited budget (maximum number of subsets) to approximate these values.

    "},{"location":"examples/least_core_basic/#data-removal","title":"Data Removal","text":"

    We now move on to the data removal experiments using the full dataset.

    In these experiments, we first rank the data points from most valuable to least valuable using the values estimated by the Monte Carlo Least Core method. Then, we gradually remove from 5 to 40 percent, by increments of 5 percentage points, of the most valuable/least valuable ones, train the model on this subset and compute its accuracy.

    "},{"location":"examples/least_core_basic/#remove-best","title":"Remove Best","text":"

    We start by removing the best data points and seeing how the model's accuracy evolves.

    "},{"location":"examples/least_core_basic/#remove-worst","title":"Remove Worst","text":"

    We then proceed to removing the worst data points and seeing how the model's accuracy evolves.

    "},{"location":"examples/shapley_basic_spotify/","title":"Shapley values","text":"

    This notebook introduces Shapley methods for the computation of data value using pyDVL.

    In order to illustrate the practical advantages, we will predict the popularity of songs in the dataset Top Hits Spotify from 2000-2019, and highlight how data valuation can help investigate and boost the performance of the models. In doing so, we will describe the basic usage patterns of pyDVL.

    Recall that data value is a function of three things:

    1. The dataset.
    2. The model.
    3. The performance metric or scoring function.

    Below we will describe how to instantiate each one of these objects and how to use them for data valuation. Please also see the documentation on data valuation.

    We will be using the following functions from pyDVL. The main entry point is the function compute_shapley_values(), which provides a facade to all Shapley methods. In order to use it we need the classes Dataset, Utility and Scorer.

    %autoreload\nfrom pydvl.reporting.plots import plot_shapley\nfrom pydvl.utils.dataset import GroupedDataset\nfrom support.shapley import load_spotify_dataset\nfrom pydvl.value import *\n
    training_data, val_data, test_data = load_spotify_dataset(\nval_size=0.3, test_size=0.3, target_column=\"popularity\", random_state=random_state\n)\n
    training_data[0].head()\n
    artist song duration_ms explicit year danceability energy key loudness mode speechiness acousticness instrumentalness liveness valence tempo genre 1561 Fetty Wap 679 (feat. Remy Boyz) 196693 True 2015 0.618 0.717 7 -5.738 1 0.3180 0.00256 0.000000 0.6250 0.603 190.050 8 1410 Meghan Trainor All About That Bass 187920 True 2015 0.807 0.887 9 -3.726 1 0.0503 0.05730 0.000003 0.1240 0.961 134.052 14 1772 Katy Perry Chained To The Rhythm 237733 False 2017 0.562 0.800 0 -5.404 1 0.1120 0.08140 0.000000 0.1990 0.471 95.029 14 1670 Sigala Sweet Lovin' - Radio Edit 202149 False 2015 0.683 0.910 10 -1.231 1 0.0515 0.05530 0.000005 0.3360 0.674 124.977 15 1780 Liam Payne Strip That Down 204502 False 2017 0.869 0.485 6 -5.595 1 0.0545 0.24600 0.000000 0.0765 0.527 106.028 14

    The dataset has many high-level features, some quite intuitive ('duration_ms' or 'tempo'), while others are a bit more cryptic ('valence'?). For information on each feature, please consult the dataset's website.

    In our analysis, we will use all the columns, except for 'artist' and 'song', to predict the 'popularity' of each song. We will nonetheless keep the information on song and artist in a separate object for future reference.

    song_name = training_data[0][\"song\"]\nartist = training_data[0][\"artist\"]\ntraining_data[0] = training_data[0].drop([\"song\", \"artist\"], axis=1)\ntest_data[0] = test_data[0].drop([\"song\", \"artist\"], axis=1)\nval_data[0] = val_data[0].drop([\"song\", \"artist\"], axis=1)\n

    Input and label data are then used to instantiate a Dataset object:

    dataset = Dataset(*training_data, *val_data)\n

    The calculation of exact Shapley values is computationally very expensive (exponentially so!) because it requires training the model on every possible subset of the training set. For this reason, PyDVL implements techniques to speed up the calculation, such as Monte Carlo approximations, surrogate models or caching of intermediate results and grouping of data to calculate group Shapley values instead of single data points.

    In our case, we will group songs by artist and calculate the Shapley value for the artists. Given the pandas Series for 'artist', to group the dataset by it, one does the following:

    grouped_dataset = GroupedDataset.from_dataset(dataset=dataset, data_groups=artist)\n
    utility = Utility(\nmodel=GradientBoostingRegressor(n_estimators=3),\ndata=grouped_dataset,\nscorer=Scorer(\"neg_mean_absolute_error\", default=0.0),\n)\nvalues = compute_shapley_values(\nutility,\nmode=ShapleyMode.TruncatedMontecarlo,\n# Stop if the standard error is below 1% of the range of the values (which is ~2),\n# or if the number of updates exceeds 1000\ndone=AbsoluteStandardError(threshold=0.2, fraction=0.9) | MaxUpdates(1000),\ntruncation=RelativeTruncation(utility, rtol=0.01),\nn_jobs=-1,\n)\nvalues.sort(key=\"value\")\ndf = values.to_dataframe(column=\"data_value\", use_names=True)\n

    The function compute_shapley_values() serves as a common access point to all Shapley methods. For most of them, we must choose a StoppingCriterion with the argument done=. In this case we choose to stop when the standard error of the value estimates is below 0.2 for at least 90% of the training points, or if the number of updates of any index exceeds 1000. The mode argument specifies the Shapley method to use. In this case, we use the Truncated Monte Carlo approximation, which is the fastest of the Monte Carlo methods, owing both to using the permutation definition of Shapley values and the ability to truncate the iteration over a given permutation. We configure this to happen when the contribution of the remaining elements is below 1% of the total utility with the parameter truncation= and the policy RelativeTruncation.

    Let's take a look at the returned dataframe:

    df.head()\n
    data_value data_value_stderr Kendrick Lamar -1.279149 0.091670 BLACKPINK -1.277363 0.177476 Adele -1.241698 0.183732 5 Seconds of Summer -1.228002 0.103377 Flume -1.197065 0.102345

    The first thing to notice is that we sorted the results in ascending order of Shapley value. The index holds the labels for each data group: in this case, artist names. The column data_value is just that: the Shapley Data value, and data_value_stderr is its estimated standard error because we are using a Monte Carlo approximation.

    Let us plot the results. In the next cell we will take the 30 artists with the lowest score and plot their values with 95% Normal confidence intervals. Keep in mind that Monte Carlo Shapley is typically very noisy, and it can take many steps to arrive at a clean estimate.

    We can immediately see that many artists (groups of samples) have very low, even negative value, which means that they tend to decrease the total score of the model when present in the training set! What happens if we remove them?

    In the next cell we create a new training set excluding the artists with the lowest scores:

    low_dvl_artists = df.iloc[:30].index.to_list()\nartist_filter = ~artist.isin(low_dvl_artists)\nX_train_good_dvl = training_data[0][artist_filter]\ny_train_good_dvl = training_data[1][artist_filter]\n

    Now we will use this \"cleaned\" dataset to retrain the same model and compare its mean absolute error to the one trained on the full dataset. Notice that the score now is calculated using the test set, while in the calculation of the Shapley values we were using the validation set.

    model_good_data = GradientBoostingRegressor(n_estimators=3).fit(\nX_train_good_dvl, y_train_good_dvl\n)\nerror_good_data = mean_absolute_error(\nmodel_good_data.predict(test_data[0]), test_data[1]\n)\nmodel_all_data = GradientBoostingRegressor(n_estimators=3).fit(\ntraining_data[0], training_data[1]\n)\nerror_all_data = mean_absolute_error(model_all_data.predict(test_data[0]), test_data[1])\nprint(f\"Improvement: {100*(error_all_data - error_good_data)/error_all_data:02f}%\")\n
    \nImprovement: 13.940685%\n\n

    The score has improved by almost 14%! This is quite an important result, as it shows a consistent process to improve the performance of a model by excluding data points from its training set.

    One must however proceed with caution instead of simply throwing away data. For one, mean_absolute_error is an estimate of generalization error on unseen data, so the improvement we see on the test set might not be as large upon deployment. It would be advisable to cross-validate this whole process to obtain more conservative estimates. It is also advisable to manually inspect the artists with low value and to try to understand the reason why the model behaves like it does. Finally, remember that the value depends on the model chosen! Artists that are detrimental to the Gradient Boosting Regressor might be informative for a different model (although it is likely that the worst ones share some characteristic making them \"bad\" for other regressors).

    Let us take all the songs by Rihanna, set their score to 0 and re-calculate the Shapley values.

    y_train_anomalous = training_data[1].copy(deep=True)\ny_train_anomalous[artist == \"Rihanna\"] = 0\nanomalous_dataset = Dataset(\nx_train=training_data[0],\ny_train=y_train_anomalous,\nx_test=val_data[0],\ny_test=val_data[1],\n)\ngrouped_anomalous_dataset = GroupedDataset.from_dataset(anomalous_dataset, artist)\nanomalous_utility = Utility(\nmodel=GradientBoostingRegressor(n_estimators=3),\ndata=grouped_anomalous_dataset,\nscorer=Scorer(\"neg_mean_absolute_error\", default=0.0),\n)\nvalues = compute_shapley_values(\nanomalous_utility,\nmode=ShapleyMode.TruncatedMontecarlo,\ndone=AbsoluteStandardError(threshold=0.2, fraction=0.9) | MaxUpdates(1000),\nn_jobs=-1,\n)\nvalues.sort(key=\"value\")\ndf = values.to_dataframe(column=\"data_value\", use_names=True)\n

    Let us now consider the low-value artists (at least for predictive purposes, no claims are made about their artistic value!) and plot the results

    And Rihanna (our anomalous data group) has moved from top contributor to having a negative impact on the performance of the model, as expected!

    What is going on? A popularity of 0 for Rihanna's songs is inconsistent with listening patterns for other artists. In artificially setting this, we degrade the predictive power of the model.

    By dropping low-value groups or samples, one can often increase model performance, but by inspecting them, it is possible to identify bogus data sources or acquisition methods.

    "},{"location":"examples/shapley_basic_spotify/#shapley-for-data-valuation","title":"Shapley for data valuation","text":""},{"location":"examples/shapley_basic_spotify/#setup","title":"Setup","text":"

    We begin by importing the main libraries and setting some defaults.

    If you are reading this in the documentation, some boilerplate (including most plotting code) has been omitted for convenience."},{"location":"examples/shapley_basic_spotify/#loading-and-grouping-the-dataset","title":"Loading and grouping the dataset","text":"

    pyDVL provides a support function for this notebook, load_spotify_dataset(), which downloads data on songs published after 2014, and splits 30% of data for testing, and 30% of the remaining data for validation. The return value is a triple of training, validation and test data as lists of the form [X_input, Y_label].

    "},{"location":"examples/shapley_basic_spotify/#creating-the-utility-and-computing-values","title":"Creating the utility and computing values","text":"

    Now we can calculate the contribution of each group to the model performance.

    As a model, we use scikit-learn's GradientBoostingRegressor, but pyDVL can work with any model from sklearn, xgboost or lightgbm. More precisely, any model that implements the protocol pydvl.utils.types.SupervisedModel, which is just the standard sklearn interface of fit(), predict() and score(), can be used to construct the utility.

    The third and final component is the scoring function. It can be anything like accuracy or \\(R^2\\), and is set with a string from the standard sklearn scoring methods. Please refer to that documentation for information on how to define your own scoring function.

    We group dataset, model and scoring function into an instance of Utility.

    "},{"location":"examples/shapley_basic_spotify/#evaluation-on-anomalous-data","title":"Evaluation on anomalous data","text":"

    One interesting test is to corrupt some data and to monitor how their value changes. To do this, we will take one of the artists with the highest value and set the popularity of all their songs to 0.

    "},{"location":"examples/shapley_knn_flowers/","title":"KNN Shapley","text":"

    This notebook shows how to calculate Shapley values for the K-Nearest Neighbours algorithm. By making use of the local structure of KNN, it is possible to compute an exact value in almost linear time, as opposed to the exponential complexity of exact, model-agnostic Shapley.

    The main idea is to exploit the fact that adding or removing points beyond the k-ball doesn't influence the score. Because the algorithm then essentially only needs to do a search, it runs in \\(\\mathcal{O}(N \\log N)\\) time.

    By further using approximate nearest neighbours, it is possible to achieve \\((\\epsilon,\\delta)\\)-approximations in sublinear time. However, this is not implemented in pyDVL yet.

    We refer to the original paper that pyDVL implements for details: Jia, Ruoxi, David Dao, Boxin Wang, Frances Ann Hubis, Nezihe Merve Gurel, Bo Li, Ce Zhang, Costas Spanos, and Dawn Song. Efficient Task-Specific Data Valuation for Nearest Neighbor Algorithms. Proceedings of the VLDB Endowment 12, no. 11 (1 July 2019): 1610\u201323.

    The main entry point is the function compute_shapley_values(), which provides a facade to all Shapley methods. In order to use it we need the classes Dataset, Utility and Scorer, all of which can be imported from pydvl.value:

    from pydvl.value import *\n
    sklearn_dataset = sk.datasets.load_iris()\ndata = Dataset.from_sklearn(sklearn_dataset)\nknn = sk.neighbors.KNeighborsClassifier(n_neighbors=5)\nutility = Utility(knn, data)\n
    shapley_values = compute_shapley_values(utility, mode=ShapleyMode.KNN, progress=True)\nshapley_values.sort(key=\"value\")\nvalues = shapley_values.values\n
    \n0it [00:00, ?it/s]\n

    If we now look at the distribution of Shapley values for each class, we see that each has samples with both high and low scores. This is expected, because an accurate model uses information of all classes.

    corrupted_data = deepcopy(data)\nn_corrupted = 10\ncorrupted_data.y_train[:n_corrupted] = (corrupted_data.y_train[:n_corrupted] + 1) % 3\nknn = sk.neighbors.KNeighborsClassifier(n_neighbors=5)\ncontaminated_values = compute_shapley_values(\nUtility(knn, corrupted_data), mode=ShapleyMode.KNN\n)\n

    Taking the average corrupted value and comparing it to non-corrupted ones, we notice that on average anomalous points have a much lower score, i.e. they tend to be much less valuable to the model.

    To do this, first we make sure that we access the results by data index with a call to ValuationResult.sort(), then we split the values into two groups: corrupted and non-corrupted. Note how we access property values of the ValuationResult object. This is a numpy array of values, sorted however the object was sorted. Finally, we compute the quantiles of the two groups and compare them. We see that the corrupted mean is in the lowest percentile of the value distribution, while the correct mean is in the 70th percentile.

    contaminated_values.sort(\nkey=\"index\"\n)  # This is redundant, but illustrates sorting, which is in-place\ncorrupted_shapley_values = contaminated_values.values[:n_corrupted]\ncorrect_shapley_values = contaminated_values.values[n_corrupted:]\nmean_corrupted = np.mean(corrupted_shapley_values)\nmean_correct = np.mean(correct_shapley_values)\npercentile_corrupted = np.round(100 * np.mean(values &lt; mean_corrupted), 0)\npercentile_correct = np.round(100 * np.mean(values &lt; mean_correct), 0)\nprint(\nf\"The corrupted mean is at percentile {percentile_corrupted:.0f} of the value distribution.\"\n)\nprint(\nf\"The correct mean is percentile {percentile_correct:.0f} of the value distribution.\"\n)\n
    \nThe corrupted mean is at percentile 1 of the value distribution.\nThe correct mean is percentile 71 of the value distribution.\n\n

    This is confirmed if we plot the distribution of Shapley values and circle corrupt points in red. They all tend to have low Shapley scores, regardless of their position in space and assigned label:

    "},{"location":"examples/shapley_knn_flowers/#knn-shapley","title":"KNN Shapley","text":""},{"location":"examples/shapley_knn_flowers/#setup","title":"Setup","text":"

    We begin by importing the main libraries and setting some defaults.

    If you are reading this in the documentation, some boilerplate (including most plotting code) has been omitted for convenience."},{"location":"examples/shapley_knn_flowers/#building-a-dataset-and-a-utility","title":"Building a Dataset and a Utility","text":"

    We use the sklearn iris dataset and wrap it into a pydvl.utils.dataset.Dataset calling the factory pydvl.utils.dataset.Dataset.from_sklearn(). This automatically creates a train/test split for us which will be used to compute the utility.

    We then create a model and instantiate a Utility using data and model. The model needs to implement the protocol pydvl.utils.types.SupervisedModel, which is just the standard sklearn interface of fit(), predict() and score(). In constructing the Utility one can also choose a scoring function, but we pick the default which is just the model's knn.score().

    "},{"location":"examples/shapley_knn_flowers/#computing-values","title":"Computing values","text":"

    Calculating the Shapley values is straightforward. We just call compute_shapley_values() with the utility object we created above. The function returns a ValuationResult. This object contains the values themselves, data indices and labels.

    "},{"location":"examples/shapley_knn_flowers/#inspecting-the-results","title":"Inspecting the results","text":"

    Let us first look at the labels' distribution as a function of petal and sepal length:

    "},{"location":"examples/shapley_knn_flowers/#corrupting-labels","title":"Corrupting labels","text":"

    To test how informative values are, we can corrupt some training labels and see how their Shapley values change with respect to the non-corrupted points.

    "},{"location":"examples/shapley_utility_learning/","title":"Data utility learning","text":"

    This notebook introduces Data Utility Learning, a method of approximating Data Shapley values by learning to estimate the utility function.

    The idea is to employ a model to learn the performance of the learning algorithm of interest on unseen data combinations (i.e. subsets of the dataset). The method was originally described in Wang, Tianhao, Yu Yang, and Ruoxi Jia. Improving Cooperative Game Theory-Based Data Valuation via Data Utility Learning. arXiv, 2022.

    Warning: Work on Data Utility Learning is preliminary. It remains to be seen when or whether it can be put effectively into application. For this, further testing and benchmarking are required.

    Recall the definition of Shapley value \\(v_u(i)\\) for data point \\(i\\):

    \\[\\begin{equation} v_u(i) = \\frac{1}{n} \\sum_{S \\subseteq N \\setminus \\{i\\}} \\binom{n-1}{|S|}^{-1} [u(S \\cup \\{i\\}) \u2212 u(S)] , \\tag{1} \\label{eq:shapley-def} \\end{equation}\\]

    where \\(N\\) is the set of all indices in the training set and \\(u\\) is the utility.

    In Data Utility Learning, to avoid the exponential cost of computing this sum, one learns a surrogate model for \\(u\\). We start by sampling so-called utility samples to form a training set \\(\\mathcal{S}_\\mathrm{train}\\) for our utility model. Each utility sample is a tuple consisting of a subset of indices \\(S_j\\) in the dataset and its utility \\(u(S_j)\\):

    \\[\\mathcal{S}_\\mathrm{train} = \\{(S_j, u(S_j)): j = 1 , ..., m_\\mathrm{train}\\}\\]

    where \\(m_\\mathrm{train}\\) denotes the training budget for the learned utility function.

    The subsets are then transformed into boolean vectors \\(\\phi\\) in which a \\(1\\) at index \\(k\\) means that the \\(k\\)-th sample of the dataset is present in the subset:

    \\[S_j \\mapsto \\phi_j \\in \\{ 0, 1 \\}^{N}\\]

    We fit a regression model \\(\\tilde{u}\\), called the data utility model, on the transformed utility samples \\(\\phi (\\mathcal{S}_\\mathrm{train}) := \\{(\\phi(S_j), u(S_j)): j = 1 , ..., m_\\mathrm{train}\\}\\) and use it to predict instead of computing the utility for any \\(S_j \\notin \\mathcal{S}_\\mathrm{train}\\). We abuse notation and identify \\(\\tilde{u}\\) with the composition \\(\\tilde{u} \\circ \\phi : 2^N \\rightarrow \\mathbb{R}\\).

    The main assumption is that it is much faster to fit and use \\(\\tilde{u}\\) than it is to compute \\(u\\) and that for most \\(i\\), \\(v_{\\tilde{u}}(i) \\approx v_u(i)\\) in some sense.

    As is the case with all other Shapley methods, the main entry point is the function compute_shapley_values(), which provides a facade to all algorithms in this family. We use it with the usual classes Dataset and Utility. In addition, we must import the core class for learning a utility, DataUtilityLearning.

    %autoreload\nfrom pydvl.utils import DataUtilityLearning, top_k_value_accuracy\nfrom pydvl.reporting.plots import shaded_mean_std\nfrom pydvl.value import *\n
    dataset = Dataset.from_sklearn(\nload_iris(), train_size=0.1, random_state=random_state, stratify_by_target=True\n)\n

    We verify that, as in the paper, if we fit a Support-Vector Classifier to the training data, we obtain an accuracy of around 92%:

    model = LinearSVC()\nmodel.fit(dataset.x_train, dataset.y_train)\nprint(f\"Mean accuracy: {100 * model.score(dataset.x_test, dataset.y_test):0.2f}%\")\n
    \nMean accuracy: 92.59%\n\n
    computation_times = {}\n
    utility = Utility(model=model, data=dataset)\n
    start_time = time.monotonic()\nresult = compute_shapley_values(\nu=utility,\nmode=ShapleyMode.CombinatorialExact,\nn_jobs=-1,\nprogress=False,  # Does not display correctly in a notebook\n)\ncomputation_time = time.monotonic() - start_time\ncomputation_times[\"exact\"] = computation_time\ndf = result.to_dataframe(column=\"exact\").drop(columns=[\"exact_stderr\"])\n

    We now estimate the Data Shapley values using the DataUtilityLearning wrapper. This class wraps a Utility and delegates calls to it, up until a given budget. Every call yields a utility sample which is saved under the hood for training of the given utility model. Once the budget is exhausted, DataUtilityLearning fits the model to the utility samples and all subsequent calls use the learned model to predict the wrapped utility instead of delegating to it.

    For the utility model we follow the paper and use a fully connected neural network. To train it we use a total of training_budget utility samples. We repeat this multiple times for each training budget.

    Note how we use a Monte Carlo approximation instead of `combinatorial_exact` as before. This is because the exact computation samples subsets in a particular order, from the lowest size to the largest. Because the training budget for the model to learn the utility is around 1/4th of the total number of subsets, this would mean that we would never see utility samples for the larger sizes and the model would be biased (try it!).
    mlp_kwargs = dict(\nhidden_layer_sizes=(20, 10),\nactivation=\"relu\",\nsolver=\"adam\",\nlearning_rate_init=0.001,\nbatch_size=32,\nmax_iter=800,\n)\nprint(\nf\"Doing {n_runs} runs for each of {len(training_budget_values)} different training budgets.\"\n)\npbar = tqdm(\nproduct(range(n_runs), training_budget_values),\ntotal=n_runs * len(training_budget_values),\n)\nfor idx, budget in pbar:\npbar.set_postfix_str(f\"Run {idx} for training budget: {budget}\")\ndul_utility = DataUtilityLearning(\nu=utility, training_budget=budget, model=MLPRegressor(**mlp_kwargs)\n)\nstart_time = time.monotonic()\n# DUL will kick in after training_budget calls to utility\nresult = compute_shapley_values(\nu=dul_utility,\nmode=ShapleyMode.PermutationMontecarlo,\ndone=MaxUpdates(300),\nn_jobs=-1,\n)\ncomputation_time = time.monotonic() - start_time\nif budget in computation_times:\ncomputation_times[budget].append(computation_time)\nelse:\ncomputation_times[budget] = [computation_time]\ndul_df = result.to_dataframe(column=f\"{budget}_{idx}\").drop(\ncolumns=[f\"{budget}_{idx}_stderr\"]\n)\ndf = pd.concat([df, dul_df], axis=1)\ncomputation_times_df = pd.DataFrame(computation_times)\n
    \nDoing 10 runs for each of 10 different training budgets.\n\n
    \n  0%|          | 0/100 [00:00<?, ?it/s]\n

    Next we compute the \\(l_2\\) error for the different training budgets across all runs and plot mean and standard deviation. We obtain results analogous to Figure 1 of the paper, verifying that the method indeed works for estimating the Data Shapley values (at least in this context).

    In the plot we also display the mean and standard deviation of the computation time taken for each training budget.

    errors = np.zeros((len(training_budget_values), n_runs), dtype=float)\naccuracies = np.zeros((len(training_budget_values), n_runs), dtype=float)\ntop_k = 3\nfor i, budget in enumerate(training_budget_values):\nfor j in range(n_runs):\ny_true = df[\"exact\"].values\ny_estimated = df[f\"{budget}_{j}\"].values\nerrors[i, j] = np.linalg.norm(y_true - y_estimated, ord=2)\naccuracies[i, j] = top_k_value_accuracy(y_true, y_estimated, k=top_k)\nerror_from_mean = np.linalg.norm(df[\"exact\"].values - df[\"exact\"].values.mean(), ord=2)\n

    Let us next look at how well the ranking of values resulting from using the surrogate \\(\\tilde{u}\\) matches the ranking by the exact values. For this we fix \\(k=3\\) and consider the \\(k\\) samples with the highest value according to \\(\\tilde{u}\\) and \\(u\\):

    Finally, for each sample, we look at the distance of the estimates to the exact value across runs. Boxes are centered at the 50th percentile with whiskers at the 25th and 75th. We plot relative distances, as a percentage. We observe a general tendency to underestimate the value:

    highest_value_index = df.index[df[\"exact\"].argmax()]\ny_train_corrupted = dataset.y_train.copy()\ny_train_corrupted[highest_value_index] = (\ny_train_corrupted[highest_value_index] + 1\n) % 3\ncorrupted_dataset = Dataset(\nx_train=dataset.x_train,\ny_train=y_train_corrupted,\nx_test=dataset.x_test,\ny_test=dataset.y_test,\n)\n

    We retrain the model on the new dataset and verify that the accuracy decreases:

    model = LinearSVC()\nmodel.fit(dataset.x_train, y_train_corrupted)\nprint(f\"Mean accuracy: {100 * model.score(dataset.x_test, dataset.y_test):0.2f}%\")\n
    \nMean accuracy: 82.96%\n\n

    Finally, we recompute the values of all samples using the exact method and the best training budget previously obtained and then plot the resulting scores.

    best_training_budget = training_budget_values[errors.mean(axis=1).argmin()]\nutility = Utility(\nmodel=LinearSVC(),\ndata=corrupted_dataset,\n)\nresult = compute_shapley_values(\nu=utility,\nmode=ShapleyMode.CombinatorialExact,\nn_jobs=-1,\nprogress=False,\n)\ndf_corrupted = result.to_dataframe(column=\"exact\").drop(columns=[\"exact_stderr\"])\ndul_utility = DataUtilityLearning(\nu=utility, training_budget=best_training_budget, model=MLPRegressor(**mlp_kwargs)\n)\nresult = compute_shapley_values(\nu=dul_utility,\nmode=ShapleyMode.PermutationMontecarlo,\ndone=MaxUpdates(300),\nn_jobs=-1,\n)\ndul_df = result.to_dataframe(column=\"estimated\").drop(columns=[\"estimated_stderr\"])\ndf_corrupted = pd.concat([df_corrupted, dul_df], axis=1)\n

    We can see in the figure that both methods assign the lowest value to the sample with the corrupted label.

    As mentioned above, despite the previous results, this work is preliminary and the usefulness of Data Utility Learning remains to be tested in practice."},{"location":"examples/shapley_utility_learning/#data-utility-learning","title":"Data Utility Learning","text":""},{"location":"examples/shapley_utility_learning/#setup","title":"Setup","text":"

    We begin by importing the main libraries and setting some defaults.

    If you are reading this in the documentation, some boilerplate (including most plotting code) has been omitted for convenience."},{"location":"examples/shapley_utility_learning/#dataset","title":"Dataset","text":"

    Following the paper, we take 15 samples (10%) from the Iris dataset and compute their Data Shapley values by using all the remaining samples as test set for computing the utility, which in this case is accuracy.

    "},{"location":"examples/shapley_utility_learning/#data-shapley","title":"Data Shapley","text":"

    We start by defining the utility using the model and computing the exact Data Shapley values by definition \\(\\ref{eq:shapley-def}\\).

    "},{"location":"examples/shapley_utility_learning/#evaluation-on-anomalous-data","title":"Evaluation on anomalous data","text":"

    One interesting way to assess the Data Utility Learning approach is to corrupt some data and monitor how the value changes. To do this, we will take the sample with the highest score and change its label.

    "},{"location":"getting-started/first-steps/","title":"Getting started","text":"

    Warning

    Make sure you have read Installing pyDVL before using the library. In particular read about how caching and parallelization work, since they might require additional setup.

    "},{"location":"getting-started/first-steps/#main-concepts","title":"Main concepts","text":"

    pyDVL aims to be a repository of production-ready, reference implementations of algorithms for data valuation and influence functions. Even though we only briefly introduce key concepts in the documentation, the following sections should be enough to get you started.

    • Basics of data valuation for key objects and usage patterns for Shapley value computation and related methods.
    • Computing Influence Values for instructions on how to compute influence functions.
    "},{"location":"getting-started/first-steps/#running-the-examples","title":"Running the examples","text":"

    If you are somewhat familiar with the concepts of data valuation, you can start by browsing our worked-out examples illustrating pyDVL's capabilities either:

    • In the examples under Basics of data valuation and Computing Influence Values.
    • Using binder notebooks, deployed from each example's page.
    • Locally, by starting a jupyter server at the root of the project. You will have to install jupyter first manually since it's not a dependency of the library.
    "},{"location":"getting-started/first-steps/#advanced-usage","title":"Advanced usage","text":"

    Besides the do's and don'ts of data valuation itself, which are the subject of the examples and the documentation of each method, there are two main things to keep in mind when using pyDVL.

    "},{"location":"getting-started/first-steps/#caching","title":"Caching","text":"

    pyDVL uses memcached to cache the computation of the utility function and speed up some computations (see the installation guide).

    Caching of the utility function is disabled by default. When it is enabled it takes into account the data indices passed as argument and the utility function wrapped into the Utility object. This means that care must be taken when reusing the same utility function with different data, see the documentation for the caching module for more information.

    In general, caching won't play a major role in the computation of Shapley values because the probability of sampling the same subset twice, and hence needing the same utility function computation, is very low. However, it can be very useful when comparing methods that use the same utility function, or when running multiple experiments with the same data.

    When is the cache really necessary?

    Crucially, semi-value computations with the PermutationSampler require caching to be enabled, or they will take twice as long as the direct implementation in compute_shapley_values.

    "},{"location":"getting-started/first-steps/#parallelization","title":"Parallelization","text":"

    pyDVL supports joblib for local parallelization (within one machine) and ray for distributed parallelization (across multiple machines).

    The former works out of the box but for the latter you will need to provide a running cluster (or run ray in local mode).

    As of v0.7.0 pyDVL does not allow requesting resources per task sent to the cluster, so you will need to make sure that each worker has enough resources to handle the tasks it receives. A data valuation task using game-theoretic methods will typically make a copy of the whole model and dataset to each worker, even if the re-training only happens on a subset of the data. This means that you should make sure that each worker has enough memory to handle the whole dataset.

    "},{"location":"getting-started/installation/","title":"Installing pyDVL","text":"

    To install the latest release use:

    pip install pyDVL\n

    To use all features of influence functions use instead:

    pip install pyDVL[influence]\n

    This includes a dependency on PyTorch (Version 2.0 and above) and thus is left out by default.

    In case that you have a supported version of CUDA installed (v11.2 to 11.8 as of this writing), you can enable eigenvalue computations for low-rank approximations with CuPy on the GPU by using:

    pip install pyDVL[cupy]\n

    If you use a different version of CUDA, please install CuPy manually.

    In order to check the installation you can use:

    python -c \"import pydvl; print(pydvl.__version__)\"\n

    You can also install the latest development version from TestPyPI:

    pip install pyDVL --index-url https://test.pypi.org/simple/\n
    "},{"location":"getting-started/installation/#dependencies","title":"Dependencies","text":"

    pyDVL requires Python >= 3.8, Memcached for caching and Ray for parallelization in a cluster (locally it uses joblib). Additionally, the Influence functions module requires PyTorch (see Installing pyDVL).

    ray is used to distribute workloads across nodes in a cluster (it can be used locally as well, but for this we recommend joblib instead). Please follow the instructions in their documentation to set up the cluster. Once you have a running cluster, you can use it by passing the address of the head node to parallel methods via ParallelConfig.

    "},{"location":"getting-started/installation/#setting-up-the-cache","title":"Setting up the cache","text":"

    memcached is an in-memory key-value store accessible over the network. pyDVL uses it to cache the computation of the utility function and speed up some computations (in particular, semi-value computations with the PermutationSampler but other methods may benefit as well).

    You can either install it as a package or run it inside a docker container (the simplest). For installation instructions, refer to the Getting started section in memcached's wiki. Then you can run it with:

    memcached -u user\n

    To run memcached inside a container in daemon mode instead, do:

    docker container run -d --rm -p 11211:11211 memcached:latest\n

    Using the cache

    Continue reading about the cache in the First Steps and the documentation for the caching module.

    "},{"location":"influence/","title":"The influence function","text":""},{"location":"influence/#the-influence-function","title":"The influence function","text":"

    Warning

    The code in the package pydvl.influence is experimental. Package structure and basic API are bound to change before v1.0.0

    The influence function (IF) is a method to quantify the effect (influence) that each training point has on the parameters of a model, and by extension on any function thereof. In particular, it allows one to estimate how much each training sample affects the error on a test point, making the IF useful for understanding and debugging models.

    Alas, the influence function relies on some assumptions that can make its application difficult. Yet another drawback is that it requires the computation of the inverse of the Hessian of the model wrt. its parameters, which is intractable for large models like deep neural networks. Much of the recent research tackles this issue using approximations, like a Neumann series (Agarwal et al., 2017)1, with the most successful solution using a low-rank approximation that iteratively finds increasing eigenspaces of the Hessian (Schioppa et al., 2021)2.

    pyDVL implements several methods for the efficient computation of the IF for machine learning. In the examples we document some of the difficulties that can arise when using the IF.

    "},{"location":"influence/#construction","title":"Construction","text":"

    First introduced in the context of robust statistics in (Hampel, 1974)3, the IF was popularized in the context of machine learning in (Koh and Liang, 2017)4.

    Following their formulation, consider an input space \\(\\mathcal{X}\\) (e.g. images) and an output space \\(\\mathcal{Y}\\) (e.g. labels). Let's take \\(z_i = (x_i, y_i)\\), for \\(i \\in \\{1,...,n\\}\\) to be the \\(i\\)-th training point, and \\(\\theta\\) to be the (potentially highly) multi-dimensional parameters of a model (e.g. \\(\\theta\\) is a big array with all of a neural network's parameters, including biases and/or dropout rates). We will denote with \\(L(z, \\theta)\\) the loss of the model for point \\(z\\) when the parameters are \\(\\theta.\\)

    To train a model, we typically minimize the loss over all \\(z_i\\), i.e. the optimal parameters are

    \\[\\hat{\\theta} = \\arg \\min_\\theta \\sum_{i=1}^n L(z_i, \\theta).\\]

    In practice, lack of convexity means that one doesn't really obtain the minimizer of the loss, and the training is stopped when the validation loss stops decreasing.

    For notational convenience, let's define

    \\[\\hat{\\theta}_{-z} = \\arg \\min_\\theta \\sum_{z_i \\ne z} L(z_i, \\theta), \\]

    i.e. \\(\\hat{\\theta}_{-z}\\) are the model parameters that minimize the total loss when \\(z\\) is not in the training dataset.

    In order to compute the impact of each training point on the model, we would need to calculate \\(\\hat{\\theta}_{-z}\\) for each \\(z\\) in the training dataset, thus re-training the model at least ~\\(n\\) times (more if model training is stochastic). This is computationally very expensive, especially for big neural networks. To circumvent this problem, we can just calculate a first order approximation of \\(\\hat{\\theta}\\). This can be done through single backpropagation and without re-training the full model.

    pyDVL supports two ways of computing the empirical influence function, namely up-weighting of samples and perturbation influences. The choice is done by the parameter influence_type in the main entry point compute_influences.

    "},{"location":"influence/#approximating-the-influence-of-a-point","title":"Approximating the influence of a point","text":"

    Let's define

    \\[\\hat{\\theta}_{\\epsilon, z} = \\arg \\min_\\theta \\frac{1}{n}\\sum_{i=1}^n L(z_i, \\theta) + \\epsilon L(z, \\theta), \\]

    which is the optimal \\(\\hat{\\theta}\\) when we up-weight \\(z\\) by an amount \\(\\epsilon \\gt 0\\).

    From a classical result (a simple derivation is available in Appendix A of (Koh and Liang, 2017)4), we know that:

    \\[\\frac{d \\ \\hat{\\theta}_{\\epsilon, z}}{d \\epsilon} \\Big|_{\\epsilon=0} = -H_{\\hat{\\theta}}^{-1} \\nabla_\\theta L(z, \\hat{\\theta}), \\]

    where \\(H_{\\hat{\\theta}} = \\frac{1}{n} \\sum_{i=1}^n \\nabla_\\theta^2 L(z_i, \\hat{\\theta})\\) is the Hessian of \\(L\\). These quantities are also known as influence factors.

    Importantly, notice that this expression is only valid when \\(\\hat{\\theta}\\) is a minimum of \\(L\\), or otherwise \\(H_{\\hat{\\theta}}\\) cannot be inverted! At the same time, in machine learning full convergence is rarely achieved, so direct Hessian inversion is not possible. Approximations need to be developed that circumvent the problem of inverting the Hessian of the model in all those (frequent) cases where it is not positive definite.

    The influence of training point \\(z\\) on test point \\(z_{\\text{test}}\\) is defined as:

    \\[\\mathcal{I}(z, z_{\\text{test}}) = L(z_{\\text{test}}, \\hat{\\theta}_{-z}) - L(z_{\\text{test}}, \\hat{\\theta}). \\]

    Notice that \\(\\mathcal{I}\\) is higher for points \\(z\\) which positively impact the model score, since the loss is higher when they are excluded from training. In practice, one needs to rely on the following infinitesimal approximation:

    \\[\\mathcal{I}_{up}(z, z_{\\text{test}}) = - \\frac{d L(z_{\\text{test}}, \\hat{\\theta}_{\\epsilon, z})}{d \\epsilon} \\Big|_{\\epsilon=0} \\]

    Using the chain rule and the results calculated above, we get:

    \\[\\mathcal{I}_{up}(z, z_{\\text{test}}) = - \\nabla_\\theta L(z_{\\text{test}}, \\hat{\\theta})^\\top \\ \\frac{d \\hat{\\theta}_{\\epsilon, z}}{d \\epsilon} \\Big|_{\\epsilon=0} = \\nabla_\\theta L(z_{\\text{test}}, \\hat{\\theta})^\\top \\ H_{\\hat{\\theta}}^{-1} \\ \\nabla_\\theta L(z, \\hat{\\theta}) \\]

    All the resulting factors are gradients of the loss wrt. the model parameters \\(\\hat{\\theta}\\). This can be easily computed through one or more backpropagation passes.

    "},{"location":"influence/#perturbation-definition-of-the-influence-score","title":"Perturbation definition of the influence score","text":"

    How would the loss of the model change if, instead of up-weighting an individual point \\(z\\), we were to up-weight only a single feature of that point? Given \\(z = (x, y)\\), we can define \\(z_{\\delta} = (x+\\delta, y)\\), where \\(\\delta\\) is a vector of zeros except for a 1 in the position of the feature we want to up-weight. In order to approximate the effect of modifying a single feature of a single point on the model score we can define

    \\[\\hat{\\theta}_{\\epsilon, z_{\\delta} ,-z} = \\arg \\min_\\theta \\frac{1}{n}\\sum_{i=1}^n L(z_{i}, \\theta) + \\epsilon L(z_{\\delta}, \\theta) - \\epsilon L(z, \\theta), \\]

    Similarly to what was done above, we up-weight point \\(z_{\\delta}\\), but then we also remove the up-weighting for all the features that are not modified by \\(\\delta\\). From the calculations in the previous section, it is then easy to see that

    \\[\\frac{d \\ \\hat{\\theta}_{\\epsilon, z_{\\delta} ,-z}}{d \\epsilon} \\Big|_{\\epsilon=0} = -H_{\\hat{\\theta}}^{-1} \\nabla_\\theta \\Big( L(z_{\\delta}, \\hat{\\theta}) - L(z, \\hat{\\theta}) \\Big) \\]

    and if the feature space is continuous and as \\(\\delta \\to 0\\) we can write

    \\[\\frac{d \\ \\hat{\\theta}_{\\epsilon, z_{\\delta} ,-z}}{d \\epsilon} \\Big|_{\\epsilon=0} = -H_{\\hat{\\theta}}^{-1} \\ \\nabla_x \\nabla_\\theta L(z, \\hat{\\theta}) \\delta + o(\\delta) \\]

    The influence of each feature of \\(z\\) on the loss of the model can therefore be estimated through the following quantity:

    \\[\\mathcal{I}_{pert}(z, z_{\\text{test}}) = - \\lim_{\\delta \\to 0} \\ \\frac{1}{\\delta} \\frac{d L(z_{\\text{test}}, \\hat{\\theta}_{\\epsilon, \\ z_{\\delta}, \\ -z})}{d \\epsilon} \\Big|_{\\epsilon=0} \\]

    which, using the chain rule and the results calculated above, is equal to

    \\[\\mathcal{I}_{pert}(z, z_{\\text{test}}) = - \\nabla_\\theta L(z_{\\text{test}}, \\hat{\\theta})^\\top \\ \\frac{d \\hat{\\theta}_{\\epsilon, z_{\\delta} ,-z}}{d \\epsilon} \\Big|_{\\epsilon=0} = \\nabla_\\theta L(z_{\\text{test}}, \\hat{\\theta})^\\top \\ H_{\\hat{\\theta}}^{-1} \\ \\nabla_x \\nabla_\\theta L(z, \\hat{\\theta}) \\]

    The perturbation definition of the influence score is not straightforward to understand, but it has a simple interpretation: it tells how much the loss of the model changes when a certain feature of point z is up-weighted. A positive perturbation influence score indicates that the feature might have a positive effect on the accuracy of the model.

    It is worth noting that the perturbation influence score is a very rough estimate of the impact of a point on the model's loss and it is subject to large approximation errors. It can nonetheless be used to build training-set attacks, as done in (Koh and Liang, 2017)4.

    "},{"location":"influence/#computation","title":"Computation","text":"

    The main entry point of the library for influence calculation is compute_influences. Given a pre-trained pytorch model with a loss, first an instance of TorchTwiceDifferentiable needs to be created:

    from pydvl.influence import TorchTwiceDifferentiable\nwrapped_model = TorchTwiceDifferentiable(model, loss, device)\n

    The device specifies where influence calculation will be run.

    Given training and test data loaders, the influence of each training point on each test point can be calculated via:

    from pydvl.influence import compute_influences\nfrom torch.utils.data import DataLoader\ntraining_data_loader = DataLoader(...)\ntest_data_loader = DataLoader(...)\ncompute_influences(\nwrapped_model,\ntraining_data_loader,\ntest_data_loader,\n)\n

    The result is a tensor with one row per test point and one column per training point. Thus, each entry \\((i, j)\\) represents the influence of training point \\(j\\) on test point \\(i\\). A large positive influence indicates that training point \\(j\\) tends to improve the performance of the model on test point \\(i\\), and vice versa, a large negative influence indicates that training point \\(j\\) tends to worsen the performance of the model on test point \\(i\\).

    "},{"location":"influence/#perturbation-influences","title":"Perturbation influences","text":"

    The method of empirical influence computation can be selected in compute_influences with the parameter influence_type:

    from pydvl.influence import compute_influences\ncompute_influences(\nwrapped_model,\ntraining_data_loader,\ntest_data_loader,\ninfluence_type=\"perturbation\",\n)\n

    The result is a tensor with at least three dimensions. The first two dimensions are the same as in the influence_type=up case, i.e. one row per test point and one column per training point. The remaining dimensions are the same as the number of input features in the data. Therefore, each entry in the tensor represents the influence of each feature of each training point on each test point.

    "},{"location":"influence/#approximate-matrix-inversion","title":"Approximate matrix inversion","text":"

    In almost every practical application it is not possible to construct, let alone invert, the complete Hessian in memory. pyDVL offers several approximate algorithms to invert it by setting the parameter inversion_method of compute_influences.

    from pydvl.influence import compute_influences\ncompute_influences(\nwrapped_model,\ntraining_data_loader,\ntest_data_loader,\ninversion_method=\"cg\"\n)\n

    Each inversion method has its own set of parameters that can be tuned to improve the final result. These parameters can be passed directly to compute_influences as keyword arguments. For example, the following code sets the maximum number of iterations for conjugate gradient to \\(100\\) and the minimum relative error to \\(0.01\\):

    from pydvl.influence import compute_influences\ncompute_influences(\nwrapped_model,\ntraining_data_loader,\ntest_data_loader,\ninversion_method=\"cg\",\nhessian_regularization=1e-4,\nmaxiter=100,\nrtol=0.01\n)\n
    "},{"location":"influence/#hessian-regularization","title":"Hessian regularization","text":"

    Additionally, and as discussed in the introduction, in machine learning training rarely converges to a global minimum of the loss. Despite good apparent convergence, \\(\\hat{\\theta}\\) might be located in a region with flat curvature or close to a saddle point. In particular, the Hessian might have vanishing eigenvalues making its direct inversion impossible. Certain methods, such as the Arnoldi method are robust against these problems, but most are not.

    To circumvent this problem, many approximate methods can be implemented. The simplest adds a small hessian perturbation term, i.e. \\(H_{\\hat{\\theta}} + \\lambda \\mathbb{I}\\), with \\(\\mathbb{I}\\) being the identity matrix. This standard trick ensures that the eigenvalues of \\(H_{\\hat{\\theta}}\\) are bounded away from zero and therefore the matrix is invertible. In order for this regularization not to corrupt the outcome too much, the parameter \\(\\lambda\\) should be as small as possible while still allowing a reliable inversion of \\(H_{\\hat{\\theta}} + \\lambda \\mathbb{I}\\).

    from pydvl.influence import compute_influences\ncompute_influences(\nwrapped_model,\ntraining_data_loader,\ntest_data_loader,\ninversion_method=\"cg\",\nhessian_regularization=1e-4\n)\n
    "},{"location":"influence/#influence-factors","title":"Influence factors","text":"

    The compute_influences method offers a fast way to obtain the influence scores given a model and a dataset. Nevertheless, it is often more convenient to inspect and save some of the intermediate results of influence calculation for later use.

    The influence factors (refer to the previous section for a definition) are typically the most computationally demanding part of influence calculation. They can be obtained via the compute_influence_factors function, saved, and later used for influence calculation on different subsets of the training dataset.

    from pydvl.influence import compute_influence_factors\ninfluence_factors = compute_influence_factors(\nwrapped_model,\ntraining_data_loader,\ntest_data_loader,\ninversion_method=\"cg\"\n)\n

    The result is an object of type InverseHvpResult, which holds the calculated influence factors (influence_factors.x) and a dictionary with the info on the inversion process (influence_factors.info).

    "},{"location":"influence/#methods-for-inverse-hvp-calculation","title":"Methods for inverse HVP calculation","text":"

    In order to calculate influence values, pyDVL implements several methods for the calculation of the inverse Hessian vector product (iHVP). More precisely, given a model, training data and a tensor \\(b\\), the function solve_hvp will find \\(x\\) such that \\(H x = b\\), where \\(H\\) is the Hessian of the model.

    Many different inversion methods can be selected via the parameter inversion_method of compute_influences.

    The following subsections will offer more detailed explanations for each method.

    "},{"location":"influence/#direct-inversion","title":"Direct inversion","text":"

    With inversion_method = \"direct\" pyDVL will calculate the inverse Hessian using the direct matrix inversion. This means that the Hessian will first be explicitly created and then inverted. This method is the most accurate, but also the most computationally demanding. It is therefore not recommended for large datasets or models with many parameters.

    import torch\nfrom pydvl.influence.inversion import solve_hvp\nb = torch.Tensor(...)\nsolve_hvp(\n\"direct\",\nwrapped_model,\ntraining_data_loader,\nb,\n)\n

    The result is an object of type InverseHvpResult, which holds two objects: influence_factors.x and influence_factors.info. The first one is the inverse Hessian vector product, while the second one is a dictionary with the info on the inversion process. For this method, the info consists of the Hessian matrix itself.

    "},{"location":"influence/#conjugate-gradient","title":"Conjugate Gradient","text":"

    This classical procedure for solving linear systems of equations is an iterative method that does not require the explicit inversion of the Hessian. Instead, it only requires the calculation of Hessian-vector products, making it a good choice for large datasets or models with many parameters. It is nevertheless much slower to converge than the direct inversion method and not as accurate. More info on the theory of conjugate gradient can be found on Wikipedia.

    In pyDVL, you can select conjugate gradient with inversion_method = \"cg\", like this:

    from pydvl.influence.inversion import solve_hvp\nsolve_hvp(\n\"cg\",\nwrapped_model,\ntraining_data_loader,\nb,\nx0=None,\nrtol=1e-7,\natol=1e-7,\nmaxiter=None,\n)\n

    The additional optional parameters x0, rtol, atol, and maxiter are passed to the solve_batch_cg function, and are respectively the initial guess for the solution, the relative tolerance, the absolute tolerance, and the maximum number of iterations.

    The resulting InverseHvpResult holds the solution of the iHVP, influence_factors.x, and some info on the inversion process influence_factors.info. More specifically, for each batch this will contain the number of iterations, a boolean indicating if the inversion converged, and the residual of the inversion.

    "},{"location":"influence/#linear-time-stochastic-second-order-approximation-lissa","title":"Linear time Stochastic Second-Order Approximation (LiSSA)","text":"

    The LiSSA method is a stochastic approximation of the inverse Hessian vector product. Compared to conjugate gradient it is faster but less accurate and typically suffers from instability.

    In order to find the solution of the HVP, LiSSA iteratively approximates the inverse of the Hessian matrix with the following update:

    \\[H^{-1}_{j+1} b = b + (I - d) \\ H^{-1}_j b - \\frac{H \\ H^{-1}_j b}{s},\\]

    where \\(d\\) and \\(s\\) are a dampening and a scaling factor, which are essential for the convergence of the method and they need to be chosen carefully, and I is the identity matrix. More info on the theory of LiSSA can be found in the original paper (Agarwal et al., 2017)1.

    In pyDVL, you can select LiSSA with inversion_method = \"lissa\", like this:

    from pydvl.influence.inversion import solve_hvp\nsolve_hvp(\n\"lissa\",\nwrapped_model,\ntraining_data_loader,\nb,\nmaxiter=1000,\ndampen=0.0,\nscale=10.0,\nh0=None,\nrtol=1e-4,\n)\n

    with the additional optional parameters maxiter, dampen, scale, h0, and rtol, which are passed to the solve_lissa function, being the maximum number of iterations, the dampening factor, the scaling factor, the initial guess for the solution and the relative tolerance, respectively.

    The resulting InverseHvpResult holds the solution of the iHVP, influence_factors.x, and, within influence_factors.info, the maximum percentage error and the mean percentage error of the approximation.

    "},{"location":"influence/#arnoldi-solver","title":"Arnoldi solver","text":"

    The Arnoldi method is a Krylov subspace method for approximating dominating eigenvalues and eigenvectors. Under a low rank assumption on the Hessian at a minimizer (which is typically observed for deep neural networks), this approximation captures the essential action of the Hessian. More concretely, for \\(Hx=b\\) the solution is approximated by

    \\[x \\approx V D^{-1} V^T b\\]

    where \\(D\\) is a diagonal matrix with the top (in absolute value) eigenvalues of the Hessian and \\(V\\) contains the corresponding eigenvectors. See also (Schioppa et al., 2021)2.

    In pyDVL, you can use Arnoldi with inversion_method = \"arnoldi\", as follows:

    from pydvl.influence.inversion import solve_hvp\nsolve_hvp(\n\"arnoldi\",\nwrapped_model,\ntraining_data_loader,\nb,\nhessian_perturbation=0.0,\nrank_estimate=10,\ntol=1e-6,\neigen_computation_on_gpu=False \n)\n

    For the parameters, check solve_arnoldi. The resulting InverseHvpResult holds the solution of the iHVP, influence_factors.x, and, within influence_factors.info, the computed eigenvalues and eigenvectors.

    1. Agarwal, N., Bullins, B., Hazan, E., 2017. Second-Order Stochastic Optimization for Machine Learning in Linear Time. JMLR 18, 1\u201340.\u00a0\u21a9\u21a9

    2. Schioppa, A., Zablotskaia, P., Vilar, D., Sokolov, A., 2021. Scaling Up Influence Functions. Presented at the AAAI-22, arXiv. https://doi.org/10.48550/arXiv.2112.03052 \u21a9\u21a9

    3. Hampel, F.R., 1974. The Influence Curve and Its Role in Robust Estimation. J. Am. Stat. Assoc. 69, 383\u2013393. https://doi.org/10.2307/2285666 \u21a9

    4. Koh, P.W., Liang, P., 2017. Understanding Black-box Predictions via Influence Functions, in: Proceedings of the 34th International Conference on Machine Learning. Presented at the International Conference on Machine Learning, PMLR, pp. 1885\u20131894.\u00a0\u21a9\u21a9\u21a9

    "},{"location":"value/","title":"Data valuation","text":"

    Note

    If you want to jump right into the steps to compute values, skip ahead to Computing data values.

    Data valuation is the task of assigning a number to each element of a training set which reflects its contribution to the final performance of some model trained on it. Some methods attempt to be model-agnostic, but in most cases the model is an integral part of the method. In these cases, this number is not an intrinsic property of the element of interest, but typically a function of three factors:

    1. The dataset \\(D\\), or more generally, the distribution it was sampled from: In some cases one only cares about values wrt. a given data set, in others value would ideally be the (expected) contribution of a data point to any random set \\(D\\) sampled from the same distribution. pyDVL implements methods of the first kind.

    2. The algorithm \\(\\mathcal{A}\\) mapping the data \\(D\\) to some estimator \\(f\\) in a model class \\(\\mathcal{F}\\). E.g. MSE minimization to find the parameters of a linear model.

    3. The performance metric of interest \\(u\\) for the problem. When value depends on a model, it must be measured in some way which uses it. E.g. the \\(R^2\\) score or the negative MSE over a test set. This metric will be computed over a held-out valuation set.

    pyDVL collects algorithms for the computation of data values in this sense, mostly those derived from cooperative game theory. The methods can be found in the package pydvl.value, with support from modules pydvl.utils.dataset and pydvl.utils.utility, as detailed below.

    Warning

    Be sure to read the section on the difficulties using data values.

    There are three main families of methods for data valuation: game-theoretic, influence-based and intrinsic. As of v0.7.0 pyDVL supports the first two. Here, we focus on game-theoretic concepts and refer to the main documentation on the influence function for the second.

    "},{"location":"value/#game-theoretical-methods","title":"Game theoretical methods","text":"

    The main contenders in game-theoretic approaches are Shapley values (Ghorbani and Zou, 2019)1, (Kwon et al., 2021)2, (Schoch et al., 2022)3, their generalization to so-called semi-values by (Kwon and Zou, 2022)4 and (Wang and Jia, 2022)5, and the Core (Yan and Procaccia, 2021)6. All of these are implemented in pyDVL.

    In these methods, data points are considered players in a cooperative game whose outcome is the performance of the model when trained on subsets (coalitions) of the data, measured on a held-out valuation set. This outcome, or utility, must typically be computed for every subset of the training set, so that an exact computation is \\(\\mathcal{O} (2^n)\\) in the number of samples \\(n\\), with each iteration requiring a full re-fitting of the model using a coalition as training set. Consequently, most methods involve Monte Carlo approximations, and sometimes approximate utilities which are faster to compute, e.g. proxy models (Wang et al., 2022)7 or constant-cost approximations like Neural Tangent Kernels (Wu et al., 2022)8.

    The reasoning behind using game theory is that, in order to be useful, an assignment of value, dubbed valuation function, is usually required to fulfil certain requirements of consistency and \"fairness\". For instance, in some applications value should not depend on the order in which data are considered, or it should be equal for samples that contribute equally to any subset of the data (of equal size). When considering aggregated value for (sub-)sets of data there are additional desiderata, like having a value function that does not increase with repeated samples. Game-theoretic methods are all rooted in axioms that by construction ensure different desiderata, but despite their practical usefulness, none of them are either necessary or sufficient for all applications. For instance, SV methods try to equitably distribute all value among all samples, failing to identify repeated ones as unnecessary, with e.g. a zero value.

    "},{"location":"value/#applications-of-data-valuation","title":"Applications of data valuation","text":"

    Many applications are touted for data valuation, but the results can be inconsistent. Values have a strong dependency on the training procedure and the performance metric used. For instance, accuracy is a poor metric for imbalanced sets and this has a stark effect on data values. Some models exhibit great variance in some regimes and this again has a detrimental effect on values.

    Nevertheless, some of the most promising applications are:

    • Cleaning of corrupted data.
    • Pruning unnecessary or irrelevant data.
    • Repairing mislabeled data.
    • Guiding data acquisition and annotation (active learning).
    • Anomaly detection and model debugging and interpretation.

    Additionally, one of the motivating applications for the whole field is that of data markets: a marketplace where data owners can sell their data to interested parties. In this setting, data valuation can be a key component to determine the price of data. Algorithm-agnostic methods like LAVA (Just et al., 2023)9 are particularly well suited for this, as they use the Wasserstein distance between a vendor's data and the buyer's to determine the value of the former.

    However, this is a complex problem which can face practical banal problems like the fact that data owners may not wish to disclose their data for valuation.

    "},{"location":"value/#computing-data-values","title":"Computing data values","text":"

    Using pyDVL to compute data values is a simple process that can be broken down into three steps:

    1. Creating a Dataset object from your data.
    2. Creating a Utility which ties your model to the dataset and a scoring function.
    3. Computing values with a method of your choice, e.g. via compute_shapley_values.
    "},{"location":"value/#creating-a-dataset","title":"Creating a Dataset","text":"

    The first item in the tuple \\((D, \\mathcal{A}, u)\\) characterising data value is the dataset. The class Dataset is a simple convenience wrapper for the train and test splits that is used throughout pyDVL. The test set will be used to evaluate a scoring function for the model.

    It can be used as follows:

    import numpy as np\nfrom pydvl.utils import Dataset\nfrom sklearn.model_selection import train_test_split\nX, y = np.arange(100).reshape((50, 2)), np.arange(50)\nX_train, X_test, y_train, y_test = train_test_split(\nX, y, test_size=0.5, random_state=16\n)\ndataset = Dataset(X_train, X_test, y_train, y_test)\n

    It is also possible to construct Datasets from sklearn toy datasets for illustrative purposes using from_sklearn.

    "},{"location":"value/#grouping-data","title":"Grouping data","text":"

    Be it because data valuation methods are computationally very expensive, or because we are interested in the groups themselves, it can often be useful or necessary to group samples to valuate them together. GroupedDataset provides an alternative to Dataset with the same interface which allows this.

    You can see an example in action in the Spotify notebook, but here's a simple example grouping a pre-existing Dataset. First we construct an array mapping each index in the dataset to a group, then use from_dataset:

    import numpy as np\nfrom pydvl.utils import GroupedDataset\n# Randomly assign elements to any one of num_groups:\ndata_groups = np.random.randint(0, num_groups, len(dataset))\ngrouped_dataset = GroupedDataset.from_dataset(dataset, data_groups)\n
    "},{"location":"value/#creating-a-utility","title":"Creating a Utility","text":"

    In pyDVL we have slightly overloaded the name \"utility\" and use it to refer to an object that keeps track of all three items in \\((D, \\mathcal{A}, u)\\). This will be an instance of Utility which, as mentioned, is a convenient wrapper for the dataset, model and scoring function used for valuation methods.

    Here's a minimal example:

    import sklearn as sk\nfrom pydvl.utils import Dataset, Utility\ndataset = Dataset.from_sklearn(sk.datasets.load_iris())\nmodel = sk.svm.SVC()\nutility = Utility(model, dataset)\n

    The object utility is a callable that data valuation methods will execute with different subsets of training data. Each call will retrain the model on a subset and evaluate it on the test data using a scoring function. By default, Utility will use model.score(), but it is possible to use any scoring function (greater values must be better). In particular, the constructor accepts the same types as argument as sklearn.model_selection.cross_validate: a string, a scorer callable or None for the default.

    utility = Utility(model, dataset, \"explained_variance\")\n

    Utility will wrap the fit() method of the model to cache its results. This greatly reduces computation times of Monte Carlo methods. Because of how caching is implemented, it is important not to reuse Utility objects for different datasets. You can read more about setting up the cache in the installation guide and the documentation of the caching module.

    "},{"location":"value/#using-custom-scorers","title":"Using custom scorers","text":"

    The scoring argument of Utility can be used to specify a custom Scorer object. This is a simple wrapper for a callable that takes a model, and test data and returns a score.

    More importantly, the object provides information about the range of the score, which is used by some methods to estimate the number of samples necessary, and about what default value to use when the model fails to train.

    Note

    The most important property of a Scorer is its default value. Because many models will fail to fit on small subsets of the data, it is important to provide a sensible default value for the score.

    It is possible to skip the construction of the Scorer when constructing the Utility object. The two following calls are equivalent:

    from pydvl.utils import Utility, Scorer\nutility = Utility(\nmodel, dataset, \"explained_variance\", score_range=(-np.inf, 1), default_score=0.0\n)\nutility = Utility(\nmodel, dataset, Scorer(\"explained_variance\", range=(-np.inf, 1), default=0.0)\n)\n
    "},{"location":"value/#learning-the-utility","title":"Learning the utility","text":"

    Because each evaluation of the utility entails a full retrain of the model with a new subset of the training set, it is natural to try to learn this mapping from subsets to scores. This is the idea behind Data Utility Learning (DUL) (Wang et al., 2022)7 and in pyDVL it's as simple as wrapping the Utility inside DataUtilityLearning:

    from pydvl.utils import Utility, DataUtilityLearning, Dataset\nfrom sklearn.linear_model import LinearRegression, LogisticRegression\nfrom sklearn.datasets import load_iris\ndataset = Dataset.from_sklearn(load_iris())\nu = Utility(LogisticRegression(), dataset, enable_cache=False)\ntraining_budget = 3\nwrapped_u = DataUtilityLearning(u, training_budget, LinearRegression())\n# First 3 calls will be computed normally\nfor i in range(training_budget):\n_ = wrapped_u((i,))\n# Subsequent calls will be computed using the fit model for DUL\nwrapped_u((1, 2, 3))\n

    As you can see, all that is required is a model to learn the utility itself and the fitting and using of the learned model happens behind the scenes.

    There is a longer example with an investigation of the results achieved by DUL in a dedicated notebook.

    "},{"location":"value/#leave-one-out-values","title":"Leave-One-Out values","text":"

    LOO is the simplest approach to valuation. It assigns to each sample its marginal utility as value:

    \\[v_u(i) = u(D) \u2212 u(D_{-i}).\\]

    For notational simplicity, we consider the valuation function as defined over the indices of the dataset \\(D\\), and \\(i \\in D\\) is the index of the sample, \\(D_{-i}\\) is the training set without the sample \\(x_i\\), and \\(u\\) is the utility function.

    For the purposes of data valuation, this is rarely useful beyond serving as a baseline for benchmarking, although in some benchmarks it can perform astonishingly well on occasion. One particular weakness is that it does not necessarily correlate with an intrinsic value of a sample: since it is a marginal utility, it is affected by diminishing returns. Often, the training set is large enough for a single sample not to have any significant effect on training performance, despite any qualities it may possess. Whether this is indicative of low value or not depends on each one's goals and definitions, but other methods are typically preferable.

    from pydvl.value.loo import compute_loo\nvalues = compute_loo(utility, n_jobs=-1)\n

    The return value of all valuation functions is an object of type ValuationResult. This can be iterated over, indexed with integers, slices and Iterables, as well as converted to a pandas.DataFrame.

    "},{"location":"value/#problems-of-data-values","title":"Problems of data values","text":"

    There are a number of factors that affect how useful values can be for your project. In particular, regression can be especially tricky, but the particular nature of every (non-trivial) ML problem can have an effect:

    • Unbounded utility: Choosing a scorer for a classifier is simple: accuracy or some F-score provides a bounded number with a clear interpretation. However, in regression problems most scores, like \\(R^2\\), are not bounded because regressors can be arbitrarily bad. This leads to great variability in the utility for low sample sizes, and hence unreliable Monte Carlo approximations to the values. Nevertheless, in practice it is only the ranking of samples that matters, and this tends to be accurate (wrt. the true ranking) despite inaccurate values.

      Squashing scores

      pyDVL offers a dedicated function composition for scorer functions which can be used to squash a score. The following is defined in module score:

      import numpy as np\nfrom pydvl.utils import compose_score\ndef sigmoid(x: float) -> float:\nreturn float(1 / (1 + np.exp(-x)))\nsquashed_r2 = compose_score(\"r2\", sigmoid, \"squashed r2\")\nsquashed_variance = compose_score(\n\"explained_variance\", sigmoid, \"squashed explained variance\"\n)\n
      These squashed scores can prove useful in regression problems, but they can also introduce issues in the low-value regime.

    • High variance utility: Classical applications of game theoretic value concepts operate with deterministic utilities, but in ML we use an evaluation of the model on a validation set as a proxy for the true risk. Even if the utility is bounded, if it has high variance then values will also have high variance, as will their Monte Carlo estimates. One workaround in pyDVL is to configure the caching system to allow multiple evaluations of the utility for every index set. A moving average is computed and returned once the standard error is small, see MemcachedConfig. (Wang and Jia, 2022)5 prove that by relaxing one of the Shapley axioms and considering the general class of semi-values, of which Shapley is an instance, one can prove that a choice of constant weights is the best one can do in a utility-agnostic setting. This method, dubbed Data Banzhaf, is available in pyDVL as compute_banzhaf_semivalues.

    • Data set size: Computing exact Shapley values is NP-hard, and Monte Carlo approximations can converge slowly. Massive datasets are thus impractical, at least with game-theoretical methods. A workaround is to group samples and investigate their value together. You can do this using GroupedDataset. There is a fully worked-out example here. Some algorithms also provide different sampling strategies to reduce the variance, but due to a no-free-lunch-type theorem, no single strategy can be optimal for all utilities.

    • Model size: Since every evaluation of the utility entails retraining the whole model on a subset of the data, large models require great amounts of computation. But also, they will effortlessly interpolate small to medium datasets, leading to great variance in the evaluation of performance on the dedicated validation set. One mitigation for this problem is cross-validation, but this would incur massive computational cost. As of v0.7.0 there are no facilities in pyDVL for cross-validating the utility (note that this would require cross-validating the whole value computation).

    1. Ghorbani, A., Zou, J., 2019. Data Shapley: Equitable Valuation of Data for Machine Learning, in: Proceedings of the 36th International Conference on Machine Learning, PMLR. Presented at the International Conference on Machine Learning (ICML 2019), PMLR, pp. 2242\u20132251.\u00a0\u21a9

    2. Kwon, Y., Rivas, M.A., Zou, J., 2021. Efficient Computation and Analysis of Distributional Shapley Values, in: Proceedings of the 24th International Conference on Artificial Intelligence and Statistics. Presented at the International Conference on Artificial Intelligence and Statistics, PMLR, pp. 793\u2013801.\u00a0\u21a9

    3. Schoch, S., Xu, H., Ji, Y., 2022. CS-Shapley: Class-wise Shapley Values for Data Valuation in Classification, in: Proc. Of the Thirty-Sixth Conference on Neural Information Processing Systems (NeurIPS). Presented at the Advances in Neural Information Processing Systems (NeurIPS 2022).\u00a0\u21a9

    4. Kwon, Y., Zou, J., 2022. Beta Shapley: A Unified and Noise-reduced Data Valuation Framework for Machine Learning, in: Proceedings of the 25th International Conference on Artificial Intelligence and Statistics (AISTATS) 2022. Presented at the AISTATS 2022, PMLR.\u00a0\u21a9

    5. Wang, J.T., Jia, R., 2022. Data Banzhaf: A Robust Data Valuation Framework for Machine Learning [WWW Document]. https://doi.org/10.48550/arXiv.2205.15466 \u21a9\u21a9

    6. Yan, T., Procaccia, A.D., 2021. If You Like Shapley Then You\u2019ll Love the Core, in: Proceedings of the 35th AAAI Conference on Artificial Intelligence, 2021. Presented at the AAAI Conference on Artificial Intelligence, Association for the Advancement of Artificial Intelligence, pp. 5751\u20135759. https://doi.org/10.1609/aaai.v35i6.16721 \u21a9

    7. Wang, T., Yang, Y., Jia, R., 2022. Improving Cooperative Game Theory-based Data Valuation via Data Utility Learning. Presented at the International Conference on Learning Representations (ICLR 2022). Workshop on Socially Responsible Machine Learning, arXiv. https://doi.org/10.48550/arXiv.2107.06336 \u21a9\u21a9

    8. Wu, Z., Shu, Y., Low, B.K.H., 2022. DAVINZ: Data Valuation using Deep Neural Networks at Initialization, in: Proceedings of the 39th International Conference on Machine Learning. Presented at the International Conference on Machine Learning, PMLR, pp. 24150\u201324176.\u00a0\u21a9

    9. Just, H.A., Kang, F., Wang, T., Zeng, Y., Ko, M., Jin, M., Jia, R., 2023. LAVA: Data Valuation without Pre-Specified Learning Algorithms. Presented at The Eleventh International Conference on Learning Representations (ICLR 2023).\u00a0\u21a9

    "},{"location":"value/notation/","title":"Notation for valuation","text":"

    The following notation is used throughout the documentation:

    Let \\(D = \\{x_1, \\ldots, x_n\\}\\) be a training set of \\(n\\) samples.

    The utility function \\(u:\\mathcal{D} \\rightarrow \\mathbb{R}\\) maps subsets of \\(D\\) to real numbers.

    The value \\(v\\) of the \\(i\\)-th sample in dataset \\(D\\) wrt. utility \\(u\\) is denoted as \\(v_u(x_i)\\) or simply \\(v(i)\\).

    For any \\(S \\subseteq D\\), we denote by \\(S_{-i}\\) the set of samples in \\(S\\) excluding \\(x_i\\), and \\(S_{+i}\\) denotes the set \\(S\\) with \\(x_i\\) added.

    The marginal utility of adding sample \\(x_i\\) to a subset \\(S\\) is denoted as \\(\\delta(i) := u(S_{+i}) - u(S)\\).

    The set \\(D_{-i}^{(k)}\\) contains all subsets of \\(D\\) of size \\(k\\) that do not include sample \\(x_i\\).

    "},{"location":"value/semi-values/","title":"Semi-values","text":"

    SV is a particular case of a more general concept called semi-value, which is a generalization to different weighting schemes. A semi-value is any valuation function with the form:

    \\[ v_\\text{semi}(i) = \\sum_{k=1}^n w(k) \\sum_{S \\in D_{-i}^{(k)}} [u(S_{+i}) - u(S)], \\]

    where the coefficients \\(w(k)\\) satisfy the property:

    \\[\\sum_{k=1}^n w(k) = 1,\\]

    the set \\(D_{-i}^{(k)}\\) contains all subsets of \\(D\\) of size \\(k\\) that do not include sample \\(x_i\\), \\(S_{+i}\\) is the set \\(S\\) with \\(x_i\\) added, and \\(u\\) is the utility function.

    Two instances of this are Banzhaf indices (Wang and Jia, 2022)1, and Beta Shapley (Kwon and Zou, 2022)2, with better numerical and rank stability in certain situations.

    Note

    Shapley values are a particular case of semi-values and can therefore also be computed with the methods described here. However, as of version 0.7.0, we recommend using compute_shapley_values instead, in particular because it implements truncation policies for TMCS.

    "},{"location":"value/semi-values/#beta-shapley","title":"Beta Shapley","text":"

    For some machine learning applications, where the utility is typically the performance when trained on a set \\(S \\subset D\\), diminishing returns are often observed when computing the marginal utility of adding a new data point.

    Beta Shapley is a weighting scheme that uses the Beta function to place more weight on subsets deemed to be more informative. The weights are defined as:

    \\[ w(k) := \\frac{B(k+\\beta, n-k+1+\\alpha)}{B(\\alpha, \\beta)}, \\]

    where \\(B\\) is the Beta function, and \\(\\alpha\\) and \\(\\beta\\) are parameters that control the weighting of the subsets. Setting both to 1 recovers Shapley values, and setting \\(\\alpha = 1\\), and \\(\\beta = 16\\) is reported in (Kwon and Zou, 2022)2 to be a good choice for some applications. Beta Shapley values are available in pyDVL through compute_beta_shapley_semivalues:

    from pydvl.value import *\nutility = Utility(model, data)\nvalues = compute_beta_shapley_semivalues(\nu=utility, done=AbsoluteStandardError(threshold=1e-4), alpha=1, beta=16\n)\n

    See however the Banzhaf indices section for an alternative choice of weights which is reported to work better.

    "},{"location":"value/semi-values/#banzhaf-indices","title":"Banzhaf indices","text":"

    As noted in the section Problems of Data Values, the Shapley value can be very sensitive to variance in the utility function. For machine learning applications, where the utility is typically the performance when trained on a set \\(S \\subset D\\), this variance is often largest for smaller subsets \\(S\\). It is therefore reasonable to try reducing the relative contribution of these subsets with adequate weights.

    One such choice of weights is the Banzhaf index, which is defined as the constant:

    \\[w(k) := 2^{-(n-1)},\\]

    for all set sizes \\(k\\). The intuition for picking a constant weight is that for any choice of weight function \\(w\\), one can always construct a utility with higher variance where \\(w\\) is greater. Therefore, in a worst-case sense, the best one can do is to pick a constant weight.

    The authors of (Wang and Jia, 2022)1 show that Banzhaf indices are more robust to variance in the utility function than Shapley and Beta Shapley values. They are available in pyDVL through compute_banzhaf_semivalues:

    from pydvl.value import *\nutility = Utility(model, data)\nvalues = compute_banzhaf_semivalues(\nu=utility, done=AbsoluteStandardError(threshold=1e-4)\n)\n
    "},{"location":"value/semi-values/#general-semi-values","title":"General semi-values","text":"

    As explained above, both Beta Shapley and Banzhaf indices are special cases of semi-values. In pyDVL we provide a general method for computing these with any combination of the three ingredients that define a semi-value:

    • A utility function \\(u\\).
    • A sampling method
    • A weighting scheme \\(w\\).

    You can construct any combination of these three ingredients with compute_generic_semivalues. The utility function is the same as for Shapley values, and the sampling method can be any of the types defined in the samplers module. For instance, the following snippet is equivalent to the Beta Shapley example above:

    from pydvl.value import *\ndata = Dataset(...)\nutility = Utility(model, data)\nvalues = compute_generic_semivalues(\nsampler=PermutationSampler(data.indices),\nu=utility,\ncoefficient=beta_coefficient(alpha=1, beta=16),\ndone=AbsoluteStandardError(threshold=1e-4),\n)\n

    Allowing any coefficient can help when experimenting with models which are more sensitive to changes in training set size. However, Data Banzhaf indices are proven to be the most robust to variance in the utility function, in the sense of rank stability, across a range of models and datasets (Wang and Jia, 2022)1.

    Careful with permutation sampling

    This generic implementation of semi-values allowing for any combination of sampling and weighting schemes is very flexible and, in principle, it recovers the original Shapley value, so that compute_shapley_values is no longer necessary. However, it loses the optimization in permutation sampling that reuses the utility computation from the last iteration when iterating over a permutation. This doubles the computation requirements (and slightly increases variance) when using permutation sampling, unless the cache is enabled. In addition, as mentioned above, truncation policies are not supported by this generic implementation (as of v0.7.0). For these reasons it is preferable to use compute_shapley_values whenever not computing other semi-values.

    1. Wang, J.T., Jia, R., 2022. Data Banzhaf: A Robust Data Valuation Framework for Machine Learning [WWW Document]. https://doi.org/10.48550/arXiv.2205.15466 \u21a9\u21a9\u21a9

    2. Kwon, Y., Zou, J., 2022. Beta Shapley: A Unified and Noise-reduced Data Valuation Framework for Machine Learning, in: Proceedings of the 25th International Conference on Artificial Intelligence and Statistics (AISTATS) 2022. Presented at the AISTATS 2022, PMLR.\u00a0\u21a9\u21a9

    "},{"location":"value/shapley/","title":"Shapley value","text":""},{"location":"value/shapley/#shapley-value","title":"Shapley value","text":"

    The Shapley method is an approach to compute data values originating in cooperative game theory. Shapley values are a common way of assigning payoffs to each participant in a cooperative game (i.e. one in which players can form coalitions) in a way that ensures that certain axioms are fulfilled.

    pyDVL implements several methods for the computation and approximation of Shapley values. They can all be accessed via the facade function compute_shapley_values. The supported methods are enumerated in ShapleyMode.

    Empirically, the most useful method is the so-called Truncated Monte Carlo Shapley (Ghorbani and Zou, 2019)1, which is a Monte Carlo approximation of the permutation Shapley value.

    "},{"location":"value/shapley/#combinatorial-shapley","title":"Combinatorial Shapley","text":"

    The first algorithm is just a verbatim implementation of the definition. As such it returns as exact a value as the utility function allows (see what this means in Problems of Data Values).

    The value \\(v\\) of the \\(i\\)-th sample in dataset \\(D\\) wrt. utility \\(u\\) is computed as a weighted sum of its marginal utility wrt. every possible coalition of training samples within the training set:

    \\[ v(i) = \\frac{1}{n} \\sum_{S \\subseteq D_{-i}} \\binom{n-1}{ | S | }^{-1} [u(S_{+i}) \u2212 u(S)] ,\\]

    where \\(D_{-i}\\) denotes the set of samples in \\(D\\) excluding \\(x_i\\), and \\(S_{+i}\\) denotes the set \\(S\\) with \\(x_i\\) added.

    from pydvl.value import compute_shapley_values\nvalues = compute_shapley_values(utility, mode=\"combinatorial_exact\")\ndf = values.to_dataframe(column='value')\n

    We can convert the return value to a pandas.DataFrame and name the column with the results as value. Please refer to the documentation in shapley and ValuationResult for more information.

    "},{"location":"value/shapley/#monte-carlo-combinatorial-shapley","title":"Monte Carlo Combinatorial Shapley","text":"

    Because the number of subsets \\(S \\subseteq D_{-i}\\) is \\(2^{ | D | - 1 }\\), one typically must resort to approximations. The simplest one is done via Monte Carlo sampling of the powerset \\(\\mathcal{P}(D)\\). In pyDVL this simple technique is called \"Monte Carlo Combinatorial\". The method has a very poor convergence rate and others are preferred, but if desired, usage follows the same pattern:

    from pydvl.value import compute_shapley_values, MaxUpdates\nvalues = compute_shapley_values(\nutility, mode=\"combinatorial_montecarlo\", done=MaxUpdates(1000)\n)\ndf = values.to_dataframe(column='cmc')\n

    The DataFrames returned by most Monte Carlo methods will contain approximate standard errors as an additional column, in this case named cmc_stderr.

    Note the usage of the object MaxUpdates as the stop condition. This is an instance of a StoppingCriterion. Other examples are MaxTime and AbsoluteStandardError.

    "},{"location":"value/shapley/#owen-sampling","title":"Owen sampling","text":"

    Owen Sampling (Okhrati and Lipani, 2021)2 is a practical algorithm based on the combinatorial definition. It uses a continuous extension of the utility from \\(\\{0,1\\}^n\\), where a 1 in position \\(i\\) means that sample \\(x_i\\) is used to train the model, to \\([0,1]^n\\). The ensuing expression for Shapley value uses integration instead of discrete weights:

    \\[ v_u(i) = \\int_0^1 \\mathbb{E}_{S \\sim P_q(D_{-i})} [u(S_{+i}) - u(S)] \\, dq. \\]

    Using Owen sampling follows the same pattern as every other method for Shapley values in pyDVL. First construct the dataset and utility, then call compute_shapley_values:

    from pydvl.value import compute_shapley_values\nvalues = compute_shapley_values(\nu=utility, mode=\"owen\", n_iterations=4, max_q=200\n)\n

    There are more details on Owen sampling, and its variant Antithetic Owen Sampling in the documentation for the function doing the work behind the scenes: owen_sampling_shapley.

    Note that in this case we do not pass a StoppingCriterion to the function, but instead the number of iterations and the maximum number of samples to use in the integration.

    "},{"location":"value/shapley/#permutation-shapley","title":"Permutation Shapley","text":"

    An equivalent way of computing Shapley values (ApproShapley) appeared in (Castro et al., 2009)3 and is the basis for the method most often used in practice. It uses permutations over indices instead of subsets:

    \\[ v_u(x_i) = \\frac{1}{n!} \\sum_{\\sigma \\in \\Pi(n)} [u(\\sigma_{:i} \\cup \\{x_i\\}) \u2212 u(\\sigma_{:i})], \\]

    where \\(\\sigma_{:i}\\) denotes the set of indices in permutation \\(\\sigma\\) before the position where \\(i\\) appears. To approximate this sum (which has \\(\\mathcal{O}(n!)\\) terms!) one uses Monte Carlo sampling of permutations, something which has surprisingly low sample complexity. One notable difference wrt. the combinatorial approach above is that the approximations always fulfill the efficiency axiom of Shapley, namely \\(\\sum_{i=1}^n \\hat{v}_i = u(D)\\) (see (Castro et al., 2009)3, Proposition 3.2).

    By adding two types of early stopping, the result is the so-called Truncated Monte Carlo Shapley (Ghorbani and Zou, 2019)1, which is efficient enough to be useful in applications. The first is simply a convergence criterion, of which there are several to choose from. The second is a criterion to truncate the iteration over single permutations. RelativeTruncation chooses to stop iterating over samples in a permutation when the marginal utility becomes too small.

    from pydvl.value import compute_shapley_values, MaxUpdates, RelativeTruncation\nvalues = compute_shapley_values(\nu=utility,\nmode=\"permutation_montecarlo\",\ndone=MaxUpdates(1000),\ntruncation=RelativeTruncation(utility, rtol=0.01)\n)\n

    You can see this method in action in this example using the Spotify dataset.

    "},{"location":"value/shapley/#exact-shapley-for-knn","title":"Exact Shapley for KNN","text":"

    It is possible to exploit the local structure of K-Nearest Neighbours to reduce the number of subsets to consider: because no sample besides the K closest affects the score, most are irrelevant and it is possible to compute a value in linear time. This method was introduced by (Jia et al., 2019)4, and can be used in pyDVL with:

    from pydvl.utils import Dataset, Utility\nfrom pydvl.value import compute_shapley_values\nfrom sklearn.neighbors import KNeighborsClassifier\nmodel = KNeighborsClassifier(n_neighbors=5)\ndata = Dataset(...)\nutility = Utility(model, data)\nvalues = compute_shapley_values(u=utility, mode=\"knn\")\n
    "},{"location":"value/shapley/#group-testing","title":"Group testing","text":"

    An alternative approach introduced in (Jia et al., 2019)4 first approximates the differences of values with a Monte Carlo sum. With

    \\[\\hat{\\Delta}_{i j} \\approx v_i - v_j,\\]

    one then solves the following linear constraint satisfaction problem (CSP) to infer the final values:

    \\[ \\begin{array}{lll} \\sum_{i = 1}^N v_i & = & U (D)\\\\ | v_i - v_j - \\hat{\\Delta}_{i j} | & \\leqslant & \\frac{\\varepsilon}{2 \\sqrt{N}} \\end{array} \\]

    Warning

    We have reproduced this method in pyDVL for completeness and benchmarking, but we don't advocate its use because of the speed and memory cost. Despite our best efforts, the number of samples required in practice for convergence can be several orders of magnitude worse than with e.g. TMCS. Additionally, the CSP can sometimes turn out to be infeasible.

    Usage follows the same pattern as every other Shapley method, but with the addition of an epsilon parameter required for the solution of the CSP. It should be the same value used to compute the minimum number of samples required. This can be done with num_samples_eps_delta, but note that the number returned will be huge! In practice, fewer samples can be enough, but the actual number will strongly depend on the utility, in particular its variance.

    from pydvl.utils import Dataset, Utility\nfrom pydvl.value import compute_shapley_values\nmodel = ...\ndata = Dataset(...)\nutility = Utility(model, data, score_range=(_min, _max))\nmin_iterations = num_samples_eps_delta(epsilon, delta, n, utility.score_range)\nvalues = compute_shapley_values(\nu=utility, mode=\"group_testing\", n_iterations=min_iterations, eps=eps\n)\n
    1. Ghorbani, A., Zou, J., 2019. Data Shapley: Equitable Valuation of Data for Machine Learning, in: Proceedings of the 36th International Conference on Machine Learning, PMLR. Presented at the International Conference on Machine Learning (ICML 2019), PMLR, pp. 2242\u20132251.\u00a0\u21a9\u21a9

    2. Okhrati, R., Lipani, A., 2021. A Multilinear Sampling Algorithm to Estimate Shapley Values, in: 2020 25th International Conference on Pattern Recognition (ICPR). Presented at the 2020 25th International Conference on Pattern Recognition (ICPR), IEEE, pp. 7992\u20137999. https://doi.org/10.1109/ICPR48806.2021.9412511 \u21a9

    3. Castro, J., G\u00f3mez, D., Tejada, J., 2009. Polynomial calculation of the Shapley value based on sampling. Computers & Operations Research, Selected papers presented at the Tenth International Symposium on Locational Decisions (ISOLDE X) 36, 1726\u20131730. https://doi.org/10.1016/j.cor.2008.04.004 \u21a9\u21a9

    4. Jia, R., Dao, D., Wang, B., Hubis, F.A., Gurel, N.M., Li, B., Zhang, C., Spanos, C., Song, D., 2019. Efficient task-specific data valuation for nearest neighbor algorithms. Proc. VLDB Endow. 12, 1610\u20131623. https://doi.org/10.14778/3342263.3342637 \u21a9\u21a9

    "},{"location":"value/the-core/","title":"Core values","text":"

    The Shapley values define a fair way to distribute payoffs amongst all participants when they form a grand coalition. But they do not consider the question of stability: under which conditions do all participants form the grand coalition? Would the participants be willing to form the grand coalition given how the payoffs are assigned, or would some of them prefer to form smaller coalitions?

    The Core is another approach to computing data values originating in cooperative game theory that attempts to ensure this stability. It is the set of feasible payoffs that cannot be improved upon by a coalition of the participants.

    It satisfies the following 2 properties:

    • Efficiency: The payoffs are distributed such that it is not possible to make any participant better off without making another one worse off. \\(\\sum_{i\\in D} v(i) = u(D)\\)

    • Coalitional rationality: The sum of payoffs to the agents in any coalition S is at least as large as the amount that these agents could earn by forming a coalition on their own. \\(\\sum_{i \\in S} v(i) \\geq u(S), \\forall S \\subset D\\)

    The second property states that the sum of payoffs to the agents in any subcoalition \\(S\\) is at least as large as the amount that these agents could earn by forming a coalition on their own.

    "},{"location":"value/the-core/#least-core-values","title":"Least Core values","text":"

    Unfortunately, for many cooperative games the Core may be empty. By relaxing the coalitional rationality property by a subsidy \\(e \\gt 0\\), we are then able to find approximate payoffs:

    \\[ \\sum_{i\\in S} v(i) + e \\geq u(S), \\forall S \\subset D, S \\neq \\emptyset \\ ,\\]

    The least core value \\(v\\) of the \\(i\\)-th sample in dataset \\(D\\) wrt. utility \\(u\\) is computed by solving the following Linear Program:

    \\[ \\begin{array}{lll} \\text{minimize} & e & \\\\ \\text{subject to} & \\sum_{i\\in D} v(i) = u(D) & \\\\ & \\sum_{i\\in S} v(i) + e \\geq u(S) &, \\forall S \\subset D, S \\neq \\emptyset \\\\ \\end{array} \\]"},{"location":"value/the-core/#exact-least-core","title":"Exact Least Core","text":"

    This first algorithm is just a verbatim implementation of the definition. As such it returns as exact a value as the utility function allows (see what this means in Problems of Data Values).

    from pydvl.value import compute_least_core_values\nvalues = compute_least_core_values(utility, mode=\"exact\")\n
    "},{"location":"value/the-core/#monte-carlo-least-core","title":"Monte Carlo Least Core","text":"

    Because the number of subsets \\(S \\subseteq D \\setminus \\{i\\}\\) is \\(2^{ | D | - 1 }\\), one typically must resort to approximations.

    The simplest approximation consists in using a fraction of all subsets for the constraints. (Yan and Procaccia, 2021)1 show that a quantity of order \\(\\mathcal{O}((n - \\log \\Delta ) / \\delta^2)\\) is enough to obtain a so-called \\(\\delta\\)-approximate least core with high probability. I.e. the following property holds with probability \\(1-\\Delta\\) over the choice of subsets:

    \\[ \\mathbb{P}_{S\\sim D}\\left[\\sum_{i\\in S} v(i) + e^{*} \\geq u(S)\\right] \\geq 1 - \\delta, \\]

    where \\(e^{*}\\) is the optimal least core subsidy.

    from pydvl.value import compute_least_core_values\nvalues = compute_least_core_values(\nutility, mode=\"montecarlo\", n_iterations=n_iterations\n)\n

    Note

    Although any number is supported, it is best to choose n_iterations to be at least equal to the number of data points.

    Because computing the Least Core values requires the solution of a linear and a quadratic problem after computing all the utility values, we offer the possibility of splitting the latter from the former. This is useful when running multiple experiments: use mclc_prepare_problem to prepare a list of problems to solve, then solve them in parallel with lc_solve_problems.

    from pydvl.value.least_core import mclc_prepare_problem, lc_solve_problems\nn_experiments = 10\nproblems = [mclc_prepare_problem(utility, n_iterations=n_iterations)\nfor _ in range(n_experiments)]\nvalues = lc_solve_problems(problems)\n
    "},{"location":"value/the-core/#method-comparison","title":"Method comparison","text":"

    The TransferLab team reproduced the results of the original paper in a publication for the 2022 MLRC (Benmerzoug and Delgado, 2023)2.

    Best sample removal on binary image classification

    Roughly speaking, MCLC performs better in identifying high value points, as measured by best-sample removal tasks. In all other aspects, it performs worse or similarly to TMCS at comparable sample budgets. But using an equal number of subsets is more computationally expensive because of the need to solve large linear and quadratic optimization problems.

    Worst sample removal on binary image classification

    For these reasons we recommend some variation of SV like TMCS for outlier detection, data cleaning and pruning, and perhaps MCLC for the selection of interesting points to be inspected for the improvement of data collection or model design.

    1. Yan, T., Procaccia, A.D., 2021. If You Like Shapley Then You\u2019ll Love the Core, in: Proceedings of the 35th AAAI Conference on Artificial Intelligence, 2021. Presented at the AAAI Conference on Artificial Intelligence, Association for the Advancement of Artificial Intelligence, pp. 5751\u20135759. https://doi.org/10.1609/aaai.v35i6.16721 \u21a9

    2. Benmerzoug, A., Delgado, M. de B., 2023. [Re] If you like Shapley, then you\u2019ll love the core. ReScience C 9. https://doi.org/10.5281/zenodo.8173733 \u21a9

    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"The python library for data valuation","text":"

    pyDVL collects algorithms for data valuation and influence function computation. It runs most of them in parallel either locally or in a cluster and supports distributed caching of results.

    If you're a first-time user of pyDVL, we recommend that you go through the Getting Started and Installing pyDVL guides.

    Installation

    Steps to install and requirements

    Data valuation

    Basics of data valuation and description of the main algorithms

    Influence Function

    An introduction to the influence function and its computation with pyDVL

    Browse the API

    Full documentation of the API

    "},{"location":"CHANGELOG/","title":"Changelog","text":""},{"location":"CHANGELOG/#unreleased","title":"Unreleased","text":"
    • Implementation of Data-OOB by @BastienZim PR #426, PR #431
    • Refactoring of parallel module. Old imports will stop working in v0.9.0 PR #421
    "},{"location":"CHANGELOG/#070-documentation-and-if-overhaul-new-methods-and-bug-fixes","title":"0.7.0 - \ud83d\udcda\ud83c\udd95 Documentation and IF overhaul, new methods and bug fixes \ud83d\udca5\ud83d\udc1e","text":"

    This is our first \u03b2 release! We have worked hard to deliver improvements across the board, with a focus on documentation and usability. We have also reworked the internals of the influence module, improved parallelism and handling of randomness.

    "},{"location":"CHANGELOG/#added","title":"Added","text":"
    • Implemented solving the Hessian equation via spectral low-rank approximation PR #365
    • Enabled parallel computation for Leave-One-Out values PR #406
    • Added more abbreviations to documentation PR #415
    • Added seed to functions from pydvl.utils.numeric, pydvl.value.shapley and pydvl.value.semivalues. Introduced new type Seed and conversion function ensure_seed_sequence. PR #396
    • Added batch_size parameter to compute_banzhaf_semivalues, compute_beta_shapley_semivalues, compute_shapley_semivalues and compute_generic_semivalues. PR #428
    "},{"location":"CHANGELOG/#changed","title":"Changed","text":"
    • Replaced sphinx with mkdocs for documentation. Major overhaul of documentation PR #352
    • Made ray an optional dependency, relying on joblib as default parallel backend PR #408
    • Decoupled ray.init from ParallelConfig PR #373
    • Breaking Changes
    • Signature change: return information about Hessian inversion from compute_influence_factors PR #375
    • Major changes to IF interface and functionality. Foundation for a framework abstraction for IF computation. PR #278 PR #394
    • Renamed semivalues to compute_generic_semivalues PR #413
    • New joblib backend as default instead of ray. Simplify MapReduceJob. PR #355
    • Bump torch dependency for influence package to 2.0 PR #365
    "},{"location":"CHANGELOG/#fixed","title":"Fixed","text":"
    • Fixes to parallel computation of generic semi-values: properly handle all samplers and stopping criteria, irrespective of parallel backend. PR #372
    • Optimises memory usage in IF calculation PR #375
    • Fix adding valuation results with overlapping indices and different lengths PR #370
    • Fixed bugs in conjugate gradient and linear_solve PR #358
    • Fix installation of dev requirements for Python3.10 PR #382
    • Improvements to IF documentation PR #371
    "},{"location":"CHANGELOG/#061-bug-fixes-and-small-improvements","title":"0.6.1 - \ud83c\udfd7 Bug fixes and small improvements","text":"
    • Fix parsing keyword arguments of compute_semivalues dispatch function PR #333
    • Create new RayExecutor class based on the concurrent.futures API, use the new class to fix an issue with Truncated Monte Carlo Shapley (TMCS) starting too many processes and dying, plus other small changes PR #329
    • Fix creation of GroupedDataset objects using the from_arrays and from_sklearn class methods PR #324
    • Fix release job not triggering on CI when a new tag is pushed PR #331
    • Added alias ApproShapley from Castro et al. 2009 for permutation Shapley PR #332
    "},{"location":"CHANGELOG/#060-new-algorithms-cleanup-and-bug-fixes","title":"0.6.0 - \ud83c\udd95 New algorithms, cleanup and bug fixes \ud83c\udfd7","text":"
    • Fixes in ValuationResult: bugs around data names, semantics of empty(), new method zeros() and normalised random values PR #327
    • New method: Implements generalised semi-values for data valuation, including Data Banzhaf and Beta Shapley, with configurable sampling strategies PR #319
    • Adds kwargs parameter to from_array and from_sklearn Dataset and GroupedDataset class methods PR #316
    • PEP-561 conformance: added py.typed PR #307
    • Removed default non-negativity constraint on least core subsidy and added instead a non_negative_subsidy boolean flag. Renamed options to solver_options and pass it as dict. Change default least-core solver to SCS with 10000 max_iters. PR #304
    • Cleanup: removed unnecessary decorator @unpackable PR #233
    • Stopping criteria: fixed problem with StandardError and enable proper composition of index convergence statuses. Fixed a bug with n_jobs in truncated_montecarlo_shapley. PR #300 and PR #305
    • Shuffling code around to allow for simpler user imports, some cleanup and documentation fixes. PR #284
    • Bug fix: Warn instead of raising an error when n_iterations is less than the size of the dataset in Monte Carlo Least Core PR #281
    "},{"location":"CHANGELOG/#050-fixes-nicer-interfaces-and-more-breaking-changes","title":"0.5.0 - \ud83d\udca5 Fixes, nicer interfaces and... more breaking changes \ud83d\ude12","text":"
    • Fixed parallel and antithetic Owen sampling for Shapley values. Simplified and extended tests. PR #267
    • Added Scorer class for a cleaner interface. Fixed minor bugs around Group-Testing Shapley, added more tests and switched to cvxpy for the solver. PR #264
    • Generalised stopping criteria for valuation algorithms. Improved classes ValuationResult and Status with more operations. Some minor issues fixed. PR #252
    • Fixed a bug whereby compute_shapley_values would only spawn one process when using n_jobs=-1 and Monte Carlo methods. PR #270
    • Bugfix in RayParallelBackend: wrong semantics for kwargs. PR #268
    • Splitting of problem preparation and solution in Least-Core computation. Umbrella function for LC methods. PR #257
    • Operations on ValuationResult and Status and some cleanup PR #248
    • Bug fix and minor improvements: Fixes bug in TMCS with remote Ray cluster, raises an error for dummy sequential parallel backend with TMCS, clones model inside Utility before fitting by default, with flag clone_before_fit to disable it, catches all warnings in Utility when show_warnings is False. Adds Miner and Gloves toy games utilities PR #247
    "},{"location":"CHANGELOG/#040-new-algorithms-and-more-breaking-changes","title":"0.4.0 - \ud83c\udfed\ud83d\udca5 New algorithms and more breaking changes","text":"
    • GH action to mark issues as stale PR #201
    • Disabled caching of Utility values as well as repeated evaluations by default PR #211
    • Test and officially support Python version 3.9 and 3.10 PR #208
    • Breaking change: Introduces a class ValuationResult to gather and inspect results from all valuation algorithms PR #214
    • Fixes bug in Influence calculation with multidimensional input and adds new example notebook PR #195
    • Breaking change: Passes the input to MapReduceJob at initialization, removes chunkify_inputs argument from MapReduceJob, removes n_runs argument from MapReduceJob, calls the parallel backend's put() method for each generated chunk in _chunkify(), renames ParallelConfig's num_workers attribute to n_local_workers, fixes a bug in MapReduceJob's chunkification when n_runs >= n_jobs, and defines a sequential parallel backend to run all jobs in the current thread PR #232
    • New method: Implements exact and monte carlo Least Core for data valuation, adds from_arrays() class method to the Dataset and GroupedDataset classes, adds extra_values argument to ValuationResult, adds compute_removal_score() and compute_random_removal_score() helper functions PR #237
    • New method: Group Testing Shapley for valuation, from Jia et al. 2019 PR #240
    • Fixes bug in ray initialization in RayParallelBackend class PR #239
    • Implements \"Egalitarian Least Core\", adds cvxpy as a dependency and uses it instead of scipy as optimizer PR #243
    "},{"location":"CHANGELOG/#030-breaking-changes","title":"0.3.0 - \ud83d\udca5 Breaking changes","text":"
    • Simplified and fixed powerset sampling and testing PR #181
    • Simplified and fixed publishing to PyPI from CI PR #183
    • Fixed bug in release script and updated contributing docs. PR #184
    • Added Pull Request template PR #185
    • Modified Pull Request template to automatically link PR to issue PR #186
    • First implementation of Owen Sampling, squashed scores, better testing PR #194
    • Improved documentation on caching, Shapley, caveats of values, bibtex PR #194
    • Breaking change: Rearranging of modules to accommodate for new methods PR #194
    "},{"location":"CHANGELOG/#020-better-docs","title":"0.2.0 - \ud83d\udcda Better docs","text":"

    Mostly API documentation and notebooks, plus some bugfixes.

    "},{"location":"CHANGELOG/#added_1","title":"Added","text":"

    In PR #161: - Support for $$ math in sphinx docs. - Usage of sphinx extension for external links (introducing new directives like :gh:, :issue: and :tfl: to construct standardised links to external resources). - Only update auto-generated documentation files if there are changes. Some minor additions to update_docs.py. - Parallelization of exact combinatorial Shapley. - Integrated KNN shapley into the main interface compute_shapley_values.

    "},{"location":"CHANGELOG/#changed_1","title":"Changed","text":"

    In PR #161: - Improved main docs and Shapley notebooks. Added or fixed many docstrings, readme and documentation for contributors. Typos, grammar and style in code, documentation and notebooks. - Internal renaming and rearranging in the parallelization and caching modules.

    "},{"location":"CHANGELOG/#fixed_1","title":"Fixed","text":"
    • Bug in random matrix generation PR #161.
    • Bugs in MapReduceJob's _chunkify and _backpressure methods PR #176.
    "},{"location":"CHANGELOG/#010-first-release","title":"0.1.0 - \ud83c\udf89 first release","text":"

    This is the very first release of pyDVL.

    It contains:

    • Data Valuation Methods:

    • Leave-One-Out

    • Influence Functions
    • Shapley:
      • Exact Permutation and Combinatorial
      • Montecarlo Permutation and Combinatorial
      • Truncated Montecarlo Permutation
    • Caching of results with Memcached
    • Parallelization of computations with Ray
    • Documentation
    • Notebooks containing examples of different use cases
    "},{"location":"api/pydvl/","title":"API","text":""},{"location":"api/pydvl/#pydvl--the-python-data-valuation-library-api","title":"The Python Data Valuation Library API","text":"

    This is the API documentation for the Python Data Valuation Library (PyDVL). Use the table of contents to access the documentation for each module.

    The two main modules you will want to look at are value and influence.

    "},{"location":"api/pydvl/influence/","title":"Influence","text":"

    This package contains algorithms for the computation of the influence function.

    Warning: Much of the code in this package is experimental or untested and is subject to modification. In particular, the package structure and basic API will probably change.

    "},{"location":"api/pydvl/influence/general/","title":"General","text":"

    This module contains influence calculation functions for general models, as introduced in (Koh and Liang, 2017)1.

    "},{"location":"api/pydvl/influence/general/#pydvl.influence.general--references","title":"References","text":"
    1. Koh, P.W., Liang, P., 2017. Understanding Black-box Predictions via Influence Functions. In: Proceedings of the 34th International Conference on Machine Learning, pp. 1885\u20131894. PMLR.\u00a0\u21a9

    "},{"location":"api/pydvl/influence/general/#pydvl.influence.general.InfluenceType","title":"InfluenceType","text":"

    Bases: str, Enum

    Enum representation for the types of influence.

    ATTRIBUTE DESCRIPTION Up

    Up-weighting a training point, see section 2.1 of (Koh and Liang, 2017)1

    Perturbation

    Perturb a training point, see section 2.2 of (Koh and Liang, 2017)1

    "},{"location":"api/pydvl/influence/general/#pydvl.influence.general.compute_influence_factors","title":"compute_influence_factors(model, training_data, test_data, inversion_method, *, hessian_perturbation=0.0, progress=False, **kwargs)","text":"

    Calculates influence factors of a model for training and test data.

    Given a test point \\(z_{test} = (x_{test}, y_{test})\\), a loss \\(L(z_{test}, \\theta)\\) (\\(\\theta\\) being the parameters of the model) and the Hessian of the model \\(H_{\\theta}\\), influence factors are defined as:

    \\[ s_{test} = H_{\\theta}^{-1} \\operatorname{grad}_{\\theta} L(z_{test}, \\theta). \\]

    They are used for efficient influence calculation. This method first (implicitly) calculates the Hessian and then (explicitly) finds the influence factors for the model using the given inversion method. The parameter hessian_perturbation is used to regularize the inversion of the Hessian. For more info, refer to (Koh and Liang, 2017)1, paragraph 3.

    PARAMETER DESCRIPTION model

    A model wrapped in the TwiceDifferentiable interface.

    TYPE: TwiceDifferentiable

    training_data

    DataLoader containing the training data.

    TYPE: DataLoaderType

    test_data

    DataLoader containing the test data.

    TYPE: DataLoaderType

    inversion_method

    Name of method for computing inverse hessian vector products.

    TYPE: InversionMethod

    hessian_perturbation

    Regularization of the hessian.

    TYPE: float DEFAULT: 0.0

    progress

    If True, display progress bars.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION array

    An array of size (N, D) containing the influence factors for each dimension (D) and test sample (N).

    TYPE: InverseHvpResult

    Source code in src/pydvl/influence/general.py
    def compute_influence_factors(\nmodel: TwiceDifferentiable,\ntraining_data: DataLoaderType,\ntest_data: DataLoaderType,\ninversion_method: InversionMethod,\n*,\nhessian_perturbation: float = 0.0,\nprogress: bool = False,\n**kwargs: Any,\n) -> InverseHvpResult:\nr\"\"\"\n    Calculates influence factors of a model for training and test data.\n    Given a test point \\(z_{test} = (x_{test}, y_{test})\\), a loss \\(L(z_{test}, \\theta)\\)\n    (\\(\\theta\\) being the parameters of the model) and the Hessian of the model \\(H_{\\theta}\\),\n    influence factors are defined as:\n    \\[\n    s_{test} = H_{\\theta}^{-1} \\operatorname{grad}_{\\theta} L(z_{test}, \\theta).\n    \\]\n    They are used for efficient influence calculation. This method first (implicitly) calculates\n    the Hessian and then (explicitly) finds the influence factors for the model using the given\n    inversion method. The parameter `hessian_perturbation` is used to regularize the inversion of\n    the Hessian. 
For more info, refer to (Koh and Liang, 2017)<sup><a href=\"#koh_liang_2017\">1</a></sup>, paragraph 3.\n    Args:\n        model: A model wrapped in the TwiceDifferentiable interface.\n        training_data: DataLoader containing the training data.\n        test_data: DataLoader containing the test data.\n        inversion_method: Name of method for computing inverse hessian vector products.\n        hessian_perturbation: Regularization of the hessian.\n        progress: If True, display progress bars.\n    Returns:\n        array: An array of size (N, D) containing the influence factors for each dimension (D) and test sample (N).\n    \"\"\"\ntensor_util: Type[TensorUtilities] = TensorUtilities.from_twice_differentiable(\nmodel\n)\nstack = tensor_util.stack\nunsqueeze = tensor_util.unsqueeze\ncat_gen = tensor_util.cat_gen\ncat = tensor_util.cat\ndef test_grads() -> Generator[TensorType, None, None]:\nfor x_test, y_test in maybe_progress(\ntest_data, progress, desc=\"Batch Test Gradients\"\n):\nyield stack(\n[\nmodel.grad(inpt, target)\nfor inpt, target in zip(unsqueeze(x_test, 1), y_test)\n]\n)  # type:ignore\ntry:\n# if provided input_data implements __len__, pre-allocate the result tensor to reduce memory consumption\nresulting_shape = (len(test_data), model.num_params)  # type:ignore\nrhs = cat_gen(\ntest_grads(), resulting_shape, model  # type:ignore\n)  # type:ignore\nexcept Exception as e:\nlogger.warning(\nf\"Failed to pre-allocate result tensor: {e}\\n\"\nf\"Evaluate all resulting tensor and concatenate\"\n)\nrhs = cat(list(test_grads()))\nreturn solve_hvp(\ninversion_method,\nmodel,\ntraining_data,\nrhs,\nhessian_perturbation=hessian_perturbation,\n**kwargs,\n)\n
    "},{"location":"api/pydvl/influence/general/#pydvl.influence.general.compute_influences_up","title":"compute_influences_up(model, input_data, influence_factors, *, progress=False)","text":"

    Given the model, the training points, and the influence factors, this function calculates the influences using the up-weighting method.

    The procedure involves two main steps: 1. Calculating the gradients of the model with respect to each training sample (\\(\\operatorname{grad}_{\\theta} L\\), where \\(L\\) is the loss of a single point and \\(\\theta\\) are the parameters of the model). 2. Multiplying each gradient with the influence factors.

    For a detailed description of the methodology, see section 2.1 of (Koh and Liang, 2017)1.

    PARAMETER DESCRIPTION model

    A model that implements the TwiceDifferentiable interface.

    TYPE: TwiceDifferentiable

    input_data

    DataLoader containing the samples for which the influence will be calculated.

    TYPE: DataLoaderType

    influence_factors

    Array containing pre-computed influence factors.

    TYPE: TensorType

    progress

    If set to True, progress bars will be displayed during computation.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION TensorType

    An array of shape [NxM], where N is the number of influence factors, and M is the number of input samples.

    Source code in src/pydvl/influence/general.py
    def compute_influences_up(\nmodel: TwiceDifferentiable,\ninput_data: DataLoaderType,\ninfluence_factors: TensorType,\n*,\nprogress: bool = False,\n) -> TensorType:\nr\"\"\"\n    Given the model, the training points, and the influence factors, this function calculates the\n    influences using the up-weighting method.\n    The procedure involves two main steps:\n    1. Calculating the gradients of the model with respect to each training sample\n       (\\(\\operatorname{grad}_{\\theta} L\\), where \\(L\\) is the loss of a single point and \\(\\theta\\) are the\n       parameters of the model).\n    2. Multiplying each gradient with the influence factors.\n    For a detailed description of the methodology, see section 2.1 of (Koh and Liang, 2017)<sup><a href=\"#koh_liang_2017\">1</a></sup>.\n    Args:\n        model: A model that implements the TwiceDifferentiable interface.\n        input_data: DataLoader containing the samples for which the influence will be calculated.\n        influence_factors: Array containing pre-computed influence factors.\n        progress: If set to True, progress bars will be displayed during computation.\n    Returns:\n        An array of shape [NxM], where N is the number of influence factors, and M is the number of input samples.\n    \"\"\"\ntensor_util: Type[TensorUtilities] = TensorUtilities.from_twice_differentiable(\nmodel\n)\nstack = tensor_util.stack\nunsqueeze = tensor_util.unsqueeze\ncat_gen = tensor_util.cat_gen\ncat = tensor_util.cat\neinsum = tensor_util.einsum\ndef train_grads() -> Generator[TensorType, None, None]:\nfor x, y in maybe_progress(\ninput_data, progress, desc=\"Batch Split Input Gradients\"\n):\nyield stack(\n[model.grad(inpt, target) for inpt, target in zip(unsqueeze(x, 1), y)]\n)  # type:ignore\ntry:\n# if provided input_data implements __len__, pre-allocate the result tensor to reduce memory consumption\nresulting_shape = (len(input_data), model.num_params)  # type:ignore\ntrain_grad_tensor = 
cat_gen(\ntrain_grads(), resulting_shape, model  # type:ignore\n)  # type:ignore\nexcept Exception as e:\nlogger.warning(\nf\"Failed to pre-allocate result tensor: {e}\\n\"\nf\"Evaluate all resulting tensor and concatenate\"\n)\ntrain_grad_tensor = cat([x for x in train_grads()])  # type:ignore\nreturn einsum(\"ta,va->tv\", influence_factors, train_grad_tensor)  # type:ignore\n
    "},{"location":"api/pydvl/influence/general/#pydvl.influence.general.compute_influences_pert","title":"compute_influences_pert(model, input_data, influence_factors, *, progress=False)","text":"

    Calculates the influence values based on the influence factors and training samples using the perturbation method.

    The process involves two main steps: 1. Calculating the gradient of the model with respect to each training sample (\\(\\operatorname{grad}_{\\theta} L\\), where \\(L\\) is the loss of the model for a single data point and \\(\\theta\\) are the parameters of the model). 2. Using the method TwiceDifferentiable.mvp to efficiently compute the product of the influence factors and \\(\\operatorname{grad}_x \\operatorname{grad}_{\\theta} L\\).

    For a detailed methodology, see section 2.2 of (Koh and Liang, 2017)1.

    PARAMETER DESCRIPTION model

    A model that implements the TwiceDifferentiable interface.

    TYPE: TwiceDifferentiable

    input_data

    DataLoader containing the samples for which the influence will be calculated.

    TYPE: DataLoaderType

    influence_factors

    Array containing pre-computed influence factors.

    TYPE: TensorType

    progress

    If set to True, progress bars will be displayed during computation.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION TensorType

    A 3D array with shape [NxMxP], where N is the number of influence factors, M is the number of input samples, and P is the number of features.

    Source code in src/pydvl/influence/general.py
    def compute_influences_pert(\nmodel: TwiceDifferentiable,\ninput_data: DataLoaderType,\ninfluence_factors: TensorType,\n*,\nprogress: bool = False,\n) -> TensorType:\nr\"\"\"\n    Calculates the influence values based on the influence factors and training samples using the perturbation method.\n    The process involves two main steps:\n    1. Calculating the gradient of the model with respect to each training sample\n       (\\(\\operatorname{grad}_{\\theta} L\\), where \\(L\\) is the loss of the model for a single data point and \\(\\theta\\)\n       are the parameters of the model).\n    2. Using the method [TwiceDifferentiable.mvp][pydvl.influence.twice_differentiable.TwiceDifferentiable.mvp]\n       to efficiently compute the product of the\n       influence factors and \\(\\operatorname{grad}_x \\operatorname{grad}_{\\theta} L\\).\n    For a detailed methodology, see section 2.2 of (Koh and Liang, 2017)<sup><a href=\"#koh_liang_2017\">1</a></sup>.\n    Args:\n        model: A model that implements the TwiceDifferentiable interface.\n        input_data: DataLoader containing the samples for which the influence will be calculated.\n        influence_factors: Array containing pre-computed influence factors.\n        progress: If set to True, progress bars will be displayed during computation.\n    Returns:\n        A 3D array with shape [NxMxP], where N is the number of influence factors,\n            M is the number of input samples, and P is the number of features.\n    \"\"\"\ntensor_util: Type[TensorUtilities] = TensorUtilities.from_twice_differentiable(\nmodel\n)\nstack = tensor_util.stack\ntu_slice = tensor_util.slice\nreshape = tensor_util.reshape\nget_element = tensor_util.get_element\nshape = tensor_util.shape\nall_pert_influences = []\nfor x, y in maybe_progress(\ninput_data,\nprogress,\ndesc=\"Batch Influence Perturbation\",\n):\nfor i in range(len(x)):\ntensor_x = tu_slice(x, i, i + 1)\ngrad_xy = model.grad(tensor_x, get_element(y, i), 
create_graph=True)\nperturbation_influences = model.mvp(\ngrad_xy,\ninfluence_factors,\nbackprop_on=tensor_x,\n)\nall_pert_influences.append(\nreshape(perturbation_influences, (-1, *shape(get_element(x, i))))\n)\nreturn stack(all_pert_influences, axis=1)  # type:ignore\n
    "},{"location":"api/pydvl/influence/general/#pydvl.influence.general.compute_influences","title":"compute_influences(differentiable_model, training_data, *, test_data=None, input_data=None, inversion_method=InversionMethod.Direct, influence_type=InfluenceType.Up, hessian_regularization=0.0, progress=False, **kwargs)","text":"

    Calculates the influence of each input data point on the specified test points.

    This method operates in two primary stages: 1. Computes the influence factors for all test points concerning the model and its training data. 2. Uses these factors to derive the influences over the complete set of input data.

    The influence calculation relies on the twice-differentiable nature of the provided model.

    PARAMETER DESCRIPTION differentiable_model

    A model bundled with its corresponding loss in the TwiceDifferentiable wrapper.

    TYPE: TwiceDifferentiable

    training_data

    DataLoader instance supplying the training data. This data is pivotal in computing the Hessian matrix for the model's loss.

    TYPE: DataLoaderType

    test_data

    DataLoader instance with the test samples. Defaults to training_data if None.

    TYPE: Optional[DataLoaderType] DEFAULT: None

    input_data

    DataLoader instance holding samples whose influences need to be computed. Defaults to training_data if None.

    TYPE: Optional[DataLoaderType] DEFAULT: None

    inversion_method

    An enumeration value determining the approach for inverting matrices or computing inverse operations, see pydvl.influence.inversion.InversionMethod.

    TYPE: InversionMethod DEFAULT: Direct

    progress

    A boolean indicating whether progress bars should be displayed during computation.

    TYPE: bool DEFAULT: False

    influence_type

    Determines the methodology for computing influences. Valid choices include 'up' (for up-weighting) and 'perturbation'. For an in-depth understanding, see (Koh and Liang, 2017)1.

    TYPE: InfluenceType DEFAULT: Up

    hessian_regularization

    A lambda value used in Hessian regularization. The regularized Hessian, \\( H_{reg} \\), is computed as \\( H + \\lambda \\times I \\), where \\( I \\) is the identity matrix and \\( H \\) is the simple, unmodified Hessian. This regularization is typically utilized for more sophisticated models to ensure that the Hessian remains positive definite.

    TYPE: float DEFAULT: 0.0

    RETURNS DESCRIPTION TensorType

    The shape of this array varies based on the influence_type. If 'up', the shape is [NxM], where N denotes the number of test points and M denotes the number of training points. Conversely, if the influence_type is 'perturbation', the shape is [NxMxP], with P representing the number of input features.

    Source code in src/pydvl/influence/general.py
    def compute_influences(\ndifferentiable_model: TwiceDifferentiable,\ntraining_data: DataLoaderType,\n*,\ntest_data: Optional[DataLoaderType] = None,\ninput_data: Optional[DataLoaderType] = None,\ninversion_method: InversionMethod = InversionMethod.Direct,\ninfluence_type: InfluenceType = InfluenceType.Up,\nhessian_regularization: float = 0.0,\nprogress: bool = False,\n**kwargs: Any,\n) -> TensorType:  # type: ignore # ToDO fix typing\nr\"\"\"\n    Calculates the influence of each input data point on the specified test points.\n    This method operates in two primary stages:\n    1. Computes the influence factors for all test points concerning the model and its training data.\n    2. Uses these factors to derive the influences over the complete set of input data.\n    The influence calculation relies on the twice-differentiable nature of the provided model.\n    Args:\n        differentiable_model: A model bundled with its corresponding loss in the `TwiceDifferentiable` wrapper.\n        training_data: DataLoader instance supplying the training data. This data is pivotal in computing the\n                       Hessian matrix for the model's loss.\n        test_data: DataLoader instance with the test samples. Defaults to `training_data` if None.\n        input_data: DataLoader instance holding samples whose influences need to be computed. 
Defaults to\n                    `training_data` if None.\n        inversion_method: An enumeration value determining the approach for inverting matrices\n            or computing inverse operations, see [.inversion.InversionMethod]\n        progress: A boolean indicating whether progress bars should be displayed during computation.\n        influence_type: Determines the methodology for computing influences.\n            Valid choices include 'up' (for up-weighting) and 'perturbation'.\n            For an in-depth understanding, see (Koh and Liang, 2017)<sup><a href=\"#koh_liang_2017\">1</a></sup>.\n        hessian_regularization: A lambda value used in Hessian regularization. The regularized Hessian, \\( H_{reg} \\),\n            is computed as \\( H + \\lambda \\times I \\), where \\( I \\) is the identity matrix and \\( H \\)\n            is the simple, unmodified Hessian. This regularization is typically utilized for more\n            sophisticated models to ensure that the Hessian remains positive definite.\n    Returns:\n        The shape of this array varies based on the `influence_type`. If 'up', the shape is [NxM], where\n            N denotes the number of test points and M denotes the number of training points. Conversely, if the\n            influence_type is 'perturbation', the shape is [NxMxP], with P representing the number of input features.\n    \"\"\"\nif input_data is None:\ninput_data = deepcopy(training_data)\nif test_data is None:\ntest_data = deepcopy(training_data)\ninfluence_factors, _ = compute_influence_factors(\ndifferentiable_model,\ntraining_data,\ntest_data,\ninversion_method,\nhessian_perturbation=hessian_regularization,\nprogress=progress,\n**kwargs,\n)\nreturn influence_type_registry[influence_type](\ndifferentiable_model,\ninput_data,\ninfluence_factors,\nprogress=progress,\n)\n
    "},{"location":"api/pydvl/influence/inversion/","title":"Inversion","text":"

    Contains methods to invert the hessian vector product.

    "},{"location":"api/pydvl/influence/inversion/#pydvl.influence.inversion.InversionMethod","title":"InversionMethod","text":"

    Bases: str, Enum

    Different types of inversion methods.

    "},{"location":"api/pydvl/influence/inversion/#pydvl.influence.inversion.InversionRegistry","title":"InversionRegistry","text":"

    A registry to hold inversion methods for different models.

    "},{"location":"api/pydvl/influence/inversion/#pydvl.influence.inversion.InversionRegistry.register","title":"register(model_type, inversion_method, overwrite=False) classmethod","text":"

    Register a function for a specific model type and inversion method.

    The function to be registered must conform to the following signature: (model: TwiceDifferentiable, training_data: DataLoaderType, b: TensorType, hessian_perturbation: float = 0.0, ...).

    PARAMETER DESCRIPTION model_type

    The type of the model the function should be registered for.

    TYPE: Type[TwiceDifferentiable]

    inversion_method

    The inversion method the function should be registered for.

    TYPE: InversionMethod

    overwrite

    If True, allows overwriting of an existing registered function for the same model type and inversion method. If False, logs a warning when attempting to register a function for an already registered model type and inversion method.

    TYPE: bool DEFAULT: False

    RAISES DESCRIPTION TypeError

    If the provided model_type or inversion_method are of the wrong type.

    ValueError

    If the function to be registered does not match the required signature.

    RETURNS DESCRIPTION

    A decorator for registering a function.

    Source code in src/pydvl/influence/inversion.py
    @classmethod\ndef register(\ncls,\nmodel_type: Type[TwiceDifferentiable],\ninversion_method: InversionMethod,\noverwrite: bool = False,\n):\n\"\"\"\n    Register a function for a specific model type and inversion method.\n    The function to be registered must conform to the following signature:\n    `(model: TwiceDifferentiable, training_data: DataLoaderType, b: TensorType,\n    hessian_perturbation: float = 0.0, ...)`.\n    Args:\n        model_type: The type of the model the function should be registered for.\n        inversion_method: The inversion method the function should be\n            registered for.\n        overwrite: If ``True``, allows overwriting of an existing registered\n            function for the same model type and inversion method. If ``False``,\n            logs a warning when attempting to register a function for an already\n            registered model type and inversion method.\n    Raises:\n        TypeError: If the provided model_type or inversion_method are of the wrong type.\n        ValueError: If the function to be registered does not match the required signature.\n    Returns:\n        A decorator for registering a function.\n    \"\"\"\nif not isinstance(model_type, type):\nraise TypeError(\nf\"'model_type' is of type {type(model_type)} but should be a Type[TwiceDifferentiable]\"\n)\nif not isinstance(inversion_method, InversionMethod):\nraise TypeError(\nf\"'inversion_method' must be an 'InversionMethod' \"\nf\"but has type {type(inversion_method)} instead.\"\n)\nkey = (model_type, inversion_method)\ndef decorator(func):\nif not overwrite and key in cls.registry:\nwarnings.warn(\nf\"There is already a function registered for model type {model_type} \"\nf\"and inversion method {inversion_method}. 
\"\nf\"To overwrite the existing function {cls.registry.get(key)} with {func}, set overwrite to True.\"\n)\nsig = inspect.signature(func)\nparams = list(sig.parameters.values())\nexpected_args = [\n(\"model\", model_type),\n(\"training_data\", DataLoaderType.__bound__),\n(\"b\", model_type.tensor_type()),\n(\"hessian_perturbation\", float),\n]\nfor (name, typ), param in zip(expected_args, params):\nif not (\nisinstance(param.annotation, typ)\nor issubclass(param.annotation, typ)\n):\nraise ValueError(\nf'Parameter \"{name}\" must be of type \"{typ.__name__}\"'\n)\n@functools.wraps(func)\ndef wrapper(*args, **kwargs):\nreturn func(*args, **kwargs)\ncls.registry[key] = wrapper\nreturn wrapper\nreturn decorator\n
    "},{"location":"api/pydvl/influence/inversion/#pydvl.influence.inversion.InversionRegistry.call","title":"call(inversion_method, model, training_data, b, hessian_perturbation, **kwargs) classmethod","text":"

    Call a registered function with the provided parameters.

    PARAMETER DESCRIPTION inversion_method

    The inversion method to use.

    TYPE: InversionMethod

    model

    A model wrapped in the TwiceDifferentiable interface.

    TYPE: TwiceDifferentiable

    training_data

    The training data to use.

    TYPE: DataLoaderType

    b

    Array as the right hand side of the equation \\(Ax = b\\).

    TYPE: TensorType

    hessian_perturbation

    Regularization of the hessian.

    kwargs

    Additional keyword arguments to pass to the inversion method.

    DEFAULT: {}

    RETURNS DESCRIPTION InverseHvpResult

    An instance of InverseHvpResult, that contains an array, which solves the inverse problem, i.e. it returns \\(x\\) such that \\(Ax = b\\), and a dictionary containing information about the inversion process.

    Source code in src/pydvl/influence/inversion.py
    @classmethod\ndef call(\ncls,\ninversion_method: InversionMethod,\nmodel: TwiceDifferentiable,\ntraining_data: DataLoaderType,\nb: TensorType,\nhessian_perturbation,\n**kwargs,\n) -> InverseHvpResult:\nr\"\"\"\n    Call a registered function with the provided parameters.\n    Args:\n        inversion_method: The inversion method to use.\n        model: A model wrapped in the TwiceDifferentiable interface.\n        training_data: The training data to use.\n        b: Array as the right hand side of the equation \\(Ax = b\\).\n        hessian_perturbation: Regularization of the hessian.\n        kwargs: Additional keyword arguments to pass to the inversion method.\n    Returns:\n        An instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult],\n            that contains an array, which solves the inverse problem,\n            i.e. it returns \\(x\\) such that \\(Ax = b\\), and a dictionary containing information\n            about the inversion process.\n    \"\"\"\nreturn cls.get(type(model), inversion_method)(\nmodel, training_data, b, hessian_perturbation, **kwargs\n)\n
    "},{"location":"api/pydvl/influence/inversion/#pydvl.influence.inversion.solve_hvp","title":"solve_hvp(inversion_method, model, training_data, b, *, hessian_perturbation=0.0, **kwargs)","text":"

    Finds \\( x \\) such that \\( Ax = b \\), where \\( A \\) is the hessian of the model, and \\( b \\) a vector. Depending on the inversion method, the hessian is either calculated directly and then inverted, or implicitly and then inverted through matrix-vector products. The method also allows adding a small regularization term (hessian_perturbation) to facilitate inversion for models that are not fully trained.

    PARAMETER DESCRIPTION inversion_method

    TYPE: InversionMethod

    model

    A model wrapped in the TwiceDifferentiable interface.

    TYPE: TwiceDifferentiable

    training_data

    TYPE: DataLoaderType

    b

    Array as the right hand side of the equation \\( Ax = b \\)

    TYPE: TensorType

    hessian_perturbation

    regularization of the hessian.

    TYPE: float DEFAULT: 0.0

    kwargs

    kwargs to pass to the inversion method.

    TYPE: Any DEFAULT: {}

    RETURNS DESCRIPTION InverseHvpResult

    Instance of InverseHvpResult, with an array that solves the inverse problem, i.e., it returns \\( x \\) such that \\( Ax = b \\) and a dictionary containing information about the inversion process.

    Source code in src/pydvl/influence/inversion.py
    def solve_hvp(\ninversion_method: InversionMethod,\nmodel: TwiceDifferentiable,\ntraining_data: DataLoaderType,\nb: TensorType,\n*,\nhessian_perturbation: float = 0.0,\n**kwargs: Any,\n) -> InverseHvpResult:\nr\"\"\"\n    Finds \\( x \\) such that \\( Ax = b \\), where \\( A \\) is the hessian of the model,\n    and \\( b \\) a vector. Depending on the inversion method, the hessian is either\n    calculated directly and then inverted, or implicitly and then inverted through\n    matrix vector product. The method also allows to add a small regularization term\n    (hessian_perturbation) to facilitate inversion of non fully trained models.\n    Args:\n        inversion_method:\n        model: A model wrapped in the TwiceDifferentiable interface.\n        training_data:\n        b: Array as the right hand side of the equation \\( Ax = b \\)\n        hessian_perturbation: regularization of the hessian.\n        kwargs: kwargs to pass to the inversion method.\n    Returns:\n        Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult], with\n            an array that solves the inverse problem, i.e., it returns \\( x \\) such that \\( Ax = b \\)\n            and a dictionary containing information about the inversion process.\n    \"\"\"\nreturn InversionRegistry.call(\ninversion_method,\nmodel,\ntraining_data,\nb,\nhessian_perturbation=hessian_perturbation,\n**kwargs,\n)\n
    "},{"location":"api/pydvl/influence/twice_differentiable/","title":"Twice differentiable","text":""},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorType","title":"TensorType = TypeVar('TensorType', bound=Sequence) module-attribute","text":"

    Type variable for tensors, i.e. sequences of numbers

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.ModelType","title":"ModelType = TypeVar('ModelType', bound='TwiceDifferentiable') module-attribute","text":"

    Type variable for twice differentiable models

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.DataLoaderType","title":"DataLoaderType = TypeVar('DataLoaderType', bound=Iterable) module-attribute","text":"

    Type variable for data loaders

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.InverseHvpResult","title":"InverseHvpResult dataclass","text":"

    Bases: Generic[TensorType]

    Container class for results of solving a problem \\(Ax=b\\)

    PARAMETER DESCRIPTION x

    solution of a problem \\(Ax=b\\)

    TYPE: TensorType

    info

    additional information, to couple with the solution itself

    TYPE: Dict[str, Any]

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TwiceDifferentiable","title":"TwiceDifferentiable","text":"

    Bases: ABC, Generic[TensorType]

    Abstract base class for wrappers of differentiable models and losses. Meant to be subclassed for each supported framework. Provides methods to compute gradients and second derivative of the loss wrt. the model parameters

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TwiceDifferentiable.num_params","title":"num_params: int abstractmethod property","text":"

    Returns the number of parameters of the model

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TwiceDifferentiable.parameters","title":"parameters: List[TensorType] abstractmethod property","text":"

    Returns all the model parameters that require differentiation

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TwiceDifferentiable.grad","title":"grad(x, y, create_graph=False)","text":"

    Calculates the gradient of the loss with respect to the model parameters.

    PARAMETER DESCRIPTION x

    A matrix representing the features \\(x_i\\).

    TYPE: TensorType

    y

    A matrix representing the target values \\(y_i\\).

    TYPE: TensorType

    create_graph

    Used for further differentiation on input parameters.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION TensorType

    An array with the gradients of the model.

    Source code in src/pydvl/influence/twice_differentiable.py
    def grad(\nself, x: TensorType, y: TensorType, create_graph: bool = False\n) -> TensorType:\nr\"\"\"\n    Calculates gradient of model parameters with respect to the model parameters.\n    Args:\n        x: A matrix representing the features \\(x_i\\).\n        y: A matrix representing the target values \\(y_i\\).\n        create_graph: Used for further differentiation on input parameters.\n    Returns:\n        An array with the gradients of the model.\n    \"\"\"\npass\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TwiceDifferentiable.hessian","title":"hessian(x, y)","text":"

    Calculates the full Hessian of \\(L(f(x),y)\\) with respect to the model parameters given data \\(x\\) and \\(y\\).

    PARAMETER DESCRIPTION x

    An array representing the features \\(x_i\\).

    TYPE: TensorType

    y

    An array representing the target values \\(y_i\\).

    TYPE: TensorType

    RETURNS DESCRIPTION TensorType

    A tensor representing the Hessian of the model, i.e. the second derivative with respect to the model parameters.

    Source code in src/pydvl/influence/twice_differentiable.py
    def hessian(self, x: TensorType, y: TensorType) -> TensorType:\nr\"\"\"\n    Calculates the full Hessian of \\(L(f(x),y)\\) with respect to the model parameters given data \\(x\\) and \\(y\\).\n    Args:\n        x: An array representing the features \\(x_i\\).\n        y: An array representing the target values \\(y_i\\).\n    Returns:\n        A tensor representing the Hessian of the model, i.e. the second derivative\n            with respect to the model parameters.\n    \"\"\"\npass\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TwiceDifferentiable.mvp","title":"mvp(grad_xy, v, backprop_on, *, progress=False) abstractmethod staticmethod","text":"

    Calculates the second order derivative of the model along directions \\(v\\). The second order derivative can be selected through the backprop_on argument.

    PARAMETER DESCRIPTION grad_xy

    An array [P] holding the gradients of the model parameters with respect to input \\(x\\) and labels \\(y\\). \\(P\\) is the number of parameters of the model. Typically obtained through self.grad.

    TYPE: TensorType

    v

    An array ([DxP] or even one-dimensional [D]) which multiplies the matrix. \\(D\\) is the number of directions.

    TYPE: TensorType

    progress

    If True, progress is displayed.

    TYPE: bool DEFAULT: False

    backprop_on

    Tensor used in the second backpropagation. The first one is along \\(x\\) and \\(y\\) as defined via grad_xy.

    TYPE: TensorType

    RETURNS DESCRIPTION TensorType

    A matrix representing the implicit matrix-vector product of the model along the given directions. Output shape is [DxM], where \\(M\\) is the number of elements of backprop_on.

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef mvp(\ngrad_xy: TensorType,\nv: TensorType,\nbackprop_on: TensorType,\n*,\nprogress: bool = False,\n) -> TensorType:\nr\"\"\"\n    Calculates the second order derivative of the model along directions \\(v\\).\n    The second order derivative can be selected through the `backprop_on` argument.\n    Args:\n        grad_xy: An array [P] holding the gradients of the model parameters with respect to input \\(x\\) and\n            labels \\(y\\). \\(P\\) is the number of parameters of the model. Typically obtained through `self.grad`.\n        v: An array ([DxP] or even one-dimensional [D]) which multiplies the matrix.\n            \\(D\\) is the number of directions.\n        progress: If `True`, progress is displayed.\n        backprop_on: Tensor used in the second backpropagation. The first one is along \\(x\\) and \\(y\\)\n            as defined via `grad_xy`.\n    Returns:\n        A matrix representing the implicit matrix-vector product of the model along the given directions.\n            Output shape is [DxM], where \\(M\\) is the number of elements of `backprop_on`.\n    \"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities","title":"TensorUtilities","text":"

    Bases: Generic[TensorType, ModelType], ABC

    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.__init_subclass__","title":"__init_subclass__(**kwargs)","text":"

    Automatically registers non-abstract subclasses in the registry.

    This method checks if twice_differentiable_type is defined in the subclass and if it is of the correct type. If the attribute is missing or is not a type, a TypeError is raised.

    PARAMETER DESCRIPTION kwargs

    Additional keyword arguments.

    DEFAULT: {}

    RAISES DESCRIPTION TypeError

    If the subclass does not define twice_differentiable_type, or if it is not of the correct type.

    Source code in src/pydvl/influence/twice_differentiable.py
    def __init_subclass__(cls, **kwargs):\n\"\"\"\n    Automatically registers non-abstract subclasses in the registry.\n    This method checks if `twice_differentiable_type` is defined in the subclass and if it is of the correct type.\n    If either attribute is missing or incorrect, a `TypeError` is raised.\n    Args:\n        kwargs: Additional keyword arguments.\n    Raises:\n        TypeError: If the subclass does not define `twice_differentiable_type`, or if it is not of the correct type.\n    \"\"\"\nif not hasattr(cls, \"twice_differentiable_type\") or not isinstance(\ncls.twice_differentiable_type, type\n):\nraise TypeError(\nf\"'twice_differentiable_type' must be a Type[TwiceDifferentiable]\"\n)\ncls.registry[cls.twice_differentiable_type] = cls\nsuper().__init_subclass__(**kwargs)\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.einsum","title":"einsum(equation, *operands) abstractmethod staticmethod","text":"

    Sums the product of the elements of the input operands along dimensions specified using a notation based on the Einstein summation convention.

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef einsum(equation, *operands) -> TensorType:\n\"\"\"Sums the product of the elements of the input `operands` along dimensions specified using a notation\n    based on the Einstein summation convention.\n    \"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.cat","title":"cat(a, **kwargs) abstractmethod staticmethod","text":"

    Concatenates a sequence of tensors into a single tensor

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef cat(a: Sequence[TensorType], **kwargs) -> TensorType:\n\"\"\"Concatenates a sequence of tensors into a single torch tensor\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.stack","title":"stack(a, **kwargs) abstractmethod staticmethod","text":"

    Stacks a sequence of tensors into a single tensor

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef stack(a: Sequence[TensorType], **kwargs) -> TensorType:\n\"\"\"Stacks a sequence of tensors into a single torch tensor\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.unsqueeze","title":"unsqueeze(x, dim) abstractmethod staticmethod","text":"

    Add a singleton dimension at a specified position in a tensor

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef unsqueeze(x: TensorType, dim: int) -> TensorType:\n\"\"\"Add a singleton dimension at a specified position in a tensor\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.get_element","title":"get_element(x, idx) abstractmethod staticmethod","text":"

    Get the tensor element x[i] from the first non-singular dimension

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef get_element(x: TensorType, idx: int) -> TensorType:\n\"\"\"Get the tensor element x[i] from the first non-singular dimension\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.slice","title":"slice(x, start, stop, axis=0) abstractmethod staticmethod","text":"

    Slice a tensor in the provided axis

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef slice(x: TensorType, start: int, stop: int, axis: int = 0) -> TensorType:\n\"\"\"Slice a tensor in the provided axis\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.shape","title":"shape(x) abstractmethod staticmethod","text":"

    Return the shape of a tensor

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef shape(x: TensorType) -> Tuple[int, ...]:\n\"\"\"Slice a tensor in the provided axis\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.reshape","title":"reshape(x, shape) abstractmethod staticmethod","text":"

    Reshape a tensor to the provided shape

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef reshape(x: TensorType, shape: Tuple[int, ...]) -> TensorType:\n\"\"\"Reshape a tensor to the provided shape\"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.cat_gen","title":"cat_gen(a, resulting_shape, model) abstractmethod staticmethod","text":"

    Concatenate tensors from a generator. The resulting tensor is of shape resulting_shape and compatible with model

    Source code in src/pydvl/influence/twice_differentiable.py
    @staticmethod\n@abstractmethod\ndef cat_gen(\na: Generator[TensorType, None, None],\nresulting_shape: Tuple[int, ...],\nmodel: ModelType,\n) -> TensorType:\n\"\"\"Concatenate tensors from a generator. Resulting tensor is of shape resulting_shape\n    and compatible to model\n    \"\"\"\n
    "},{"location":"api/pydvl/influence/twice_differentiable/#pydvl.influence.twice_differentiable.TensorUtilities.from_twice_differentiable","title":"from_twice_differentiable(twice_diff) classmethod","text":"

    Factory method to retrieve the subclass of TensorUtilities registered for the type of an instance of a subclass of TwiceDifferentiable.

    PARAMETER DESCRIPTION twice_diff

    An instance of a subclass of TwiceDifferentiable for which a corresponding TensorUtilities object is required.

    TYPE: TwiceDifferentiable

    RETURNS DESCRIPTION Type[TensorUtilities]

    The subclass of TensorUtilities registered for the type of the provided TwiceDifferentiable instance.

    RAISES DESCRIPTION KeyError

    If there's no registered TensorUtilities for the provided TwiceDifferentiable type.

    Source code in src/pydvl/influence/twice_differentiable.py
    @classmethod\ndef from_twice_differentiable(\ncls,\ntwice_diff: TwiceDifferentiable,\n) -> Type[\"TensorUtilities\"]:\n\"\"\"\n    Factory method to create an instance of a subclass\n    [TensorUtilities][pydvl.influence.twice_differentiable.TensorUtilities] from an instance of a subclass of\n    [TwiceDifferentiable][pydvl.influence.twice_differentiable.TwiceDifferentiable].\n    Args:\n        twice_diff: An instance of a subclass of\n            [TwiceDifferentiable][pydvl.influence.twice_differentiable.TwiceDifferentiable]\n            for which a corresponding [TensorUtilities][pydvl.influence.twice_differentiable.TensorUtilities]\n            object is required.\n    Returns:\n        An subclass of [TensorUtilities][pydvl.influence.twice_differentiable.TensorUtilities]\n            registered to the provided subclass instance of\n            [TwiceDifferentiable][pydvl.influence.twice_differentiable.TwiceDifferentiable] object.\n    Raises:\n        KeyError: If there's no registered [TensorUtilities][pydvl.influence.twice_differentiable.TensorUtilities]\n            for the provided [TwiceDifferentiable][pydvl.influence.twice_differentiable.TwiceDifferentiable] type.\n    \"\"\"\ntu = cls.registry.get(type(twice_diff), None)\nif tu is None:\nraise KeyError(\nf\"No registered TensorUtilities for the type {type(twice_diff).__name__}\"\n)\nreturn tu\n
    "},{"location":"api/pydvl/influence/torch/","title":"Torch","text":""},{"location":"api/pydvl/influence/torch/functional/","title":"Functional","text":""},{"location":"api/pydvl/influence/torch/functional/#pydvl.influence.torch.functional.hvp","title":"hvp(func, params, vec, reverse_only=True)","text":"

    Computes the Hessian-vector product (HVP) for a given function at given parameters, i.e.

    \\[\\nabla_{\\theta} \\nabla_{\\theta} f (\\theta)\\cdot v\\]

    This function can operate in two modes, either reverse-mode autodiff only or both forward- and reverse-mode autodiff.

    PARAMETER DESCRIPTION func

    The scalar-valued function for which the HVP is computed.

    TYPE: Callable[[TorchTensorContainerType], Tensor]

    params

    The parameters at which the HVP is computed.

    TYPE: TorchTensorContainerType

    vec

    The vector with which the Hessian is multiplied.

    TYPE: TorchTensorContainerType

    reverse_only

    Whether to use only reverse-mode autodiff (True, default) or both forward- and reverse-mode autodiff (False).

    TYPE: bool DEFAULT: True

    RETURNS DESCRIPTION TorchTensorContainerType

    The HVP of the function at the given parameters with the given vector.

    Example:

    >>> def f(z): return torch.sum(z**2)\n>>> u = torch.ones(10, requires_grad=True)\n>>> v = torch.ones(10)\n>>> hvp_vec = hvp(f, u, v)\n>>> assert torch.allclose(hvp_vec, torch.full((10, ), 2.0))\n

    Source code in src/pydvl/influence/torch/functional.py
    def hvp(\nfunc: Callable[[TorchTensorContainerType], torch.Tensor],\nparams: TorchTensorContainerType,\nvec: TorchTensorContainerType,\nreverse_only: bool = True,\n) -> TorchTensorContainerType:\nr\"\"\"\n    Computes the Hessian-vector product (HVP) for a given function at given parameters, i.e.\n    \\[\\nabla_{\\theta} \\nabla_{\\theta} f (\\theta)\\cdot v\\]\n    This function can operate in two modes, either reverse-mode autodiff only or both\n    forward- and reverse-mode autodiff.\n    Args:\n        func: The scalar-valued function for which the HVP is computed.\n        params: The parameters at which the HVP is computed.\n        vec: The vector with which the Hessian is multiplied.\n        reverse_only: Whether to use only reverse-mode autodiff\n            (True, default) or both forward- and reverse-mode autodiff (False).\n    Returns:\n       The HVP of the function at the given parameters with the given vector.\n    Example:\n    ```python\n    >>> def f(z): return torch.sum(z**2)\n    >>> u = torch.ones(10, requires_grad=True)\n    >>> v = torch.ones(10)\n    >>> hvp_vec = hvp(f, u, v)\n    >>> assert torch.allclose(hvp_vec, torch.full((10, ), 2.0))\n    ```\n    \"\"\"\noutput: TorchTensorContainerType\nif reverse_only:\n_, vjp_fn = vjp(grad(func), params)\noutput = vjp_fn(vec)[0]\nelse:\noutput = jvp(grad(func), (params,), (vec,))[1]\nreturn output\n
    "},{"location":"api/pydvl/influence/torch/functional/#pydvl.influence.torch.functional.batch_hvp_gen","title":"batch_hvp_gen(model, loss, data_loader, reverse_only=True)","text":"

    Generates a sequence of batch Hessian-vector product (HVP) computations for the provided model, loss function, and data loader. If \\(f_i\\) is the model's loss on the \\(i\\)-th batch and \\(\\theta\\) the model parameters, this is the sequence of the callable matrix vector products for the matrices

    \\[\\nabla_{\\theta}\\nabla_{\\theta}f_i(\\theta), \\quad i=1,\\dots, \\text{num_batches} \\]

    i.e. iterating over the data_loader, yielding partial function calls for calculating HVPs.

    PARAMETER DESCRIPTION model

    The PyTorch model for which the HVP is calculated.

    TYPE: Module

    loss

    The loss function used to calculate the gradient and HVP.

    TYPE: Callable[[Tensor, Tensor], Tensor]

    data_loader

    PyTorch DataLoader object containing the dataset for which the HVP is calculated.

    TYPE: DataLoader

    reverse_only

    Whether to use only reverse-mode autodiff (True, default) or both forward- and reverse-mode autodiff (False).

    TYPE: bool DEFAULT: True

    YIELDS DESCRIPTION Callable[[Tensor], Tensor]

    Partial functions H_{batch}(vec)=hvp(model, loss, inputs, targets, vec) that, when called, will compute the Hessian-vector product H(vec) for the given model and loss in a batch-wise manner, where (inputs, targets) come from one batch.

    Source code in src/pydvl/influence/torch/functional.py
    def batch_hvp_gen(\nmodel: torch.nn.Module,\nloss: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],\ndata_loader: DataLoader,\nreverse_only: bool = True,\n) -> Generator[Callable[[torch.Tensor], torch.Tensor], None, None]:\nr\"\"\"\n    Generates a sequence of batch Hessian-vector product (HVP) computations for the provided model, loss function,\n    and data loader. If \\(f_i\\) is the model's loss on the \\(i\\)-th batch and \\(\\theta\\) the model parameters,\n    this is the sequence of the callable matrix vector products for the matrices\n    \\[\\nabla_{\\theta}\\nabla_{\\theta}f_i(\\theta), \\quad i=1,\\dots, \\text{num_batches} \\]\n    i.e. iterating over the data_loader, yielding partial function calls for calculating HVPs.\n    Args:\n        model: The PyTorch model for which the HVP is calculated.\n        loss: The loss function used to calculate the gradient and HVP.\n        data_loader: PyTorch DataLoader object containing the dataset for which the HVP is calculated.\n        reverse_only: Whether to use only reverse-mode autodiff\n            (True, default) or both forward- and reverse-mode autodiff (False).\n    Yields:\n        Partial functions `H_{batch}(vec)=hvp(model, loss, inputs, targets, vec)` that when called,\n            will compute the Hessian-vector product H(vec) for the given model and loss in a batch-wise manner, where\n            (inputs, targets) coming from one batch.\n    \"\"\"\nfor inputs, targets in iter(data_loader):\nbatch_loss = batch_loss_function(model, loss, inputs, targets)\nmodel_params = dict(model.named_parameters())\ndef batch_hvp(vec: torch.Tensor):\nreturn flatten_tensors_to_vector(\nhvp(\nbatch_loss,\nmodel_params,\nalign_structure(model_params, vec),\nreverse_only=reverse_only,\n).values()\n)\nyield batch_hvp\n
    "},{"location":"api/pydvl/influence/torch/functional/#pydvl.influence.torch.functional.empirical_loss_function","title":"empirical_loss_function(model, loss, data_loader)","text":"

    Creates a function to compute the empirical loss of a given model on a given dataset. If we denote the model parameters with \\( \\theta \\), the resulting function approximates:

    \\[f(\\theta) = \\frac{1}{N}\\sum_{i=1}^N \\operatorname{loss}(y_i, \\operatorname{model}(\\theta, x_i))\\]

    PARAMETER DESCRIPTION model

    The model for which the loss should be computed.

    TYPE: Module

    loss

    The loss function to be used.

    TYPE: Callable[[Tensor, Tensor], Tensor]

    data_loader

    The data loader for iterating over the dataset.

    TYPE: DataLoader

    RETURNS DESCRIPTION Callable[[Dict[str, Tensor]], Tensor]

    A function that computes the empirical loss of the model on the dataset for given model parameters.

    Source code in src/pydvl/influence/torch/functional.py
    def empirical_loss_function(\nmodel: torch.nn.Module,\nloss: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],\ndata_loader: DataLoader,\n) -> Callable[[Dict[str, torch.Tensor]], torch.Tensor]:\nr\"\"\"\n    Creates a function to compute the empirical loss of a given model on a given dataset.\n    If we denote the model parameters with \\( \\theta \\), the resulting function approximates:\n    \\[f(\\theta) = \\frac{1}{N}\\sum_{i=1}^N \\operatorname{loss}(y_i, \\operatorname{model}(\\theta, x_i))\\]\n    Args:\n    - model: The model for which the loss should be computed.\n    - loss: The loss function to be used.\n    - data_loader: The data loader for iterating over the dataset.\n    Returns:\n        A function that computes the empirical loss of the model on the dataset for given model parameters.\n    \"\"\"\ndef empirical_loss(params: Dict[str, torch.Tensor]):\ntotal_loss = to_model_device(torch.zeros((), requires_grad=True), model)\ntotal_samples = to_model_device(torch.zeros(()), model)\nfor x, y in iter(data_loader):\noutput = functional_call(\nmodel, params, (to_model_device(x, model),), strict=True\n)\nloss_value = loss(output, to_model_device(y, model))\ntotal_loss = total_loss + loss_value * x.size(0)\ntotal_samples += x.size(0)\nreturn total_loss / total_samples\nreturn empirical_loss\n
    "},{"location":"api/pydvl/influence/torch/functional/#pydvl.influence.torch.functional.batch_loss_function","title":"batch_loss_function(model, loss, x, y)","text":"

    Creates a function to compute the loss of a given model on a given batch of data, i.e. for the \\(i\\)-th batch \\(B_i\\)

    \\[\\frac{1}{|B_i|}\\sum_{x,y \\in B_i} \\operatorname{loss}(y, \\operatorname{model}(\\theta, x))\\] PARAMETER DESCRIPTION model

    The model for which the loss should be computed.

    TYPE: Module

    loss

    The loss function to be used.

    TYPE: Callable[[Tensor, Tensor], Tensor]

    x

    The input data for the batch.

    TYPE: Tensor

    y

    The true labels for the batch.

    TYPE: Tensor

    RETURNS DESCRIPTION Callable[[Dict[str, Tensor]], Tensor]

    A function that computes the loss of the model on the batch for given model parameters.

    Source code in src/pydvl/influence/torch/functional.py
    def batch_loss_function(\nmodel: torch.nn.Module,\nloss: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],\nx: torch.Tensor,\ny: torch.Tensor,\n) -> Callable[[Dict[str, torch.Tensor]], torch.Tensor]:\nr\"\"\"\n    Creates a function to compute the loss of a given model on a given batch of data, i.e. for the $i$-th batch $B_i$\n    \\[\\frac{1}{|B_i|}\\sum_{x,y \\in B_i} \\operatorname{loss}(y, \\operatorname{model}(\\theta, x))\\]\n    Args:\n        model: The model for which the loss should be computed.\n        loss: The loss function to be used.\n        x: The input data for the batch.\n        y: The true labels for the batch.\n    Returns:\n        A function that computes the loss of the model on the batch for given model parameters.\n    \"\"\"\ndef batch_loss(params: Dict[str, torch.Tensor]):\noutputs = functional_call(\nmodel, params, (to_model_device(x, model),), strict=True\n)\nreturn loss(outputs, y)\nreturn batch_loss\n
    "},{"location":"api/pydvl/influence/torch/functional/#pydvl.influence.torch.functional.get_hvp_function","title":"get_hvp_function(model, loss, data_loader, use_hessian_avg=True, reverse_only=True, track_gradients=False)","text":"

    Returns a function that calculates the approximate Hessian-vector product for a given vector. If you want to compute the exact Hessian, i.e., pull all data into memory and perform a full gradient computation, use the function hvp.

    PARAMETER DESCRIPTION model

    A PyTorch module representing the model whose loss function's Hessian is to be computed.

    TYPE: Module

    loss

    A callable that takes the model's output and target as input and returns the scalar loss.

    TYPE: Callable[[Tensor, Tensor], Tensor]

    data_loader

    A DataLoader instance that provides batches of data for calculating the Hessian-vector product. Each batch from the DataLoader is assumed to return a tuple where the first element is the model's input and the second element is the target output.

    TYPE: DataLoader

    use_hessian_avg

    If True, the returned function uses batch-wise Hessian computation via batch_loss_function and averages the results. If False, the function uses backpropagation on the full empirical_loss_function, which is more accurate than averaging the batch Hessians, but is likely to use considerably more memory.

    TYPE: bool DEFAULT: True

    reverse_only

    Whether to use only reverse-mode autodiff (True, default) or both forward- and reverse-mode autodiff (False).

    TYPE: bool DEFAULT: True

    track_gradients

    Whether to track gradients for the resulting tensor of the Hessian-vector products (False, default).

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION Callable[[Tensor], Tensor]

    A function that takes a single argument, a vector, and returns the product of the Hessian of the loss function with respect to the model's parameters and the input vector.

    Source code in src/pydvl/influence/torch/functional.py
    def get_hvp_function(\nmodel: torch.nn.Module,\nloss: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],\ndata_loader: DataLoader,\nuse_hessian_avg: bool = True,\nreverse_only: bool = True,\ntrack_gradients: bool = False,\n) -> Callable[[torch.Tensor], torch.Tensor]:\n\"\"\"\n    Returns a function that calculates the approximate Hessian-vector product for a given vector. If you want to\n    compute the exact hessian, i.e., pulling all data into memory and compute a full gradient computation, use\n    the function `hvp`.\n    Args:\n        model: A PyTorch module representing the model whose loss function's Hessian is to be computed.\n        loss: A callable that takes the model's output and target as input and returns the scalar loss.\n        data_loader: A DataLoader instance that provides batches of data for calculating the Hessian-vector product.\n            Each batch from the DataLoader is assumed to return a tuple where the first element\n            is the model's input and the second element is the target output.\n        use_hessian_avg: If True, the returned function uses batch-wise Hessian computation via\n            [batch_loss_function][pydvl.influence.torch.functional.batch_loss_function] and averages the results.\n            If False, the function uses backpropagation on the full\n            [empirical_loss_function][pydvl.influence.torch.functional.empirical_loss_function],\n            which is more accurate than averaging the batch hessians, but probably has a way higher memory usage.\n        reverse_only: Whether to use only reverse-mode autodiff (True, default) or\n            both forward- and reverse-mode autodiff (False).\n        track_gradients: Whether to track gradients for the resulting tensor of the hessian vector\n            products are (False, default).\n    Returns:\n        A function that takes a single argument, a vector, and returns the product of the Hessian of the `loss`\n            function with respect to 
the `model`'s parameters and the input vector.\n    \"\"\"\nparams = {\nk: p if track_gradients else p.detach() for k, p in model.named_parameters()\n}\ndef hvp_function(vec: torch.Tensor) -> torch.Tensor:\nv = align_structure(params, vec)\nempirical_loss = empirical_loss_function(model, loss, data_loader)\nreturn flatten_tensors_to_vector(\nhvp(empirical_loss, params, v, reverse_only=reverse_only).values()\n)\ndef avg_hvp_function(vec: torch.Tensor) -> torch.Tensor:\nv = align_structure(params, vec)\nbatch_hessians_vector_products: Iterable[torch.Tensor] = map(\nlambda x: x(v), batch_hvp_gen(model, loss, data_loader, reverse_only)\n)\nnum_batches = len(data_loader)\navg_hessian = to_model_device(torch.zeros_like(vec), model)\nfor batch_hvp in batch_hessians_vector_products:\navg_hessian += batch_hvp\nreturn avg_hessian / float(num_batches)\nreturn avg_hvp_function if use_hessian_avg else hvp_function\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/","title":"Torch differentiable","text":"

    Contains methods for differentiating a pyTorch model. Most of the methods focus on ways to calculate matrix vector products. Moreover, it contains several methods to invert the Hessian vector product. These are used to calculate the influence of a training point on the model.

    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable--references","title":"References","text":"
    1. Koh, P.W., Liang, P., 2017. Understanding Black-box Predictions via Influence Functions. In: Proceedings of the 34th International Conference on Machine Learning, pp. 1885\u20131894. PMLR.\u00a0\u21a9

    2. Agarwal, N., Bullins, B., Hazan, E., 2017. Second-Order Stochastic Optimization for Machine Learning in Linear Time. In: Journal of Machine Learning Research, Vol. 18, pp. 1\u201340. JMLR.\u00a0\u21a9

    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable","title":"TorchTwiceDifferentiable(model, loss)","text":"

    Bases: TwiceDifferentiable[Tensor]

    Wraps a torch.nn.Module and a loss function and provides methods to compute gradients and second derivative of the loss wrt. the model parameters

    PARAMETER DESCRIPTION model

    A (differentiable) function.

    TYPE: Module

    loss

    A differentiable scalar loss \\( L(\\hat{y}, y) \\), mapping a prediction and a target to a real value.

    TYPE: Callable[[Tensor, Tensor], Tensor]

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def __init__(\nself,\nmodel: nn.Module,\nloss: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],\n):\nif model.training:\nlogger.warning(\n\"Passed model not in evaluation mode. This can create several issues in influence \"\n\"computation, e.g. due to batch normalization. Please call model.eval() before \"\n\"computing influences.\"\n)\nself.loss = loss\nself.model = model\nfirst_param = next(model.parameters())\nself.device = first_param.device\nself.dtype = first_param.dtype\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable.parameters","title":"parameters: List[torch.Tensor] property","text":"RETURNS DESCRIPTION List[Tensor]

    All model parameters that require differentiating.

    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable.num_params","title":"num_params: int property","text":"

    Get the number of parameters of model f.

    RETURNS DESCRIPTION int

    Number of parameters.

    TYPE: int

    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable.grad","title":"grad(x, y, create_graph=False)","text":"

    Calculates the gradient of the loss with respect to the model parameters.

    PARAMETER DESCRIPTION x

    A matrix [NxD] representing the features \\( x_i \\).

    TYPE: Tensor

    y

    A matrix [NxK] representing the target values \\( y_i \\).

    TYPE: Tensor

    create_graph

    If True, the resulting gradient tensor can be used for further differentiation.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION Tensor

    An array [P] with the gradients of the model.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def grad(\nself, x: torch.Tensor, y: torch.Tensor, create_graph: bool = False\n) -> torch.Tensor:\nr\"\"\"\n    Calculates gradient of model parameters with respect to the model parameters.\n    Args:\n        x: A matrix [NxD] representing the features \\( x_i \\).\n        y: A matrix [NxK] representing the target values \\( y_i \\).\n        create_graph (bool): If True, the resulting gradient tensor can be used for further differentiation.\n    Returns:\n        An array [P] with the gradients of the model.\n    \"\"\"\nx = x.to(self.device)\ny = y.to(self.device)\nif create_graph and not x.requires_grad:\nx = x.requires_grad_(True)\nloss_value = self.loss(torch.squeeze(self.model(x)), torch.squeeze(y))\ngrad_f = torch.autograd.grad(\nloss_value, self.parameters, create_graph=create_graph\n)\nreturn flatten_tensors_to_vector(grad_f)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable.hessian","title":"hessian(x, y)","text":"

    Calculates the explicit Hessian of the loss with respect to the model parameters given data \\(x\\) and \\(y\\).

    PARAMETER DESCRIPTION x

    A matrix [NxD] representing the features \\(x_i\\).

    TYPE: Tensor

    y

    A matrix [NxK] representing the target values \\(y_i\\).

    TYPE: Tensor

    RETURNS DESCRIPTION Tensor

    A tensor representing the hessian of the loss with respect to the model parameters.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def hessian(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:\nr\"\"\"\n    Calculates the explicit hessian of model parameters given data \\(x\\) and \\(y\\).\n    Args:\n        x: A matrix [NxD] representing the features \\(x_i\\).\n        y: A matrix [NxK] representing the target values \\(y_i\\).\n    Returns:\n        A tensor representing the hessian of the loss with respect to the model parameters.\n    \"\"\"\ndef model_func(param):\noutputs = torch.func.functional_call(\nself.model,\nalign_structure(\n{k: p for k, p in self.model.named_parameters() if p.requires_grad},\nparam,\n),\n(x.to(self.device),),\nstrict=True,\n)\nreturn self.loss(outputs, y.to(self.device))\nparams = flatten_tensors_to_vector(\np.detach() for p in self.model.parameters() if p.requires_grad\n)\nreturn torch.func.hessian(model_func)(params)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable.mvp","title":"mvp(grad_xy, v, backprop_on, *, progress=False) staticmethod","text":"

    Calculates the second-order derivative of the model along directions v. This second-order derivative can be selected through the backprop_on argument.

    PARAMETER DESCRIPTION grad_xy

    An array [P] holding the gradients of the model parameters with respect to input \\(x\\) and labels \\(y\\), where P is the number of parameters of the model. It is typically obtained through self.grad.

    TYPE: Tensor

    v

    An array ([DxP] or even one-dimensional [D]) which multiplies the matrix, where D is the number of directions.

    TYPE: Tensor

    progress

    If True, progress will be printed.

    TYPE: bool DEFAULT: False

    backprop_on

    Tensor used in the second backpropagation (the first one is defined via grad_xy).

    TYPE: Tensor

    RETURNS DESCRIPTION Tensor

    A matrix representing the implicit matrix-vector product of the model along the given directions. The output shape is [DxM], with M being the number of elements of backprop_on.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @staticmethod\ndef mvp(\ngrad_xy: torch.Tensor,\nv: torch.Tensor,\nbackprop_on: torch.Tensor,\n*,\nprogress: bool = False,\n) -> torch.Tensor:\nr\"\"\"\n    Calculates the second-order derivative of the model along directions v.\n    This second-order derivative can be selected through the `backprop_on` argument.\n    Args:\n        grad_xy: An array [P] holding the gradients of the model parameters with respect to input\n            \\(x\\) and labels \\(y\\), where P is the number of parameters of the model.\n            It is typically obtained through `self.grad`.\n        v: An array ([DxP] or even one-dimensional [D]) which multiplies the matrix,\n            where D is the number of directions.\n        progress: If True, progress will be printed.\n        backprop_on: Tensor used in the second backpropagation\n            (the first one is defined via grad_xy).\n    Returns:\n        A matrix representing the implicit matrix-vector product of the model along the given directions.\n            The output shape is [DxM], with M being the number of elements of `backprop_on`.\n    \"\"\"\ndevice = grad_xy.device\nv = as_tensor(v, warn=False).to(device)\nif v.ndim == 1:\nv = v.unsqueeze(0)\nz = (grad_xy * Variable(v)).sum(dim=1)\nmvp = []\nfor i in maybe_progress(range(len(z)), progress, desc=\"MVP\"):\nmvp.append(\nflatten_tensors_to_vector(\nautograd.grad(z[i], backprop_on, retain_graph=True)\n)\n)\nreturn torch.stack([grad.contiguous().view(-1) for grad in mvp]).detach()\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation","title":"LowRankProductRepresentation dataclass","text":"

    Representation of a low rank product of the form \\(H = V D V^T\\), where D is a diagonal matrix and V is orthogonal.

    PARAMETER DESCRIPTION eigen_vals

    Diagonal of D.

    TYPE: Tensor

    projections

    The matrix V.

    TYPE: Tensor

    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation.to","title":"to(device)","text":"

    Move the representing tensors to a device

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def to(self, device: torch.device):\n\"\"\"\n    Move the representing tensors to a device\n    \"\"\"\nreturn LowRankProductRepresentation(\nself.eigen_vals.to(device), self.projections.to(device)\n)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTensorUtilities","title":"TorchTensorUtilities","text":"

    Bases: TensorUtilities[Tensor, TorchTwiceDifferentiable]

    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTensorUtilities.einsum","title":"einsum(equation, *operands) staticmethod","text":"

    Sums the product of the elements of the input operands along dimensions specified using a notation based on the Einstein summation convention.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @staticmethod\ndef einsum(equation: str, *operands) -> torch.Tensor:\n\"\"\"Sums the product of the elements of the input :attr:`operands` along dimensions specified using a notation\n    based on the Einstein summation convention.\n    \"\"\"\nreturn torch.einsum(equation, *operands)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTensorUtilities.cat","title":"cat(a, **kwargs) staticmethod","text":"

    Concatenates a sequence of tensors into a single torch tensor

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @staticmethod\ndef cat(a: Sequence[torch.Tensor], **kwargs) -> torch.Tensor:\n\"\"\"Concatenates a sequence of tensors into a single torch tensor\"\"\"\nreturn torch.cat(a, **kwargs)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTensorUtilities.stack","title":"stack(a, **kwargs) staticmethod","text":"

    Stacks a sequence of tensors into a single torch tensor

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @staticmethod\ndef stack(a: Sequence[torch.Tensor], **kwargs) -> torch.Tensor:\n\"\"\"Stacks a sequence of tensors into a single torch tensor\"\"\"\nreturn torch.stack(a, **kwargs)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.TorchTensorUtilities.unsqueeze","title":"unsqueeze(x, dim) staticmethod","text":"

    Add a singleton dimension at a specified position in a tensor.

    PARAMETER DESCRIPTION x

    A PyTorch tensor.

    TYPE: Tensor

    dim

    The position at which to add the singleton dimension. Zero-based indexing.

    TYPE: int

    RETURNS DESCRIPTION Tensor

    A new tensor with an additional singleton dimension.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @staticmethod\ndef unsqueeze(x: torch.Tensor, dim: int) -> torch.Tensor:\n\"\"\"\n    Add a singleton dimension at a specified position in a tensor.\n    Args:\n        x: A PyTorch tensor.\n        dim: The position at which to add the singleton dimension. Zero-based indexing.\n    Returns:\n        A new tensor with an additional singleton dimension.\n    \"\"\"\nreturn x.unsqueeze(dim)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.lanzcos_low_rank_hessian_approx","title":"lanzcos_low_rank_hessian_approx(hessian_vp, matrix_shape, hessian_perturbation=0.0, rank_estimate=10, krylov_dimension=None, tol=1e-06, max_iter=None, device=None, eigen_computation_on_gpu=False, torch_dtype=None)","text":"

    Calculates a low-rank approximation of the Hessian matrix of a scalar-valued function using the implicitly restarted Lanczos algorithm, i.e.:

    \\[ H_{\\text{approx}} = V D V^T\\]

    where \\(D\\) is a diagonal matrix with the top (in absolute value) rank_estimate eigenvalues of the Hessian and \\(V\\) contains the corresponding eigenvectors.

    PARAMETER DESCRIPTION hessian_vp

    A function that takes a vector and returns the product of the Hessian of the loss function with that vector.

    TYPE: Callable[[Tensor], Tensor]

    matrix_shape

    The shape of the matrix, represented by the hessian vector product.

    TYPE: Tuple[int, int]

    hessian_perturbation

    Regularization parameter added to the Hessian-vector product for numerical stability.

    TYPE: float DEFAULT: 0.0

    rank_estimate

    The number of eigenvalues and corresponding eigenvectors to compute. Represents the desired rank of the Hessian approximation.

    TYPE: int DEFAULT: 10

    krylov_dimension

    The number of Krylov vectors to use for the Lanczos method. If not provided, it defaults to \\( \\min(\\text{model.num_parameters}, \\max(2 \\times \\text{rank_estimate} + 1, 20)) \\).

    TYPE: Optional[int] DEFAULT: None

    tol

    The stopping criteria for the Lanczos algorithm, which stops when the difference in the approximated eigenvalue is less than tol. Defaults to 1e-6.

    TYPE: float DEFAULT: 1e-06

    max_iter

    The maximum number of iterations for the Lanczos method. If not provided, it defaults to \\( 10 \\cdot \\text{model.num_parameters}\\).

    TYPE: Optional[int] DEFAULT: None

    device

    The device to use for executing the hessian vector product.

    TYPE: Optional[device] DEFAULT: None

    eigen_computation_on_gpu

    If True, tries to execute the eigen pair approximation on the provided device via cupy implementation. Ensure that either your model is small enough, or you use a small rank_estimate to fit your device's memory. If False, the eigen pair approximation is executed on the CPU with scipy's wrapper to ARPACK.

    TYPE: bool DEFAULT: False

    torch_dtype

    If not provided, the current torch default dtype is used for conversion to torch.

    TYPE: dtype DEFAULT: None

    RETURNS DESCRIPTION LowRankProductRepresentation

    A LowRankProductRepresentation instance that contains the top (up until rank_estimate) eigenvalues and corresponding eigenvectors of the Hessian.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def lanzcos_low_rank_hessian_approx(\nhessian_vp: Callable[[torch.Tensor], torch.Tensor],\nmatrix_shape: Tuple[int, int],\nhessian_perturbation: float = 0.0,\nrank_estimate: int = 10,\nkrylov_dimension: Optional[int] = None,\ntol: float = 1e-6,\nmax_iter: Optional[int] = None,\ndevice: Optional[torch.device] = None,\neigen_computation_on_gpu: bool = False,\ntorch_dtype: torch.dtype = None,\n) -> LowRankProductRepresentation:\nr\"\"\"\n    Calculates a low-rank approximation of the Hessian matrix of a scalar-valued\n    function using the implicitly restarted Lanczos algorithm, i.e.:\n    \\[ H_{\\text{approx}} = V D V^T\\]\n    where \\(D\\) is a diagonal matrix with the top (in absolute value) `rank_estimate` eigenvalues of the Hessian\n    and \\(V\\) contains the corresponding eigenvectors.\n    Args:\n        hessian_vp: A function that takes a vector and returns the product of\n            the Hessian of the loss function.\n        matrix_shape: The shape of the matrix, represented by the hessian vector\n            product.\n        hessian_perturbation: Regularization parameter added to the\n            Hessian-vector product for numerical stability.\n        rank_estimate: The number of eigenvalues and corresponding eigenvectors\n            to compute. Represents the desired rank of the Hessian approximation.\n        krylov_dimension: The number of Krylov vectors to use for the Lanczos\n            method. If not provided, it defaults to\n            \\( \\min(\\text{model.num_parameters}, \\max(2 \\times \\text{rank_estimate} + 1, 20)) \\).\n        tol: The stopping criteria for the Lanczos algorithm, which stops when\n            the difference in the approximated eigenvalue is less than `tol`.\n            Defaults to 1e-6.\n        max_iter: The maximum number of iterations for the Lanczos method. 
If\n            not provided, it defaults to \\( 10 \\cdot \\text{model.num_parameters}\\).\n        device: The device to use for executing the hessian vector product.\n        eigen_computation_on_gpu: If True, tries to execute the eigen pair\n            approximation on the provided device via [cupy](https://cupy.dev/)\n            implementation. Ensure that either your model is small enough, or you\n            use a small rank_estimate to fit your device's memory. If False, the\n            eigen pair approximation is executed on the CPU with scipy's wrapper to\n            ARPACK.\n        torch_dtype: If not provided, the current torch default dtype is used for\n            conversion to torch.\n    Returns:\n        A [LowRankProductRepresentation][pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation]\n            instance that contains the top (up until rank_estimate) eigenvalues\n            and corresponding eigenvectors of the Hessian.\n    \"\"\"\ntorch_dtype = torch.get_default_dtype() if torch_dtype is None else torch_dtype\nif eigen_computation_on_gpu:\ntry:\nimport cupy as cp\nfrom cupyx.scipy.sparse.linalg import LinearOperator, eigsh\nfrom torch.utils.dlpack import from_dlpack, to_dlpack\nexcept ImportError as e:\nraise ImportError(\nf\"Try to install missing dependencies or set eigen_computation_on_gpu to False: {e}\"\n)\nif device is None:\nraise ValueError(\n\"Without setting an explicit device, cupy is not supported\"\n)\ndef to_torch_conversion_function(x):\nreturn from_dlpack(x.toDlpack()).to(torch_dtype)\ndef mv(x):\nx = to_torch_conversion_function(x)\ny = hessian_vp(x) + hessian_perturbation * x\nreturn cp.from_dlpack(to_dlpack(y))\nelse:\nfrom scipy.sparse.linalg import LinearOperator, eigsh\ndef mv(x):\nx_torch = torch.as_tensor(x, device=device, dtype=torch_dtype)\ny: NDArray = (\n(hessian_vp(x_torch) + hessian_perturbation * x_torch)\n.detach()\n.cpu()\n.numpy()\n)\nreturn y\nto_torch_conversion_function = 
partial(torch.as_tensor, dtype=torch_dtype)\ntry:\neigen_vals, eigen_vecs = eigsh(\nLinearOperator(matrix_shape, matvec=mv),\nk=rank_estimate,\nmaxiter=max_iter,\ntol=tol,\nncv=krylov_dimension,\nreturn_eigenvectors=True,\n)\nexcept ArpackNoConvergence as e:\nlogger.warning(\nf\"ARPACK did not converge for parameters {max_iter=}, {tol=}, {krylov_dimension=}, \"\nf\"{rank_estimate=}. \\n Returning the best approximation found so far. Use those with care or \"\nf\"modify parameters.\\n Original error: {e}\"\n)\neigen_vals, eigen_vecs = e.eigenvalues, e.eigenvectors\neigen_vals = to_torch_conversion_function(eigen_vals)\neigen_vecs = to_torch_conversion_function(eigen_vecs)\nreturn LowRankProductRepresentation(eigen_vals, eigen_vecs)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.model_hessian_low_rank","title":"model_hessian_low_rank(model, training_data, hessian_perturbation=0.0, rank_estimate=10, krylov_dimension=None, tol=1e-06, max_iter=None, eigen_computation_on_gpu=False)","text":"

    Calculates a low-rank approximation of the Hessian matrix of the model's loss function using the implicitly restarted Lanczos algorithm, i.e.

    \\[ H_{\\text{approx}} = V D V^T\\]

    where \\(D\\) is a diagonal matrix with the top (in absolute value) rank_estimate eigenvalues of the Hessian and \\(V\\) contains the corresponding eigenvectors.

    PARAMETER DESCRIPTION model

    A PyTorch model instance that is twice differentiable, wrapped into TorchTwiceDifferentiable. The Hessian will be calculated with respect to this model's parameters.

    TYPE: TorchTwiceDifferentiable

    training_data

    A DataLoader instance that provides the model's training data. Used in calculating the Hessian-vector products.

    TYPE: DataLoader

    hessian_perturbation

    Optional regularization parameter added to the Hessian-vector product for numerical stability.

    TYPE: float DEFAULT: 0.0

    rank_estimate

    The number of eigenvalues and corresponding eigenvectors to compute. Represents the desired rank of the Hessian approximation.

    TYPE: int DEFAULT: 10

    krylov_dimension

    The number of Krylov vectors to use for the Lanczos method. If not provided, it defaults to min(model.num_parameters, max(2*rank_estimate + 1, 20)).

    TYPE: Optional[int] DEFAULT: None

    tol

    The stopping criteria for the Lanczos algorithm, which stops when the difference in the approximated eigenvalue is less than tol. Defaults to 1e-6.

    TYPE: float DEFAULT: 1e-06

    max_iter

    The maximum number of iterations for the Lanczos method. If not provided, it defaults to 10*model.num_parameters.

    TYPE: Optional[int] DEFAULT: None

    eigen_computation_on_gpu

    If True, tries to execute the eigen pair approximation on the provided device via a cupy implementation. Make sure that either your model is small enough or you use a small rank_estimate to fit your device's memory. If False, the eigen pair approximation is executed on the CPU by scipy's wrapper to ARPACK.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION LowRankProductRepresentation

    A LowRankProductRepresentation instance that contains the top (up until rank_estimate) eigenvalues and corresponding eigenvectors of the Hessian.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def model_hessian_low_rank(\nmodel: TorchTwiceDifferentiable,\ntraining_data: DataLoader,\nhessian_perturbation: float = 0.0,\nrank_estimate: int = 10,\nkrylov_dimension: Optional[int] = None,\ntol: float = 1e-6,\nmax_iter: Optional[int] = None,\neigen_computation_on_gpu: bool = False,\n) -> LowRankProductRepresentation:\nr\"\"\"\n    Calculates a low-rank approximation of the Hessian matrix of the model's loss function using the implicitly\n    restarted Lanczos algorithm, i.e.\n    \\[ H_{\\text{approx}} = V D V^T\\]\n    where \\(D\\) is a diagonal matrix with the top (in absolute value) `rank_estimate` eigenvalues of the Hessian\n    and \\(V\\) contains the corresponding eigenvectors.\n    Args:\n        model: A PyTorch model instance that is twice differentiable, wrapped into `TorchTwiceDifferential`.\n            The Hessian will be calculated with respect to this model's parameters.\n        training_data: A DataLoader instance that provides the model's training data.\n            Used in calculating the Hessian-vector products.\n        hessian_perturbation: Optional regularization parameter added to the Hessian-vector product\n            for numerical stability.\n        rank_estimate: The number of eigenvalues and corresponding eigenvectors to compute.\n            Represents the desired rank of the Hessian approximation.\n        krylov_dimension: The number of Krylov vectors to use for the Lanczos method.\n            If not provided, it defaults to min(model.num_parameters, max(2*rank_estimate + 1, 20)).\n        tol: The stopping criteria for the Lanczos algorithm, which stops when the difference\n            in the approximated eigenvalue is less than `tol`. Defaults to 1e-6.\n        max_iter: The maximum number of iterations for the Lanczos method. 
If not provided, it defaults to\n            10*model.num_parameters.\n        eigen_computation_on_gpu: If True, tries to execute the eigen pair approximation on the provided\n            device via cupy implementation.\n            Make sure, that either your model is small enough or you use a\n            small rank_estimate to fit your device's memory.\n            If False, the eigen pair approximation is executed on the CPU by scipy wrapper to\n            ARPACK.\n    Returns:\n        A [LowRankProductRepresentation][pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation]\n            instance that contains the top (up until rank_estimate) eigenvalues\n            and corresponding eigenvectors of the Hessian.\n    \"\"\"\nraw_hvp = get_hvp_function(\nmodel.model, model.loss, training_data, use_hessian_avg=True\n)\nreturn lanzcos_low_rank_hessian_approx(\nhessian_vp=raw_hvp,\nmatrix_shape=(model.num_params, model.num_params),\nhessian_perturbation=hessian_perturbation,\nrank_estimate=rank_estimate,\nkrylov_dimension=krylov_dimension,\ntol=tol,\nmax_iter=max_iter,\ndevice=model.device if hasattr(model, \"device\") else None,\neigen_computation_on_gpu=eigen_computation_on_gpu,\n)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.solve_linear","title":"solve_linear(model, training_data, b, hessian_perturbation=0.0)","text":"

    Given a model and training data, it finds x such that \\(Hx = b\\), with \\(H\\) being the model hessian.

    PARAMETER DESCRIPTION model

    A model wrapped in the TwiceDifferentiable interface.

    TYPE: TorchTwiceDifferentiable

    training_data

    A DataLoader containing the training data.

    TYPE: DataLoader

    b

    A vector or matrix, the right hand side of the equation \\(Hx = b\\).

    TYPE: Tensor

    hessian_perturbation

    Regularization of the hessian.

    TYPE: float DEFAULT: 0.0

    RETURNS DESCRIPTION InverseHvpResult

    Instance of InverseHvpResult, having an array that solves the inverse problem, i.e. it returns \\(x\\) such that \\(Hx = b\\), and a dictionary containing information about the solution.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @InversionRegistry.register(TorchTwiceDifferentiable, InversionMethod.Direct)\ndef solve_linear(\nmodel: TorchTwiceDifferentiable,\ntraining_data: DataLoader,\nb: torch.Tensor,\nhessian_perturbation: float = 0.0,\n) -> InverseHvpResult:\nr\"\"\"\n    Given a model and training data, it finds x such that \\(Hx = b\\), with \\(H\\) being the model hessian.\n    Args:\n        model: A model wrapped in the TwiceDifferentiable interface.\n        training_data: A DataLoader containing the training data.\n        b: A vector or matrix, the right hand side of the equation \\(Hx = b\\).\n        hessian_perturbation: Regularization of the hessian.\n    Returns:\n        Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult],\n            having an array that solves the inverse problem, i.e. it returns \\(x\\) such that \\(Hx = b\\),\n            and a dictionary containing information about the solution.\n    \"\"\"\nall_x, all_y = [], []\nfor x, y in training_data:\nall_x.append(x)\nall_y.append(y)\nhessian = model.hessian(torch.cat(all_x), torch.cat(all_y))\nmatrix = hessian + hessian_perturbation * torch.eye(\nmodel.num_params, device=model.device\n)\ninfo = {\"hessian\": hessian}\nreturn InverseHvpResult(x=torch.linalg.solve(matrix, b.T).T, info=info)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.solve_batch_cg","title":"solve_batch_cg(model, training_data, b, hessian_perturbation=0.0, *, x0=None, rtol=1e-07, atol=1e-07, maxiter=None, progress=False)","text":"

    Given a model and training data, it uses conjugate gradient to calculate the inverse of the Hessian Vector Product. More precisely, it finds x such that \\(Hx = b\\), with \\(H\\) being the model hessian. For more info, see Wikipedia.

    PARAMETER DESCRIPTION model

    A model wrapped in the TwiceDifferentiable interface.

    TYPE: TorchTwiceDifferentiable

    training_data

    A DataLoader containing the training data.

    TYPE: DataLoader

    b

    A vector or matrix, the right hand side of the equation \\(Hx = b\\).

    TYPE: Tensor

    hessian_perturbation

    Regularization of the hessian.

    TYPE: float DEFAULT: 0.0

    x0

    Initial guess for hvp. If None, defaults to b.

    TYPE: Optional[Tensor] DEFAULT: None

    rtol

    Maximum relative tolerance of result.

    TYPE: float DEFAULT: 1e-07

    atol

    Absolute tolerance of result.

    TYPE: float DEFAULT: 1e-07

    maxiter

    Maximum number of iterations. If None, defaults to 10*len(b).

    TYPE: Optional[int] DEFAULT: None

    progress

    If True, display progress bars.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION InverseHvpResult

    Instance of InverseHvpResult, having a matrix of shape [NxP] with each line being a solution of \\(Ax=b\\), and a dictionary containing information about the convergence of CG, one entry for each line of the matrix.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @InversionRegistry.register(TorchTwiceDifferentiable, InversionMethod.Cg)\ndef solve_batch_cg(\nmodel: TorchTwiceDifferentiable,\ntraining_data: DataLoader,\nb: torch.Tensor,\nhessian_perturbation: float = 0.0,\n*,\nx0: Optional[torch.Tensor] = None,\nrtol: float = 1e-7,\natol: float = 1e-7,\nmaxiter: Optional[int] = None,\nprogress: bool = False,\n) -> InverseHvpResult:\nr\"\"\"\n    Given a model and training data, it uses conjugate gradient to calculate the\n    inverse of the Hessian Vector Product. More precisely, it finds x such that \\(Hx =\n    b\\), with \\(H\\) being the model hessian. For more info, see\n    [Wikipedia](https://en.wikipedia.org/wiki/Conjugate_gradient_method).\n    Args:\n        model: A model wrapped in the TwiceDifferentiable interface.\n        training_data: A DataLoader containing the training data.\n        b: A vector or matrix, the right hand side of the equation \\(Hx = b\\).\n        hessian_perturbation: Regularization of the hessian.\n        x0: Initial guess for hvp. If None, defaults to b.\n        rtol: Maximum relative tolerance of result.\n        atol: Absolute tolerance of result.\n        maxiter: Maximum number of iterations. 
If None, defaults to 10*len(b).\n        progress: If True, display progress bars.\n    Returns:\n        Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult],\n            having a matrix of shape [NxP] with each line being a solution of \\(Ax=b\\),\n            and a dictionary containing information about the convergence of CG,\n            one entry for each line of the matrix.\n    \"\"\"\ntotal_grad_xy = 0\ntotal_points = 0\nfor x, y in maybe_progress(training_data, progress, desc=\"Batch Train Gradients\"):\ngrad_xy = model.grad(x, y, create_graph=True)\ntotal_grad_xy += grad_xy * len(x)\ntotal_points += len(x)\nbackprop_on = model.parameters\nreg_hvp = lambda v: model.mvp(\ntotal_grad_xy / total_points, v, backprop_on\n) + hessian_perturbation * v.type(torch.float64)\nbatch_cg = torch.zeros_like(b)\ninfo = {}\nfor idx, bi in enumerate(maybe_progress(b, progress, desc=\"Conjugate gradient\")):\nbatch_result, batch_info = solve_cg(\nreg_hvp, bi, x0=x0, rtol=rtol, atol=atol, maxiter=maxiter\n)\nbatch_cg[idx] = batch_result\ninfo[f\"batch_{idx}\"] = batch_info\nreturn InverseHvpResult(x=batch_cg, info=info)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.solve_cg","title":"solve_cg(hvp, b, *, x0=None, rtol=1e-07, atol=1e-07, maxiter=None)","text":"

    Conjugate gradient solver for the Hessian vector product.

    PARAMETER DESCRIPTION hvp

    A callable Hvp, operating with tensors of size N.

    TYPE: Callable[[Tensor], Tensor]

    b

    A vector or matrix, the right hand side of the equation \\(Hx = b\\).

    TYPE: Tensor

    x0

    Initial guess for hvp.

    TYPE: Optional[Tensor] DEFAULT: None

    rtol

    Maximum relative tolerance of result.

    TYPE: float DEFAULT: 1e-07

    atol

    Absolute tolerance of result.

    TYPE: float DEFAULT: 1e-07

    maxiter

    Maximum number of iterations. If None, defaults to 10*len(b).

    TYPE: Optional[int] DEFAULT: None

    RETURNS DESCRIPTION InverseHvpResult

    Instance of InverseHvpResult, with a vector x, solution of \\(Ax=b\\), and a dictionary containing information about the convergence of CG.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    def solve_cg(\nhvp: Callable[[torch.Tensor], torch.Tensor],\nb: torch.Tensor,\n*,\nx0: Optional[torch.Tensor] = None,\nrtol: float = 1e-7,\natol: float = 1e-7,\nmaxiter: Optional[int] = None,\n) -> InverseHvpResult:\nr\"\"\"\n    Conjugate gradient solver for the Hessian vector product.\n    Args:\n        hvp: A callable Hvp, operating with tensors of size N.\n        b: A vector or matrix, the right hand side of the equation \\(Hx = b\\).\n        x0: Initial guess for hvp.\n        rtol: Maximum relative tolerance of result.\n        atol: Absolute tolerance of result.\n        maxiter: Maximum number of iterations. If None, defaults to 10*len(b).\n    Returns:\n        Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult],\n            with a vector x, solution of \\(Ax=b\\), and a dictionary containing\n            information about the convergence of CG.\n    \"\"\"\nif x0 is None:\nx0 = torch.clone(b)\nif maxiter is None:\nmaxiter = len(b) * 10\ny_norm = torch.sum(torch.matmul(b, b)).item()\nstopping_val = max([rtol**2 * y_norm, atol**2])\nx = x0\np = r = (b - hvp(x)).squeeze().type(torch.float64)\ngamma = torch.sum(torch.matmul(r, r)).item()\noptimal = False\nfor k in range(maxiter):\nif gamma < stopping_val:\noptimal = True\nbreak\nAp = hvp(p).squeeze()\nalpha = gamma / torch.sum(torch.matmul(p, Ap)).item()\nx += alpha * p\nr -= alpha * Ap\ngamma_ = torch.sum(torch.matmul(r, r)).item()\nbeta = gamma_ / gamma\ngamma = gamma_\np = r + beta * p\ninfo = {\"niter\": k, \"optimal\": optimal, \"gamma\": gamma}\nreturn InverseHvpResult(x=x, info=info)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.solve_lissa","title":"solve_lissa(model, training_data, b, hessian_perturbation=0.0, *, maxiter=1000, dampen=0.0, scale=10.0, h0=None, rtol=0.0001, progress=False)","text":"

    Uses LISSA, Linear time Stochastic Second-Order Algorithm, to iteratively approximate the inverse Hessian. More precisely, it finds x s.t. \\(Hx = b\\), with \\(H\\) being the model's second derivative wrt. the parameters. This is done with the update

    \\[H^{-1}_{j+1} b = b + (I - d) \\ H^{-1}_j b - \\frac{H \\ H^{-1}_j b}{s},\\]

    where \\(I\\) is the identity matrix, \\(d\\) is a dampening term and \\(s\\) a scaling factor that are applied to help convergence. For details, see (Koh and Liang, 2017)1 and the original paper (Agarwal et al.)2.

    PARAMETER DESCRIPTION model

    A model wrapped in the TwiceDifferentiable interface.

    TYPE: TorchTwiceDifferentiable

    training_data

    A DataLoader containing the training data.

    TYPE: DataLoader

    b

    A vector or matrix, the right hand side of the equation \\(Hx = b\\).

    TYPE: Tensor

    hessian_perturbation

    Regularization of the hessian.

    TYPE: float DEFAULT: 0.0

    maxiter

    Maximum number of iterations.

    TYPE: int DEFAULT: 1000

    dampen

    Dampening factor, defaults to 0 for no dampening.

    TYPE: float DEFAULT: 0.0

    scale

    Scaling factor, defaults to 10.

    TYPE: float DEFAULT: 10.0

    h0

    Initial guess for hvp.

    TYPE: Optional[Tensor] DEFAULT: None

    rtol

    tolerance to use for early stopping

    TYPE: float DEFAULT: 0.0001

    progress

    If True, display progress bars.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION InverseHvpResult

    Instance of InverseHvpResult, with a matrix of shape [NxP] with each line being a solution of \\(Ax=b\\), and a dictionary containing information about the accuracy of the solution.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @InversionRegistry.register(TorchTwiceDifferentiable, InversionMethod.Lissa)\ndef solve_lissa(\nmodel: TorchTwiceDifferentiable,\ntraining_data: DataLoader,\nb: torch.Tensor,\nhessian_perturbation: float = 0.0,\n*,\nmaxiter: int = 1000,\ndampen: float = 0.0,\nscale: float = 10.0,\nh0: Optional[torch.Tensor] = None,\nrtol: float = 1e-4,\nprogress: bool = False,\n) -> InverseHvpResult:\nr\"\"\"\n    Uses LISSA, Linear time Stochastic Second-Order Algorithm, to iteratively\n    approximate the inverse Hessian. More precisely, it finds x s.t. \\(Hx = b\\),\n    with \\(H\\) being the model's second derivative wrt. the parameters.\n    This is done with the update\n    \\[H^{-1}_{j+1} b = b + (I - d) \\ H - \\frac{H^{-1}_j b}{s},\\]\n    where \\(I\\) is the identity matrix, \\(d\\) is a dampening term and \\(s\\) a scaling\n    factor that are applied to help convergence. For details, see\n    (Koh and Liang, 2017)<sup><a href=\"#koh_liang_2017\">1</a></sup> and the original paper\n    (Agarwal et. 
al.)<sup><a href=\"#agarwal_secondorder_2017\">2</a></sup>.\n    Args:\n        model: A model wrapped in the TwiceDifferentiable interface.\n        training_data: A DataLoader containing the training data.\n        b: A vector or matrix, the right hand side of the equation \\(Hx = b\\).\n        hessian_perturbation: Regularization of the hessian.\n        maxiter: Maximum number of iterations.\n        dampen: Dampening factor, defaults to 0 for no dampening.\n        scale: Scaling factor, defaults to 10.\n        h0: Initial guess for hvp.\n        rtol: tolerance to use for early stopping\n        progress: If True, display progress bars.\n    Returns:\n        Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult], with a matrix of shape [NxP] with each line being a solution of \\(Ax=b\\),\n            and a dictionary containing information about the accuracy of the solution.\n    \"\"\"\nif h0 is None:\nh_estimate = torch.clone(b)\nelse:\nh_estimate = h0\nshuffled_training_data = DataLoader(\ntraining_data.dataset, training_data.batch_size, shuffle=True\n)\ndef lissa_step(\nh: torch.Tensor, reg_hvp: Callable[[torch.Tensor], torch.Tensor]\n) -> torch.Tensor:\n\"\"\"Given an estimate of the hessian inverse and the regularised hessian\n        vector product, it computes the next estimate.\n        Args:\n            h: An estimate of the hessian inverse.\n            reg_hvp: Regularised hessian vector product.\n        Returns:\n            The next estimate of the hessian inverse.\n        \"\"\"\nreturn b + (1 - dampen) * h - reg_hvp(h) / scale\nfor _ in maybe_progress(range(maxiter), progress, desc=\"Lissa\"):\nx, y = next(iter(shuffled_training_data))\ngrad_xy = model.grad(x, y, create_graph=True)\nreg_hvp = (\nlambda v: model.mvp(grad_xy, v, model.parameters) + hessian_perturbation * v\n)\nresidual = lissa_step(h_estimate, reg_hvp) - h_estimate\nh_estimate += residual\nif torch.isnan(h_estimate).any():\nraise 
RuntimeError(\"NaNs in h_estimate. Increase scale or dampening.\")\nmax_residual = torch.max(torch.abs(residual / h_estimate))\nif max_residual < rtol:\nbreak\nmean_residual = torch.mean(torch.abs(residual / h_estimate))\nlogger.info(\nf\"Terminated Lissa with {max_residual*100:.2f} % max residual.\"\nf\" Mean residual: {mean_residual*100:.5f} %\"\n)\ninfo = {\n\"max_perc_residual\": max_residual * 100,\n\"mean_perc_residual\": mean_residual * 100,\n}\nreturn InverseHvpResult(x=h_estimate / scale, info=info)\n
    "},{"location":"api/pydvl/influence/torch/torch_differentiable/#pydvl.influence.torch.torch_differentiable.solve_arnoldi","title":"solve_arnoldi(model, training_data, b, hessian_perturbation=0.0, *, rank_estimate=10, krylov_dimension=None, low_rank_representation=None, tol=1e-06, max_iter=None, eigen_computation_on_gpu=False)","text":"

    Solves the linear system Hx = b, where H is the Hessian of the model's loss function and b is the given right-hand side vector. It employs the implicitly restarted Arnoldi method for computing a partial eigen decomposition, which is used for the inversion, i.e.

    \\[x = V D^{-1} V^T b\\]

    where \\(D\\) is a diagonal matrix with the top (in absolute value) rank_estimate eigenvalues of the Hessian and \\(V\\) contains the corresponding eigenvectors.

    PARAMETER DESCRIPTION model

    A PyTorch model instance that is twice differentiable, wrapped into TorchTwiceDifferentiable. The Hessian will be calculated with respect to this model's parameters.

    TYPE: TorchTwiceDifferentiable

    training_data

    A DataLoader instance that provides the model's training data. Used in calculating the Hessian-vector products.

    TYPE: DataLoader

    b

    The right-hand side vector in the system Hx = b.

    TYPE: Tensor

    hessian_perturbation

    Optional regularization parameter added to the Hessian-vector product for numerical stability.

    TYPE: float DEFAULT: 0.0

    rank_estimate

    The number of eigenvalues and corresponding eigenvectors to compute. Represents the desired rank of the Hessian approximation.

    TYPE: int DEFAULT: 10

    krylov_dimension

    The number of Krylov vectors to use for the Lanczos method. Defaults to min(model's number of parameters, max(2 times rank_estimate + 1, 20)).

    TYPE: Optional[int] DEFAULT: None

    low_rank_representation

    An instance of LowRankProductRepresentation containing a previously computed low-rank representation of the Hessian. If provided, all other parameters are ignored; otherwise, a new low-rank representation is computed using provided parameters.

    TYPE: Optional[LowRankProductRepresentation] DEFAULT: None

    tol

    The stopping criteria for the Lanczos algorithm. Ignored if low_rank_representation is provided.

    TYPE: float DEFAULT: 1e-06

    max_iter

    The maximum number of iterations for the Lanczos method. Ignored if low_rank_representation is provided.

    TYPE: Optional[int] DEFAULT: None

    eigen_computation_on_gpu

    If True, tries to execute the eigen pair approximation on the model's device via a cupy implementation. Ensure the model size or rank_estimate is appropriate for device memory. If False, the eigen pair approximation is executed on the CPU by the scipy wrapper to ARPACK.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION InverseHvpResult

    Instance of InverseHvpResult, having the solution vector x that satisfies the system \\(Ax = b\\), where \\(A\\) is a low-rank approximation of the Hessian \\(H\\) of the model's loss function, and an instance of LowRankProductRepresentation, which represents the approximation of H.

    Source code in src/pydvl/influence/torch/torch_differentiable.py
    @InversionRegistry.register(TorchTwiceDifferentiable, InversionMethod.Arnoldi)\ndef solve_arnoldi(\nmodel: TorchTwiceDifferentiable,\ntraining_data: DataLoader,\nb: torch.Tensor,\nhessian_perturbation: float = 0.0,\n*,\nrank_estimate: int = 10,\nkrylov_dimension: Optional[int] = None,\nlow_rank_representation: Optional[LowRankProductRepresentation] = None,\ntol: float = 1e-6,\nmax_iter: Optional[int] = None,\neigen_computation_on_gpu: bool = False,\n) -> InverseHvpResult:\nr\"\"\"\n    Solves the linear system Hx = b, where H is the Hessian of the model's loss function and b is the given\n    right-hand side vector.\n    It employs the [implicitly restarted Arnoldi method](https://en.wikipedia.org/wiki/Arnoldi_iteration) for\n    computing a partial eigen decomposition, which is used fo the inversion i.e.\n    \\[x = V D^{-1} V^T b\\]\n    where \\(D\\) is a diagonal matrix with the top (in absolute value) `rank_estimate` eigenvalues of the Hessian\n    and \\(V\\) contains the corresponding eigenvectors.\n    Args:\n        model: A PyTorch model instance that is twice differentiable, wrapped into\n            [TorchTwiceDifferential][pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable].\n            The Hessian will be calculated with respect to this model's parameters.\n        training_data: A DataLoader instance that provides the model's training data.\n            Used in calculating the Hessian-vector products.\n        b: The right-hand side vector in the system Hx = b.\n        hessian_perturbation: Optional regularization parameter added to the Hessian-vector\n            product for numerical stability.\n        rank_estimate: The number of eigenvalues and corresponding eigenvectors to compute.\n            Represents the desired rank of the Hessian approximation.\n        krylov_dimension: The number of Krylov vectors to use for the Lanczos method.\n            Defaults to min(model's number of parameters, max(2 times rank_estimate + 
1, 20)).\n        low_rank_representation: An instance of\n            [LowRankProductRepresentation][pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation]\n            containing a previously computed low-rank representation of the Hessian. If provided, all other parameters\n            are ignored; otherwise, a new low-rank representation is computed\n            using provided parameters.\n        tol: The stopping criteria for the Lanczos algorithm.\n            Ignored if `low_rank_representation` is provided.\n        max_iter: The maximum number of iterations for the Lanczos method.\n            Ignored if `low_rank_representation` is provided.\n        eigen_computation_on_gpu: If True, tries to execute the eigen pair approximation on the model's device\n            via a cupy implementation. Ensure the model size or rank_estimate is appropriate for device memory.\n            If False, the eigen pair approximation is executed on the CPU by the scipy wrapper to ARPACK.\n    Returns:\n        Instance of [InverseHvpResult][pydvl.influence.torch.torch_differentiable.InverseHvpResult],\n            having the solution vector x that satisfies the system \\(Ax = b\\),\n            where \\(A\\) is a low-rank approximation of the Hessian \\(H\\) of the model's loss function, and an instance\n            of [LowRankProductRepresentation][pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation],\n            which represents the approximation of H.\n    \"\"\"\nb_device = b.device if hasattr(b, \"device\") else torch.device(\"cpu\")\nif low_rank_representation is None:\nif b_device.type == \"cuda\" and not eigen_computation_on_gpu:\nraise ValueError(\n\"Using 'eigen_computation_on_gpu=False' while 'b' is on a 'cuda' device is not supported. 
\"\n\"To address this, consider the following options:\\n\"\n\" - Set eigen_computation_on_gpu=True if your model and data are small enough \"\n\"and if 'cupy' is available in your environment.\\n\"\n\" - Move 'b' to the CPU with b.to('cpu').\\n\"\n\" - Precompute a low rank representation and move it to the 'b' device using:\\n\"\n\"     low_rank_representation = model_hessian_low_rank(model, training_data, ..., \"\n\"eigen_computation_on_gpu=False).to(b.device)\"\n)\nlow_rank_representation = model_hessian_low_rank(\nmodel,\ntraining_data,\nhessian_perturbation=hessian_perturbation,\nrank_estimate=rank_estimate,\nkrylov_dimension=krylov_dimension,\ntol=tol,\nmax_iter=max_iter,\neigen_computation_on_gpu=eigen_computation_on_gpu,\n)\nelse:\nif b_device.type != low_rank_representation.device.type:\nraise RuntimeError(\nf\"The devices for 'b' and 'low_rank_representation' do not match.\\n\"\nf\" - 'b' is on device: {b_device}\\n\"\nf\" - 'low_rank_representation' is on device: {low_rank_representation.device}\\n\"\nf\"\\nTo resolve this, consider moving 'low_rank_representation' to '{b_device}' by using:\\n\"\nf\"low_rank_representation = low_rank_representation.to(b.device)\"\n)\nlogger.info(\"Using provided low rank representation, ignoring other parameters\")\nresult = low_rank_representation.projections @ (\ntorch.diag_embed(1.0 / low_rank_representation.eigen_vals)\n@ (low_rank_representation.projections.t() @ b.t())\n)\nreturn InverseHvpResult(\nx=result.t(),\ninfo={\n\"eigenvalues\": low_rank_representation.eigen_vals,\n\"eigenvectors\": low_rank_representation.projections,\n},\n)\n
    "},{"location":"api/pydvl/influence/torch/util/","title":"Util","text":""},{"location":"api/pydvl/influence/torch/util/#pydvl.influence.torch.util.TorchTensorContainerType","title":"TorchTensorContainerType = TypeVar('TorchTensorContainerType', torch.Tensor, Tuple[torch.Tensor, ...], Dict[str, torch.Tensor]) module-attribute","text":"

    Type variable for a PyTorch tensor or a container thereof.

    "},{"location":"api/pydvl/influence/torch/util/#pydvl.influence.torch.util.to_model_device","title":"to_model_device(x, model)","text":"

    Returns the tensor x moved to the device of the model, if device of model is set

    PARAMETER DESCRIPTION x

    The tensor to be moved to the device of the model.

    TYPE: Tensor

    model

    The model whose device will be used to move the tensor.

    TYPE: Module

    RETURNS DESCRIPTION Tensor

    The tensor x moved to the device of the model, if device of model is set.

    Source code in src/pydvl/influence/torch/util.py
    def to_model_device(x: torch.Tensor, model: torch.nn.Module) -> torch.Tensor:\n\"\"\"\n    Returns the tensor `x` moved to the device of the `model`, if device of model is set\n    Args:\n        x: The tensor to be moved to the device of the model.\n        model: The model whose device will be used to move the tensor.\n    Returns:\n        The tensor `x` moved to the device of the `model`, if device of model is set.\n    \"\"\"\nif hasattr(model, \"device\"):\nreturn x.to(model.device)\nreturn x\n
    "},{"location":"api/pydvl/influence/torch/util/#pydvl.influence.torch.util.flatten_tensors_to_vector","title":"flatten_tensors_to_vector(tensors)","text":"

    Flatten multiple tensors into a single 1D tensor (vector).

    This function takes an iterable of tensors and reshapes each of them into a 1D tensor. These reshaped tensors are then concatenated together into a single 1D tensor in the order they were given.

    PARAMETER DESCRIPTION tensors

    An iterable of tensors to be reshaped and concatenated.

    TYPE: Iterable[Tensor]

    RETURNS DESCRIPTION Tensor

    A 1D tensor that is the concatenation of all the reshaped input tensors.

    Source code in src/pydvl/influence/torch/util.py
    def flatten_tensors_to_vector(tensors: Iterable[torch.Tensor]) -> torch.Tensor:\n\"\"\"\n    Flatten multiple tensors into a single 1D tensor (vector).\n    This function takes an iterable of tensors and reshapes each of them into a 1D tensor.\n    These reshaped tensors are then concatenated together into a single 1D tensor in the order they were given.\n    Args:\n        tensors: An iterable of tensors to be reshaped and concatenated.\n    Returns:\n        A 1D tensor that is the concatenation of all the reshaped input tensors.\n    \"\"\"\nreturn torch.cat([t.contiguous().view(-1) for t in tensors])\n
    "},{"location":"api/pydvl/influence/torch/util/#pydvl.influence.torch.util.reshape_vector_to_tensors","title":"reshape_vector_to_tensors(input_vector, target_shapes)","text":"

    Reshape a 1D tensor into multiple tensors with specified shapes.

    This function takes a 1D tensor (input_vector) and reshapes it into a series of tensors with shapes given by 'target_shapes'. The reshaped tensors are returned as a tuple in the same order as their corresponding shapes.

    Note: The total number of elements in 'input_vector' must be equal to the sum of the products of the shapes in 'target_shapes'.

    PARAMETER DESCRIPTION input_vector

    The 1D tensor to be reshaped. Must be 1D.

    TYPE: Tensor

    target_shapes

    An iterable of tuples. Each tuple defines the shape of a tensor to be reshaped from the 'input_vector'.

    TYPE: Iterable[Tuple[int, ...]]

    RETURNS DESCRIPTION Tuple[Tensor, ...]

    A tuple of reshaped tensors.

    RAISES DESCRIPTION ValueError

    If 'input_vector' is not a 1D tensor or if the total number of elements in 'input_vector' does not match the sum of the products of the shapes in 'target_shapes'.

    Source code in src/pydvl/influence/torch/util.py
    def reshape_vector_to_tensors(\ninput_vector: torch.Tensor, target_shapes: Iterable[Tuple[int, ...]]\n) -> Tuple[torch.Tensor, ...]:\n\"\"\"\n    Reshape a 1D tensor into multiple tensors with specified shapes.\n    This function takes a 1D tensor (input_vector) and reshapes it into a series of tensors with shapes given by 'target_shapes'.\n    The reshaped tensors are returned as a tuple in the same order as their corresponding shapes.\n    Note: The total number of elements in 'input_vector' must be equal to the sum of the products of the shapes in 'target_shapes'.\n    Args:\n        input_vector: The 1D tensor to be reshaped. Must be 1D.\n        target_shapes: An iterable of tuples. Each tuple defines the shape of a tensor to be reshaped from the 'input_vector'.\n    Returns:\n        A tuple of reshaped tensors.\n    Raises:\n        ValueError: If 'input_vector' is not a 1D tensor or if the total number of elements in 'input_vector' does not match the sum of the products of the shapes in 'target_shapes'.\n    \"\"\"\nif input_vector.dim() != 1:\nraise ValueError(\"Input vector must be a 1D tensor\")\ntotal_elements = sum(math.prod(shape) for shape in target_shapes)\nif total_elements != input_vector.shape[0]:\nraise ValueError(\nf\"The total elements in shapes {total_elements} does not match the vector length {input_vector.shape[0]}\"\n)\ntensors = []\nstart = 0\nfor shape in target_shapes:\nsize = math.prod(shape)  # compute the total size of the tensor with this shape\ntensors.append(\ninput_vector[start : start + size].view(shape)\n)  # slice the vector and reshape it\nstart += size\nreturn tuple(tensors)\n
    "},{"location":"api/pydvl/influence/torch/util/#pydvl.influence.torch.util.align_structure","title":"align_structure(source, target)","text":"

    This function transforms target to have the same structure as source, i.e., it should be a dictionary with the same keys as source and each corresponding value in target should have the same shape as the value in source.

    PARAMETER DESCRIPTION source

    The reference dictionary containing PyTorch tensors.

    TYPE: Dict[str, Tensor]

    target

    The input to be harmonized. It can be a dictionary, tuple, or tensor.

    TYPE: TorchTensorContainerType

    RETURNS DESCRIPTION Dict[str, Tensor]

    The harmonized version of target.

    RAISES DESCRIPTION ValueError

    If target cannot be harmonized to match source.

    Source code in src/pydvl/influence/torch/util.py
    def align_structure(\nsource: Dict[str, torch.Tensor],\ntarget: TorchTensorContainerType,\n) -> Dict[str, torch.Tensor]:\n\"\"\"\n    This function transforms `target` to have the same structure as `source`, i.e.,\n    it should be a dictionary with the same keys as `source` and each corresponding\n    value in `target` should have the same shape as the value in `source`.\n    Args:\n        source: The reference dictionary containing PyTorch tensors.\n        target: The input to be harmonized. It can be a dictionary, tuple, or tensor.\n    Returns:\n        The harmonized version of `target`.\n    Raises:\n        ValueError: If `target` cannot be harmonized to match `source`.\n    \"\"\"\ntangent_dict: Dict[str, torch.Tensor]\nif isinstance(target, dict):\nif list(target.keys()) != list(source.keys()):\nraise ValueError(\"The keys in 'target' do not match the keys in 'source'.\")\nif [v.shape for v in target.values()] != [v.shape for v in source.values()]:\nraise ValueError(\n\"The shapes of the values in 'target' do not match the shapes of the values in 'source'.\"\n)\ntangent_dict = target\nelif isinstance(target, tuple) or isinstance(target, list):\nif [v.shape for v in target] != [v.shape for v in source.values()]:\nraise ValueError(\n\"'target' is a tuple/list but its elements' shapes do not match the shapes \"\n\"of the values in 'source'.\"\n)\ntangent_dict = dict(zip(source.keys(), target))\nelif isinstance(target, torch.Tensor):\ntry:\ntangent_dict = dict(\nzip(\nsource.keys(),\nreshape_vector_to_tensors(\ntarget, [p.shape for p in source.values()]\n),\n)\n)\nexcept Exception as e:\nraise ValueError(\nf\"'target' is a tensor but cannot be reshaped to match 'source'. Original error: {e}\"\n)\nelse:\nraise ValueError(f\"'target' is of type {type(target)} which is not supported.\")\nreturn tangent_dict\n
    "},{"location":"api/pydvl/influence/torch/util/#pydvl.influence.torch.util.as_tensor","title":"as_tensor(a, warn=True, **kwargs)","text":"

    Converts an array into a torch tensor.

    PARAMETER DESCRIPTION a

    Array to convert to tensor.

    TYPE: Any

    warn

    If True, warns that a will be converted.

    DEFAULT: True

    RETURNS DESCRIPTION Tensor

    A torch tensor converted from the input array.

    Source code in src/pydvl/influence/torch/util.py
    def as_tensor(a: Any, warn=True, **kwargs) -> torch.Tensor:\n\"\"\"\n    Converts an array into a torch tensor.\n    Args:\n        a: Array to convert to tensor.\n        warn: If True, warns that `a` will be converted.\n    Returns:\n        A torch tensor converted from the input array.\n    \"\"\"\nif warn and not isinstance(a, torch.Tensor):\nlogger.warning(\"Converting tensor to type torch.Tensor.\")\nreturn torch.as_tensor(a, **kwargs)\n
    "},{"location":"api/pydvl/parallel/","title":"Parallel","text":"

    This module provides a common interface to parallelization backends. The list of supported backends is here. Backends can be selected with the backend argument of an instance of ParallelConfig, as seen in the examples below.

    We use executors to submit tasks in parallel. The basic high-level pattern is

    from pydvl.parallel import init_executor, ParallelConfig\nconfig = ParallelConfig(backend=\"ray\")\nwith init_executor(max_workers=1, config=config) as executor:\nfuture = executor.submit(lambda x: x + 1, 1)\nresult = future.result()\nassert result == 2\n

    Running a map-reduce job is also easy:

    from pydvl.parallel import init_executor, ParallelConfig\nconfig = ParallelConfig(backend=\"joblib\")\nwith init_executor(config=config) as executor:\nresults = list(executor.map(lambda x: x + 1, range(5)))\nassert results == [1, 2, 3, 4, 5]\n

    There is an alternative map-reduce implementation, MapReduceJob, which internally uses joblib's higher-level API with Parallel().

    "},{"location":"api/pydvl/parallel/backend/","title":"Backend","text":""},{"location":"api/pydvl/parallel/backend/#pydvl.parallel.backend.CancellationPolicy","title":"CancellationPolicy","text":"

    Bases: Flag

    Policy to use when cancelling futures after exiting an Executor.

    Note

    Not all backends support all policies.

    ATTRIBUTE DESCRIPTION NONE

    Do not cancel any futures.

    PENDING

    Cancel all pending futures, but not running ones.

    RUNNING

    Cancel all running futures, but not pending ones.

    ALL

    Cancel all pending and running futures.

    "},{"location":"api/pydvl/parallel/backend/#pydvl.parallel.backend.BaseParallelBackend","title":"BaseParallelBackend","text":"

    Abstract base class for all parallel backends.

    "},{"location":"api/pydvl/parallel/backend/#pydvl.parallel.backend.BaseParallelBackend.executor","title":"executor(max_workers=None, config=ParallelConfig(), cancel_futures=CancellationPolicy.PENDING) abstractmethod classmethod","text":"

    Returns an executor for the parallel backend.

    Source code in src/pydvl/parallel/backend.py
    @classmethod\n@abstractmethod\ndef executor(\ncls,\nmax_workers: int | None = None,\nconfig: ParallelConfig = ParallelConfig(),\ncancel_futures: CancellationPolicy = CancellationPolicy.PENDING,\n) -> Executor:\n\"\"\"Returns an executor for the parallel backend.\"\"\"\n...\n
    "},{"location":"api/pydvl/parallel/backend/#pydvl.parallel.backend.init_parallel_backend","title":"init_parallel_backend(config)","text":"

    Initializes the parallel backend and returns an instance of it.

    The following example creates a parallel backend instance with the default configuration, which is a local joblib backend.

    Example
    config = ParallelConfig()\nparallel_backend = init_parallel_backend(config)\n

    To create a parallel backend instance with a different backend, e.g. ray, you can pass the backend name as a string to the constructor of ParallelConfig.

    Example
    config = ParallelConfig(backend=\"ray\")\nparallel_backend = init_parallel_backend(config)\n
    PARAMETER DESCRIPTION config

    instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig

    Source code in src/pydvl/parallel/backend.py
    def init_parallel_backend(config: ParallelConfig) -> BaseParallelBackend:\n\"\"\"Initializes the parallel backend and returns an instance of it.\n    The following example creates a parallel backend instance with the default\n    configuration, which is a local joblib backend.\n    ??? Example\n        ``` python\n        config = ParallelConfig()\n        parallel_backend = init_parallel_backend(config)\n        ```\n    To create a parallel backend instance with a different backend, e.g. ray,\n    you can pass the backend name as a string to the constructor of\n    [ParallelConfig][pydvl.utils.config.ParallelConfig].\n    ??? Example\n        ```python\n        config = ParallelConfig(backend=\"ray\")\n        parallel_backend = init_parallel_backend(config)\n        ```\n    Args:\n        config: instance of [ParallelConfig][pydvl.utils.config.ParallelConfig]\n            with cluster address, number of cpus, etc.\n    \"\"\"\ntry:\nparallel_backend_cls = BaseParallelBackend.BACKENDS[config.backend]\nexcept KeyError:\nraise NotImplementedError(f\"Unexpected parallel backend {config.backend}\")\nreturn parallel_backend_cls.create(config)  # type: ignore\n
    "},{"location":"api/pydvl/parallel/backend/#pydvl.parallel.backend.available_cpus","title":"available_cpus()","text":"

    Platform-independent count of available cores.

    FIXME: do we really need this or is os.cpu_count enough? Is this portable?

    RETURNS DESCRIPTION int

    Number of cores, or 1 if it is not possible to determine.

    Source code in src/pydvl/parallel/backend.py
    def available_cpus() -> int:\n\"\"\"Platform-independent count of available cores.\n    FIXME: do we really need this or is `os.cpu_count` enough? Is this portable?\n    Returns:\n        Number of cores, or 1 if it is not possible to determine.\n    \"\"\"\nfrom platform import system\nif system() != \"Linux\":\nreturn os.cpu_count() or 1\nreturn len(os.sched_getaffinity(0))  # type: ignore\n
    "},{"location":"api/pydvl/parallel/backend/#pydvl.parallel.backend.effective_n_jobs","title":"effective_n_jobs(n_jobs, config=ParallelConfig())","text":"

    Returns the effective number of jobs.

    This number may vary depending on the parallel backend and the resources available.

    PARAMETER DESCRIPTION n_jobs

    the number of jobs requested. If -1, the number of available CPUs is returned.

    TYPE: int

    config

    instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    RETURNS DESCRIPTION int

    The effective number of jobs, guaranteed to be >= 1.

    RAISES DESCRIPTION RuntimeError

    if the effective number of jobs returned by the backend is < 1.

    Source code in src/pydvl/parallel/backend.py
    def effective_n_jobs(n_jobs: int, config: ParallelConfig = ParallelConfig()) -> int:\n\"\"\"Returns the effective number of jobs.\n    This number may vary depending on the parallel backend and the resources\n    available.\n    Args:\n        n_jobs: the number of jobs requested. If -1, the number of available\n            CPUs is returned.\n        config: instance of [ParallelConfig][pydvl.utils.config.ParallelConfig] with\n            cluster address, number of cpus, etc.\n    Returns:\n        The effective number of jobs, guaranteed to be >= 1.\n    Raises:\n        RuntimeError: if the effective number of jobs returned by the backend\n            is < 1.\n    \"\"\"\nparallel_backend = init_parallel_backend(config)\nif (eff_n_jobs := parallel_backend.effective_n_jobs(n_jobs)) < 1:\nraise RuntimeError(\nf\"Invalid number of jobs {eff_n_jobs} obtained from parallel backend {config.backend}\"\n)\nreturn eff_n_jobs\n
    "},{"location":"api/pydvl/parallel/config/","title":"Config","text":""},{"location":"api/pydvl/parallel/config/#pydvl.parallel.config.ParallelConfig","title":"ParallelConfig dataclass","text":"

    Configuration for parallel computation backend.

    PARAMETER DESCRIPTION backend

    Type of backend to use. Defaults to 'joblib'

    TYPE: Literal['joblib', 'ray'] DEFAULT: 'joblib'

    address

    Address of existing remote or local cluster to use.

    TYPE: Optional[Union[str, Tuple[str, int]]] DEFAULT: None

    n_cpus_local

    Number of CPUs to use when creating a local ray cluster. This has no effect when using an existing ray cluster.

    TYPE: Optional[int] DEFAULT: None

    logging_level

    Logging level for the parallel backend's worker.

    TYPE: int DEFAULT: WARNING

    wait_timeout

    Timeout in seconds for waiting on futures.

    TYPE: float DEFAULT: 1.0

    "},{"location":"api/pydvl/parallel/map_reduce/","title":"Map reduce","text":"

    This module contains a wrapper around joblib's Parallel() class that makes it easy to run map-reduce jobs.

    Deprecation

    This interface might be deprecated or changed in a future release before 1.0

    "},{"location":"api/pydvl/parallel/map_reduce/#pydvl.parallel.map_reduce.MapReduceJob","title":"MapReduceJob(inputs, map_func, reduce_func=identity, map_kwargs=None, reduce_kwargs=None, config=ParallelConfig(), *, n_jobs=-1, timeout=None)","text":"

    Bases: Generic[T, R]

    Takes an embarrassingly parallel function and runs it in n_jobs parallel jobs, splitting the data evenly into a number of chunks equal to the number of jobs.

    Typing information for objects of this class requires the type of the inputs that are split for map_func and the type of its output.

    PARAMETER DESCRIPTION inputs

    The input that will be split and passed to map_func. If it is not a sequence object, it will be repeated n_jobs times.

    TYPE: Union[Collection[T], T]

    map_func

    Function that will be applied to the input chunks in each job.

    TYPE: MapFunction[R]

    reduce_func

    Function that will be applied to the results of map_func to reduce them.

    TYPE: ReduceFunction[R] DEFAULT: identity

    map_kwargs

    Keyword arguments that will be passed to map_func in each job. Alternatively, one can use functools.partial.

    TYPE: Optional[Dict] DEFAULT: None

    reduce_kwargs

    Keyword arguments that will be passed to reduce_func in each job. Alternatively, one can use functools.partial.

    TYPE: Optional[Dict] DEFAULT: None

    config

    Instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    n_jobs

    Number of parallel jobs to run. Does not accept 0.

    TYPE: int DEFAULT: -1

    Example

    A simple usage example with 2 jobs:

    >>> from pydvl.parallel import MapReduceJob\n>>> import numpy as np\n>>> map_reduce_job: MapReduceJob[np.ndarray, np.ndarray] = MapReduceJob(\n...     np.arange(5),\n...     map_func=np.sum,\n...     reduce_func=np.sum,\n...     n_jobs=2,\n... )\n>>> map_reduce_job()\n10\n

    When passed a single object as input, it will be repeated for each job:

    >>> from pydvl.parallel import MapReduceJob\n>>> import numpy as np\n>>> map_reduce_job: MapReduceJob[int, np.ndarray] = MapReduceJob(\n...     5,\n...     map_func=lambda x: np.array([x]),\n...     reduce_func=np.sum,\n...     n_jobs=2,\n... )\n>>> map_reduce_job()\n10\n

    Source code in src/pydvl/parallel/map_reduce.py
    def __init__(\nself,\ninputs: Union[Collection[T], T],\nmap_func: MapFunction[R],\nreduce_func: ReduceFunction[R] = identity,\nmap_kwargs: Optional[Dict] = None,\nreduce_kwargs: Optional[Dict] = None,\nconfig: ParallelConfig = ParallelConfig(),\n*,\nn_jobs: int = -1,\ntimeout: Optional[float] = None,\n):\nself.config = config\nparallel_backend = init_parallel_backend(self.config)\nself.parallel_backend = parallel_backend\nself.timeout = timeout\n# This uses the setter defined below\nself.n_jobs = n_jobs\nself.inputs_ = inputs\nself.map_kwargs = map_kwargs if map_kwargs is not None else dict()\nself.reduce_kwargs = reduce_kwargs if reduce_kwargs is not None else dict()\nself._map_func = reduce(maybe_add_argument, [\"job_id\", \"seed\"], map_func)\nself._reduce_func = reduce_func\n
    "},{"location":"api/pydvl/parallel/map_reduce/#pydvl.parallel.map_reduce.MapReduceJob.n_jobs","title":"n_jobs: int property writable","text":"

    Effective number of jobs according to the used ParallelBackend instance.

    "},{"location":"api/pydvl/parallel/map_reduce/#pydvl.parallel.map_reduce.MapReduceJob.__call__","title":"__call__(seed=None)","text":"

    Runs the map-reduce job.

    PARAMETER DESCRIPTION seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Union[Seed, SeedSequence]] DEFAULT: None

    RETURNS DESCRIPTION R

    The result of the reduce function.

    Source code in src/pydvl/parallel/map_reduce.py
    def __call__(\nself,\nseed: Optional[Union[Seed, SeedSequence]] = None,\n) -> R:\n\"\"\"\n    Runs the map-reduce job.\n    Args:\n        seed: Either an instance of a numpy random number generator or a seed for\n            it.\n    Returns:\n         The result of the reduce function.\n    \"\"\"\nif self.config.backend == \"joblib\":\nbackend = \"loky\"\nelse:\nbackend = self.config.backend\n# In joblib the levels are reversed.\n# 0 means no logging and 50 means log everything to stdout\nverbose = 50 - self.config.logging_level\nseed_seq = ensure_seed_sequence(seed)\nwith Parallel(backend=backend, n_jobs=self.n_jobs, verbose=verbose) as parallel:\nchunks = self._chunkify(self.inputs_, n_chunks=self.n_jobs)\nmap_results: List[R] = parallel(\ndelayed(self._map_func)(\nnext_chunk, job_id=j, seed=seed, **self.map_kwargs\n)\nfor j, (next_chunk, seed) in enumerate(\nzip(chunks, seed_seq.spawn(len(chunks)))\n)\n)\nreduce_results: R = self._reduce_func(map_results, **self.reduce_kwargs)\nreturn reduce_results\n
    "},{"location":"api/pydvl/parallel/backends/","title":"Backends","text":""},{"location":"api/pydvl/parallel/backends/joblib/","title":"Joblib","text":""},{"location":"api/pydvl/parallel/backends/joblib/#pydvl.parallel.backends.joblib.JoblibParallelBackend","title":"JoblibParallelBackend(config)","text":"

    Bases: BaseParallelBackend

    Class used to wrap joblib to make it transparent to algorithms.

    It shouldn't be initialized directly. You should instead call init_parallel_backend().

    PARAMETER DESCRIPTION config

    instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig

    Source code in src/pydvl/parallel/backends/joblib.py
    def __init__(self, config: ParallelConfig):\nself.config = {\n\"logging_level\": config.logging_level,\n\"n_jobs\": config.n_cpus_local,\n}\n
    "},{"location":"api/pydvl/parallel/backends/joblib/#pydvl.parallel.backends.joblib.JoblibParallelBackend.wrap","title":"wrap(fun, **kwargs)","text":"

    Wraps a function as a joblib delayed.

    PARAMETER DESCRIPTION fun

    the function to wrap

    TYPE: Callable

    RETURNS DESCRIPTION Callable

    The delayed function.

    Source code in src/pydvl/parallel/backends/joblib.py
    def wrap(self, fun: Callable, **kwargs) -> Callable:\n\"\"\"Wraps a function as a joblib delayed.\n    Args:\n        fun: the function to wrap\n    Returns:\n        The delayed function.\n    \"\"\"\nreturn delayed(fun)  # type: ignore\n
    "},{"location":"api/pydvl/parallel/backends/ray/","title":"Ray","text":""},{"location":"api/pydvl/parallel/backends/ray/#pydvl.parallel.backends.ray.RayParallelBackend","title":"RayParallelBackend(config)","text":"

    Bases: BaseParallelBackend

    Class used to wrap ray to make it transparent to algorithms.

    It shouldn't be initialized directly. You should instead call init_parallel_backend().

    PARAMETER DESCRIPTION config

    instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig

    Source code in src/pydvl/parallel/backends/ray.py
    def __init__(self, config: ParallelConfig):\nself.config = {\"address\": config.address, \"logging_level\": config.logging_level}\nif self.config[\"address\"] is None:\nself.config[\"num_cpus\"] = config.n_cpus_local\nif not ray.is_initialized():\nray.init(**self.config)\n# Register ray joblib backend\nregister_ray()\n
    "},{"location":"api/pydvl/parallel/backends/ray/#pydvl.parallel.backends.ray.RayParallelBackend.wrap","title":"wrap(fun, **kwargs)","text":"

    Wraps a function as a ray remote.

    PARAMETER DESCRIPTION fun

    the function to wrap

    TYPE: Callable

    kwargs

    keyword arguments to pass to @ray.remote

    DEFAULT: {}

    RETURNS DESCRIPTION Callable

    The .remote method of the ray RemoteFunction.

    Source code in src/pydvl/parallel/backends/ray.py
    def wrap(self, fun: Callable, **kwargs) -> Callable:\n\"\"\"Wraps a function as a ray remote.\n    Args:\n        fun: the function to wrap\n        kwargs: keyword arguments to pass to @ray.remote\n    Returns:\n        The `.remote` method of the ray `RemoteFunction`.\n    \"\"\"\nif len(kwargs) > 0:\nreturn ray.remote(**kwargs)(fun).remote  # type: ignore\nreturn ray.remote(fun).remote  # type: ignore\n
    "},{"location":"api/pydvl/parallel/futures/","title":"Futures","text":""},{"location":"api/pydvl/parallel/futures/#pydvl.parallel.futures.init_executor","title":"init_executor(max_workers=None, config=ParallelConfig(), **kwargs)","text":"

    Initializes a futures executor for the given parallel configuration.

    PARAMETER DESCRIPTION max_workers

    Maximum number of concurrent tasks.

    TYPE: Optional[int] DEFAULT: None

    config

    instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    kwargs

    Other optional parameters that will be passed to the executor.

    DEFAULT: {}

    Examples

    from pydvl.parallel import init_executor, ParallelConfig\nconfig = ParallelConfig(backend=\"ray\")\nwith init_executor(max_workers=1, config=config) as executor:\nfuture = executor.submit(lambda x: x + 1, 1)\nresult = future.result()\nassert result == 2\n
    from pydvl.parallel.futures import init_executor\nwith init_executor() as executor:\nresults = list(executor.map(lambda x: x + 1, range(5)))\nassert results == [1, 2, 3, 4, 5]\n

    Source code in src/pydvl/parallel/futures/__init__.py
    @contextmanager\ndef init_executor(\nmax_workers: Optional[int] = None,\nconfig: ParallelConfig = ParallelConfig(),\n**kwargs,\n) -> Generator[Executor, None, None]:\n\"\"\"Initializes a futures executor for the given parallel configuration.\n    Args:\n        max_workers: Maximum number of concurrent tasks.\n        config: instance of [ParallelConfig][pydvl.utils.config.ParallelConfig]\n            with cluster address, number of cpus, etc.\n        kwargs: Other optional parameter that will be passed to the executor.\n    ??? Examples\n        ``` python\n        from pydvl.parallel import init_executor, ParallelConfig\n        config = ParallelConfig(backend=\"ray\")\n        with init_executor(max_workers=1, config=config) as executor:\n            future = executor.submit(lambda x: x + 1, 1)\n            result = future.result()\n        assert result == 2\n        ```\n        ``` python\n        from pydvl.parallel.futures import init_executor\n        with init_executor() as executor:\n            results = list(executor.map(lambda x: x + 1, range(5)))\n        assert results == [1, 2, 3, 4, 5]\n        ```\n    \"\"\"\ntry:\ncls = BaseParallelBackend.BACKENDS[config.backend]\nwith cls.executor(max_workers=max_workers, config=config, **kwargs) as e:\nyield e\nexcept KeyError:\nraise NotImplementedError(f\"Unexpected parallel backend {config.backend}\")\n
    "},{"location":"api/pydvl/parallel/futures/ray/","title":"Ray","text":""},{"location":"api/pydvl/parallel/futures/ray/#pydvl.parallel.futures.ray.RayExecutor","title":"RayExecutor(max_workers=None, *, config=ParallelConfig(), cancel_futures=CancellationPolicy.ALL)","text":"

    Bases: Executor

    Asynchronous executor using Ray that implements the concurrent.futures API.

    It shouldn't be initialized directly. You should instead call init_executor().

    PARAMETER DESCRIPTION max_workers

    Maximum number of concurrent tasks. Each task can itself request any number of vCPUs. You must ensure the product of this value and the n_cpus_per_job parameter passed to submit() does not exceed available cluster resources. If set to None, it will default to the total number of vCPUs in the ray cluster.

    TYPE: Optional[int] DEFAULT: None

    config

    instance of ParallelConfig with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    cancel_futures

    Select which futures will be cancelled when exiting this context manager. Pending cancels all pending futures, but not running ones, as done by concurrent.futures.ProcessPoolExecutor. All, which is the default, cancels all pending and running futures, and None doesn't cancel any. See CancellationPolicy.

    TYPE: CancellationPolicy DEFAULT: ALL

    Source code in src/pydvl/parallel/futures/ray.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.8.0\",\nargs_mapping={\"cancel_futures_on_exit\": \"cancel_futures\"},\n)\ndef __init__(\nself,\nmax_workers: Optional[int] = None,\n*,\nconfig: ParallelConfig = ParallelConfig(),\ncancel_futures: CancellationPolicy = CancellationPolicy.ALL,\n):\nif config.backend != \"ray\":\nraise ValueError(\nf\"Parallel backend must be set to 'ray' and not '{config.backend}'\"\n)\nif max_workers is not None:\nif max_workers <= 0:\nraise ValueError(\"max_workers must be greater than 0\")\nmax_workers = max_workers\nif isinstance(cancel_futures, CancellationPolicy):\nself._cancel_futures = cancel_futures\nelse:\nself._cancel_futures = (\nCancellationPolicy.PENDING\nif cancel_futures\nelse CancellationPolicy.NONE\n)\nself.config = {\"address\": config.address, \"logging_level\": config.logging_level}\nif config.address is None:\nself.config[\"num_cpus\"] = config.n_cpus_local\nif not ray.is_initialized():\nray.init(**self.config)\nself._max_workers = max_workers\nif self._max_workers is None:\nself._max_workers = int(ray._private.state.cluster_resources()[\"CPU\"])\nself._shutdown = False\nself._shutdown_lock = threading.Lock()\nself._queue_lock = threading.Lock()\nself._work_queue: \"queue.Queue[Optional[_WorkItem]]\" = queue.Queue(\nmaxsize=self._max_workers\n)\nself._pending_queue: \"queue.SimpleQueue[Optional[_WorkItem]]\" = (\nqueue.SimpleQueue()\n)\n# Work Item Manager Thread\nself._work_item_manager_thread: Optional[_WorkItemManagerThread] = None\n
    "},{"location":"api/pydvl/parallel/futures/ray/#pydvl.parallel.futures.ray.RayExecutor.submit","title":"submit(fn, *args, **kwargs)","text":"

    Submits a callable to be executed with the given arguments.

    Schedules the callable to be executed as fn(*args, **kwargs) and returns a Future instance representing the execution of the callable.

    PARAMETER DESCRIPTION fn

    Callable.

    TYPE: Callable[..., T]

    args

    Positional arguments that will be passed to fn.

    DEFAULT: ()

    kwargs

    Keyword arguments that will be passed to fn. It can also optionally contain options for the ray remote function as a dictionary as the keyword argument remote_function_options.

    DEFAULT: {}

    Returns: A Future representing the given call.

    RAISES DESCRIPTION RuntimeError

    If a task is submitted after the executor has been shut down.

    Source code in src/pydvl/parallel/futures/ray.py
    def submit(self, fn: Callable[..., T], *args, **kwargs) -> \"Future[T]\":\nr\"\"\"Submits a callable to be executed with the given arguments.\n    Schedules the callable to be executed as fn(\\*args, \\**kwargs)\n    and returns a Future instance representing the execution of the callable.\n    Args:\n        fn: Callable.\n        args: Positional arguments that will be passed to `fn`.\n        kwargs: Keyword arguments that will be passed to `fn`.\n            It can also optionally contain options for the ray remote function\n            as a dictionary as the keyword argument `remote_function_options`.\n    Returns:\n        A Future representing the given call.\n    Raises:\n        RuntimeError: If a task is submitted after the executor has been shut down.\n    \"\"\"\nwith self._shutdown_lock:\nlogger.debug(\"executor acquired shutdown lock\")\nif self._shutdown:\nraise RuntimeError(\"cannot schedule new futures after shutdown\")\nlogging.debug(\"Creating future and putting work item in work queue\")\nfuture: \"Future[T]\" = Future()\nremote_function_options = kwargs.pop(\"remote_function_options\", None)\nw = _WorkItem(\nfuture,\nfn,\nargs,\nkwargs,\nremote_function_options=remote_function_options,\n)\nself._put_work_item_in_queue(w)\n# We delay starting the thread until the first call to submit\nself._start_work_item_manager_thread()\nreturn future\n
    "},{"location":"api/pydvl/parallel/futures/ray/#pydvl.parallel.futures.ray.RayExecutor.shutdown","title":"shutdown(wait=True, *, cancel_futures=None)","text":"

    Clean up the resources associated with the Executor.

    This method tries to mimic the behaviour of Executor.shutdown while allowing one more value for cancel_futures which instructs it to use the CancellationPolicy defined upon construction.

    PARAMETER DESCRIPTION wait

    Whether to wait for pending futures to finish.

    TYPE: bool DEFAULT: True

    cancel_futures

    Overrides the executor's default policy for cancelling futures on exit. If True, all pending futures are cancelled, and if False, no futures are cancelled. If None (default), the executor's policy set at initialization is used.

    TYPE: Optional[bool] DEFAULT: None

    Source code in src/pydvl/parallel/futures/ray.py
    def shutdown(\nself, wait: bool = True, *, cancel_futures: Optional[bool] = None\n) -> None:\n\"\"\"Clean up the resources associated with the Executor.\n    This method tries to mimic the behaviour of\n    [Executor.shutdown][concurrent.futures.Executor.shutdown]\n    while allowing one more value for ``cancel_futures`` which instructs it\n    to use the [CancellationPolicy][pydvl.parallel.backend.CancellationPolicy]\n    defined upon construction.\n    Args:\n        wait: Whether to wait for pending futures to finish.\n        cancel_futures: Overrides the executor's default policy for\n            cancelling futures on exit. If ``True``, all pending futures are\n            cancelled, and if ``False``, no futures are cancelled. If ``None``\n            (default), the executor's policy set at initialization is used.\n    \"\"\"\nlogger.debug(\"executor shutting down\")\nwith self._shutdown_lock:\nlogger.debug(\"executor acquired shutdown lock\")\nself._shutdown = True\nself._cancel_futures = {\nNone: self._cancel_futures,\nTrue: CancellationPolicy.PENDING,\nFalse: CancellationPolicy.NONE,\n}[cancel_futures]\nif wait:\nlogger.debug(\"executor waiting for futures to finish\")\nif self._work_item_manager_thread is not None:\n# Putting None in the queue to signal\n# to work item manager thread that we are shutting down\nself._put_work_item_in_queue(None)\nlogger.debug(\n\"executor waiting for work item manager thread to terminate\"\n)\nself._work_item_manager_thread.join()\n# To reduce the risk of opening too many files, remove references to\n# objects that use file descriptors.\nself._work_item_manager_thread = None\ndel self._work_queue\ndel self._pending_queue\n
    "},{"location":"api/pydvl/parallel/futures/ray/#pydvl.parallel.futures.ray.RayExecutor.__exit__","title":"__exit__(exc_type, exc_val, exc_tb)","text":"

    Exit the runtime context related to the RayExecutor object.

    Source code in src/pydvl/parallel/futures/ray.py
    def __exit__(self, exc_type, exc_val, exc_tb):\n\"\"\"Exit the runtime context related to the RayExecutor object.\"\"\"\nself.shutdown()\nreturn False\n
    "},{"location":"api/pydvl/reporting/","title":"Reporting","text":""},{"location":"api/pydvl/reporting/plots/","title":"Plots","text":""},{"location":"api/pydvl/reporting/plots/#pydvl.reporting.plots.shaded_mean_std","title":"shaded_mean_std(data, abscissa=None, num_std=1.0, mean_color='dodgerblue', shade_color='lightblue', title=None, xlabel=None, ylabel=None, ax=None, **kwargs)","text":"

    The usual mean \\(\\pm\\) std deviation plot to aggregate runs of experiments.

    Deprecation notice

    This function is bogus and will be removed in the future in favour of properly computed confidence intervals.

    PARAMETER DESCRIPTION data

    axis 0 is to be aggregated on (e.g. runs) and axis 1 is the data for each run.

    TYPE: ndarray

    abscissa

    values for the x-axis. Leave empty to use increasing integers.

    TYPE: Optional[Sequence[Any]] DEFAULT: None

    num_std

    number of standard deviations to shade around the mean.

    TYPE: float DEFAULT: 1.0

    mean_color

    color for the mean

    TYPE: Optional[str] DEFAULT: 'dodgerblue'

    shade_color

    color for the shaded region

    TYPE: Optional[str] DEFAULT: 'lightblue'

    title

    Title text. To use mathematics, use LaTeX notation.

    TYPE: Optional[str] DEFAULT: None

    xlabel

    Text for the horizontal axis.

    TYPE: Optional[str] DEFAULT: None

    ylabel

    Text for the vertical axis

    TYPE: Optional[str] DEFAULT: None

    ax

    If passed, axes object into which to insert the figure. Otherwise, a new figure is created and returned

    TYPE: Optional[Axes] DEFAULT: None

    kwargs

    these are forwarded to the ax.plot() call for the mean.

    DEFAULT: {}

    RETURNS DESCRIPTION Axes

    The axes used (or created)

    Source code in src/pydvl/reporting/plots.py
    @deprecated(target=None, deprecated_in=\"0.7.1\", remove_in=\"0.9.0\")\ndef shaded_mean_std(\ndata: np.ndarray,\nabscissa: Optional[Sequence[Any]] = None,\nnum_std: float = 1.0,\nmean_color: Optional[str] = \"dodgerblue\",\nshade_color: Optional[str] = \"lightblue\",\ntitle: Optional[str] = None,\nxlabel: Optional[str] = None,\nylabel: Optional[str] = None,\nax: Optional[Axes] = None,\n**kwargs,\n) -> Axes:\nr\"\"\"The usual mean \\(\\pm\\) std deviation plot to aggregate runs of\n    experiments.\n    !!! warning \"Deprecation notice\"\n        This function is bogus and will be removed in the future in favour of\n        properly computed confidence intervals.\n    Args:\n        data: axis 0 is to be aggregated on (e.g. runs) and axis 1 is the\n            data for each run.\n        abscissa: values for the x-axis. Leave empty to use increasing integers.\n        num_std: number of standard deviations to shade around the mean.\n        mean_color: color for the mean\n        shade_color: color for the shaded region\n        title: Title text. To use mathematics, use LaTeX notation.\n        xlabel: Text for the horizontal axis.\n        ylabel: Text for the vertical axis\n        ax: If passed, axes object into which to insert the figure. Otherwise,\n            a new figure is created and returned\n        kwargs: these are forwarded to the ax.plot() call for the mean.\n    Returns:\n        The axes used (or created)\n    \"\"\"\nassert len(data.shape) == 2\nmean = data.mean(axis=0)\nstd = num_std * data.std(axis=0)\nif ax is None:\nfig, ax = plt.subplots()\nif abscissa is None:\nabscissa = list(range(data.shape[1]))\nax.fill_between(abscissa, mean - std, mean + std, alpha=0.3, color=shade_color)\nax.plot(abscissa, mean, color=mean_color, **kwargs)\nax.set_title(title)\nax.set_xlabel(xlabel)\nax.set_ylabel(ylabel)\nreturn ax\n
    "},{"location":"api/pydvl/reporting/plots/#pydvl.reporting.plots.plot_ci_array","title":"plot_ci_array(data, level, type='normal', abscissa=None, mean_color='dodgerblue', shade_color='lightblue', ax=None, **kwargs)","text":"

    Plot values and a confidence interval from a 2D array.

    Supported intervals are based on the normal and the t distributions.

    PARAMETER DESCRIPTION data

    A 2D array with M different values for each of the N indices.

    TYPE: NDArray

    level

    The confidence level.

    TYPE: float

    type

    The type of confidence interval to use.

    TYPE: Literal['normal', 't', 'auto'] DEFAULT: 'normal'

    abscissa

    The values for the x-axis. Leave empty to use increasing integers.

    TYPE: Optional[Sequence[str]] DEFAULT: None

    mean_color

    The color of the mean line.

    TYPE: Optional[str] DEFAULT: 'dodgerblue'

    shade_color

    The color of the confidence interval.

    TYPE: Optional[str] DEFAULT: 'lightblue'

    ax

    If passed, axes object into which to insert the figure. Otherwise, a new figure is created and the axes returned.

    TYPE: Optional[Axes] DEFAULT: None

    **kwargs

    Additional arguments to pass to the plot function.

    DEFAULT: {}

    RETURNS DESCRIPTION Axes

    The matplotlib axes.

    Source code in src/pydvl/reporting/plots.py
    def plot_ci_array(\ndata: NDArray,\nlevel: float,\ntype: Literal[\"normal\", \"t\", \"auto\"] = \"normal\",\nabscissa: Optional[Sequence[str]] = None,\nmean_color: Optional[str] = \"dodgerblue\",\nshade_color: Optional[str] = \"lightblue\",\nax: Optional[plt.Axes] = None,\n**kwargs,\n) -> plt.Axes:\n\"\"\"Plot values and a confidence interval from a 2D array.\n    Supported intervals are based on the normal and the t distributions.\n    Args:\n        data: A 2D array with M different values for each of the N indices.\n        level: The confidence level.\n        type: The type of confidence interval to use.\n        abscissa: The values for the x-axis. Leave empty to use increasing\n            integers.\n        mean_color: The color of the mean line.\n        shade_color: The color of the confidence interval.\n        ax: If passed, axes object into which to insert the figure. Otherwise,\n            a new figure is created and the axes returned.\n        **kwargs: Additional arguments to pass to the plot function.\n    Returns:\n        The matplotlib axes.\n    \"\"\"\nm, n = data.shape\nmeans = np.mean(data, axis=0)\nvariances = np.var(data, axis=0, ddof=1)\ndummy: ValuationResult[np.int_, str] = ValuationResult(\nalgorithm=\"dummy\",\nvalues=means,\nvariances=variances,\ncounts=np.ones_like(means, dtype=np.int_) * m,\nindices=np.arange(n),\ndata_names=np.array(abscissa, dtype=str)\nif abscissa is not None\nelse np.arange(n, dtype=str),\n)\nreturn plot_ci_values(\ndummy,\nlevel=level,\ntype=type,\nmean_color=mean_color,\nshade_color=shade_color,\nax=ax,\n**kwargs,\n)\n
    "},{"location":"api/pydvl/reporting/plots/#pydvl.reporting.plots.plot_ci_values","title":"plot_ci_values(values, level, type='auto', abscissa=None, mean_color='dodgerblue', shade_color='lightblue', ax=None, **kwargs)","text":"

    Plot values and a confidence interval.

    Uses values.data_names for the x-axis.

    Supported intervals are based on the normal and the t distributions.

    PARAMETER DESCRIPTION values

    The valuation result.

    TYPE: ValuationResult

    level

    The confidence level.

    TYPE: float

    type

    The type of confidence interval to use. If \"auto\", uses \"normal\" if the minimum number of updates for all indices is greater than 30, otherwise uses \"t\".

    TYPE: Literal['normal', 't', 'auto'] DEFAULT: 'auto'

    abscissa

    The values for the x-axis. Leave empty to use increasing integers.

    TYPE: Optional[Sequence[str]] DEFAULT: None

    mean_color

    The color of the mean line.

    TYPE: Optional[str] DEFAULT: 'dodgerblue'

    shade_color

    The color of the confidence interval.

    TYPE: Optional[str] DEFAULT: 'lightblue'

    ax

    If passed, axes object into which to insert the figure. Otherwise, a new figure is created and the axes returned.

    TYPE: Optional[Axes] DEFAULT: None

    **kwargs

    Additional arguments to pass to the plot function.

    DEFAULT: {}

    RETURNS DESCRIPTION

    The matplotlib axes.

    Source code in src/pydvl/reporting/plots.py
    def plot_ci_values(\nvalues: ValuationResult,\nlevel: float,\ntype: Literal[\"normal\", \"t\", \"auto\"] = \"auto\",\nabscissa: Optional[Sequence[str]] = None,\nmean_color: Optional[str] = \"dodgerblue\",\nshade_color: Optional[str] = \"lightblue\",\nax: Optional[plt.Axes] = None,\n**kwargs,\n):\n\"\"\"Plot values and a confidence interval.\n    Uses `values.data_names` for the x-axis.\n    Supported intervals are based on the normal and the t distributions.\n    Args:\n        values: The valuation result.\n        level: The confidence level.\n        type: The type of confidence interval to use. If \"auto\", uses \"norm\" if\n            the minimum number of updates for all indices is greater than 30,\n            otherwise uses \"t\".\n        abscissa: The values for the x-axis. Leave empty to use increasing\n            integers.\n        mean_color: The color of the mean line.\n        shade_color: The color of the confidence interval.\n        ax: If passed, axes object into which to insert the figure. Otherwise,\n            a new figure is created and the axes returned.\n        **kwargs: Additional arguments to pass to the plot function.\n    Returns:\n        The matplotlib axes.\n    \"\"\"\nppfs = {\n\"normal\": norm.ppf,\n\"t\": partial(t.ppf, df=values.counts - 1),\n\"auto\": norm.ppf\nif np.min(values.counts) > 30\nelse partial(t.ppf, df=values.counts - 1),\n}\ntry:\nscore = ppfs[type](1 - level / 2)\nexcept KeyError:\nraise ValueError(\nf\"Unknown confidence interval type requested: {type}.\"\n) from None\nif abscissa is None:\nabscissa = [str(i) for i, _ in enumerate(values)]\nbound = score * values.stderr\nif ax is None:\nfig, ax = plt.subplots()\nax.fill_between(\nabscissa,\nvalues.values - bound,\nvalues.values + bound,\nalpha=0.3,\ncolor=shade_color,\n)\nax.plot(abscissa, values.values, color=mean_color, **kwargs)\nreturn ax\n
    "},{"location":"api/pydvl/reporting/plots/#pydvl.reporting.plots.spearman_correlation","title":"spearman_correlation(vv, num_values, pvalue)","text":"

    Simple matrix plots with spearman correlation for each pair in vv.

    PARAMETER DESCRIPTION vv

    list of OrderedDicts with index: value. Spearman correlation is computed for the keys.

    TYPE: List[OrderedDict]

    num_values

    Use only these many values from the data (from the start of the OrderedDicts)

    TYPE: int

    pvalue

    correlation coefficients for which the p-value is not below the threshold pvalue/len(vv) (Bonferroni correction) will be discarded.

    TYPE: float

    Source code in src/pydvl/reporting/plots.py
    def spearman_correlation(vv: List[OrderedDict], num_values: int, pvalue: float):\n\"\"\"Simple matrix plots with spearman correlation for each pair in vv.\n    Args:\n        vv: list of OrderedDicts with index: value. Spearman correlation\n            is computed for the keys.\n        num_values: Use only these many values from the data (from the start\n            of the OrderedDicts)\n        pvalue: correlation coefficients for which the p-value is below the\n            threshold `pvalue/len(vv)` will be discarded.\n    \"\"\"\nr: np.ndarray = np.ndarray((len(vv), len(vv)))\np: np.ndarray = np.ndarray((len(vv), len(vv)))\nfor i, a in enumerate(vv):\nfor j, b in enumerate(vv):\nfrom scipy.stats._stats_py import SpearmanrResult\nspearman: SpearmanrResult = sp.stats.spearmanr(\nlist(a.keys())[:num_values], list(b.keys())[:num_values]\n)\nr[i][j] = (\nspearman.correlation if spearman.pvalue < pvalue / len(vv) else np.nan\n)  # Bonferroni correction\np[i][j] = spearman.pvalue\nfig, axs = plt.subplots(1, 2, figsize=(16, 7))\nplot1 = axs[0].matshow(r, vmin=-1, vmax=1)\naxs[0].set_title(f\"Spearman correlation (top {num_values} values)\")\naxs[0].set_xlabel(\"Runs\")\naxs[0].set_ylabel(\"Runs\")\nfig.colorbar(plot1, ax=axs[0])\nplot2 = axs[1].matshow(p, vmin=0, vmax=1)\naxs[1].set_title(\"p-value\")\naxs[1].set_xlabel(\"Runs\")\naxs[1].set_ylabel(\"Runs\")\nfig.colorbar(plot2, ax=axs[1])\nreturn fig\n
    "},{"location":"api/pydvl/reporting/plots/#pydvl.reporting.plots.plot_shapley","title":"plot_shapley(df, *, level=0.05, ax=None, title=None, xlabel=None, ylabel=None)","text":"

    Plots the shapley values, as returned from compute_shapley_values, with error bars corresponding to an \\(\\alpha\\)-level Normal confidence interval.

    PARAMETER DESCRIPTION df

    dataframe with the shapley values

    TYPE: DataFrame

    level

    confidence level for the error bars

    TYPE: float DEFAULT: 0.05

    ax

    axes to plot on or None if a new subplot should be created

    TYPE: Optional[Axes] DEFAULT: None

    title

    string, title of the plot

    TYPE: Optional[str] DEFAULT: None

    xlabel

    string, x label of the plot

    TYPE: Optional[str] DEFAULT: None

    ylabel

    string, y label of the plot

    TYPE: Optional[str] DEFAULT: None

    RETURNS DESCRIPTION Axes

    The axes created or used

    Source code in src/pydvl/reporting/plots.py
    def plot_shapley(\ndf: pd.DataFrame,\n*,\nlevel: float = 0.05,\nax: Optional[plt.Axes] = None,\ntitle: Optional[str] = None,\nxlabel: Optional[str] = None,\nylabel: Optional[str] = None,\n) -> plt.Axes:\nr\"\"\"Plots the shapley values, as returned from\n    [compute_shapley_values][pydvl.value.shapley.common.compute_shapley_values],\n    with error bars corresponding to an $\\alpha$-level Normal confidence\n    interval.\n    Args:\n        df: dataframe with the shapley values\n        level: confidence level for the error bars\n        ax: axes to plot on or None if a new subplots should be created\n        title: string, title of the plot\n        xlabel: string, x label of the plot\n        ylabel: string, y label of the plot\n    Returns:\n        The axes created or used\n    \"\"\"\nif ax is None:\n_, ax = plt.subplots()\nyerr = norm.ppf(1 - level / 2) * df[\"data_value_stderr\"]\nax.errorbar(x=df.index, y=df[\"data_value\"], yerr=yerr, fmt=\"o\", capsize=6)\nax.set_xlabel(xlabel)\nax.set_ylabel(ylabel)\nax.set_title(title)\nplt.xticks(rotation=60)\nreturn ax\n
    "},{"location":"api/pydvl/reporting/plots/#pydvl.reporting.plots.plot_influence_distribution_by_label","title":"plot_influence_distribution_by_label(influences, labels, title_extra='')","text":"

    Plots the histogram of the influence that all samples in the training set have over a single sample index, separated by labels.

    PARAMETER DESCRIPTION influences

    array of influences (training samples x test samples)

    TYPE: NDArray[float_]

    labels

    labels for the training set.

    TYPE: NDArray[float_]

    title_extra

    TYPE: str DEFAULT: ''

    Source code in src/pydvl/reporting/plots.py
    def plot_influence_distribution_by_label(\ninfluences: NDArray[np.float_], labels: NDArray[np.float_], title_extra: str = \"\"\n):\n\"\"\"Plots the histogram of the influence that all samples in the training set\n    have over a single sample index, separated by labels.\n    Args:\n       influences: array of influences (training samples x test samples)\n       labels: labels for the training set.\n       title_extra:\n    \"\"\"\n_, ax = plt.subplots()\nunique_labels = np.unique(labels)\nfor label in unique_labels:\nax.hist(influences[labels == label], label=label, alpha=0.7)\nax.set_xlabel(\"Influence values\")\nax.set_ylabel(\"Number of samples\")\nax.set_title(f\"Distribution of influences \" + title_extra)\nax.legend()\nplt.show()\n
    "},{"location":"api/pydvl/reporting/scores/","title":"Scores","text":""},{"location":"api/pydvl/reporting/scores/#pydvl.reporting.scores.compute_removal_score","title":"compute_removal_score(u, values, percentages, *, remove_best=False, progress=False)","text":"

    Fits model and computes score on the test set after incrementally removing a percentage of data points from the training set, based on their values.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    values

    Data values of data instances in the training set.

    TYPE: ValuationResult

    percentages

    Sequence of removal percentages.

    TYPE: Union[NDArray[float_], Iterable[float]]

    remove_best

    If True, removes data points in order of decreasing valuation.

    TYPE: bool DEFAULT: False

    progress

    If True, display a progress bar.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION Dict[float, float]

    Dictionary that maps the percentages to their respective scores.

    Source code in src/pydvl/reporting/scores.py
    def compute_removal_score(\nu: Utility,\nvalues: ValuationResult,\npercentages: Union[NDArray[np.float_], Iterable[float]],\n*,\nremove_best: bool = False,\nprogress: bool = False,\n) -> Dict[float, float]:\nr\"\"\"Fits model and computes score on the test set after incrementally removing\n    a percentage of data points from the training set, based on their values.\n    Args:\n        u: Utility object with model, data, and scoring function.\n        values: Data values of data instances in the training set.\n        percentages: Sequence of removal percentages.\n        remove_best: If True, removes data points in order of decreasing valuation.\n        progress: If True, display a progress bar.\n    Returns:\n        Dictionary that maps the percentages to their respective scores.\n    \"\"\"\n# Sanity checks\nif np.any([x >= 1.0 or x < 0.0 for x in percentages]):\nraise ValueError(\"All percentages should be in the range [0.0, 1.0)\")\nif len(values) != len(u.data.indices):\nraise ValueError(\nf\"The number of values, {len(values) }, should be equal to the number of data indices, {len(u.data.indices)}\"\n)\nscores = {}\n# We sort in descending order if we want to remove the best values\nvalues.sort(reverse=remove_best)\nfor pct in maybe_progress(percentages, display=progress, desc=\"Removal Scores\"):\nn_removal = int(pct * len(u.data))\nindices = values.indices[n_removal:]\nscore = u(indices)\nscores[pct] = score\nreturn scores\n
    "},{"location":"api/pydvl/utils/","title":"Utils","text":""},{"location":"api/pydvl/utils/caching/","title":"Caching","text":"

    Distributed caching of functions.

    pyDVL uses memcached to cache utility values, through pymemcache. This allows sharing evaluations across processes and nodes in a cluster. You can run memcached as a service, locally or remotely, see Setting up the cache.

    Warning

    Function evaluations are cached with a key based on the function's signature and code. This can lead to undesired cache hits, see Cache reuse.

    Remember not to reuse utility objects for different datasets.

    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching--configuration","title":"Configuration","text":"

    Memoization is disabled by default but can be enabled easily, see Setting up the cache. When enabled, it will be added to any callable used to construct a Utility (done with the decorator @memcached). Depending on the nature of the utility you might want to enable the computation of a running average of function values, see Usage with stochastic functions. You can see all configuration options under MemcachedConfig.

    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching--default-configuration","title":"Default configuration","text":"
    default_config = dict(\nserver=('localhost', 11211),\nconnect_timeout=1.0,\ntimeout=0.1,\n# IMPORTANT! Disable small packet consolidation:\nno_delay=True,\nserde=serde.PickleSerde(pickle_version=PICKLE_VERSION)\n)\n
    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching--usage-with-stochastic-functions","title":"Usage with stochastic functions","text":"

    In addition to standard memoization, the decorator memcached() can compute running average and standard error of repeated evaluations for the same input. This can be useful for stochastic functions with high variance (e.g. model training for small sample sizes), but drastically reduces the speed benefits of memoization.

    This behaviour can be activated with the argument allow_repeated_evaluations to memcached().

    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching--cache-reuse","title":"Cache reuse","text":"

    When working directly with memcached(), it is essential to only cache pure functions. If they have any kind of state, either internal or external (e.g. a closure over some data that may change), then the cache will fail to notice this and the same value will be returned.

    When a function is wrapped with memcached() for memoization, its signature (input and output names) and code are used as a key for the cache. Alternatively you can pass a custom value to be used as key with

    cached_fun = memcached(**asdict(cache_options))(fun, signature=custom_signature)\n

    If you are running experiments with the same Utility but different datasets, this will lead to evaluations of the utility on new data returning old values because utilities only use sample indices as arguments (so there is no way to tell the difference between '1' for dataset A and '1' for dataset B from the point of view of the cache). One solution is to empty the cache between runs, but the preferred one is to use a different Utility object for each dataset.

    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching--unexpected-cache-misses","title":"Unexpected cache misses","text":"

    Because all arguments to a function are used as part of the key for the cache, sometimes one must exclude some of them. For example, if a function is going to run across multiple processes and some reporting arguments are added (like a job_id for logging purposes), these will be part of the signature and make the functions distinct to the eyes of the cache. This can be avoided with the use of ignore_args in the configuration.

    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching.CacheStats","title":"CacheStats dataclass","text":"

    Statistics gathered by cached functions.

    ATTRIBUTE DESCRIPTION sets

    number of times a value was set in the cache

    TYPE: int

    misses

    number of times a value was not found in the cache

    TYPE: int

    hits

    number of times a value was found in the cache

    TYPE: int

    timeouts

    number of times a timeout occurred

    TYPE: int

    errors

    number of times an error occurred

    TYPE: int

    reconnects

    number of times the client reconnected to the server

    TYPE: int

    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching.serialize","title":"serialize(x)","text":"

    Serialize an object to bytes. Args: x: object to serialize.

    RETURNS DESCRIPTION bytes

    serialized object.

    Source code in src/pydvl/utils/caching.py
    def serialize(x: Any) -> bytes:\n\"\"\"Serialize an object to bytes.\n    Args:\n        x: object to serialize.\n    Returns:\n        serialized object.\n    \"\"\"\npickled_output = BytesIO()\npickler = Pickler(pickled_output, PICKLE_VERSION)\npickler.dump(x)\nreturn pickled_output.getvalue()\n
    "},{"location":"api/pydvl/utils/caching/#pydvl.utils.caching.memcached","title":"memcached(client_config=None, time_threshold=0.3, allow_repeated_evaluations=False, rtol_stderr=0.1, min_repetitions=3, ignore_args=None)","text":"

    Transparent, distributed memoization of function calls.

    Given a function and its signature, memcached uses a distributed cache that, for each set of inputs, keeps track of the average returned value, with variance and number of times it was calculated.

    If the function is deterministic, i.e. same input corresponds to the same exact output, set allow_repeated_evaluations to False. If instead the function is stochastic (like the training of a model depending on random initializations), memcached() allows setting a minimum number of evaluations to compute a running average, and a tolerance after which the function will not be called anymore. In other words, the function will be recomputed until the value has stabilized with a standard error smaller than rtol_stderr * running average.

    Warning

    Do not cache functions with state! See Cache reuse

    Example
    cached_fun = memcached(**asdict(cache_options))(heavy_computation)\n
    PARAMETER DESCRIPTION client_config

    configuration for pymemcache's Client. Will be merged on top of the default configuration (see below).

    TYPE: Optional[MemcachedClientConfig] DEFAULT: None

    time_threshold

    computations taking less time than this many seconds are not cached.

    TYPE: float DEFAULT: 0.3

    allow_repeated_evaluations

    If True, repeated calls to a function with the same arguments will be allowed and outputs averaged until the running standard deviation of the mean stabilizes below rtol_stderr * mean.

    TYPE: bool DEFAULT: False

    rtol_stderr

    relative tolerance for repeated evaluations. More precisely, memcached() will stop evaluating the function once the standard deviation of the mean is smaller than rtol_stderr * mean.

    TYPE: float DEFAULT: 0.1

    min_repetitions

    minimum number of times that a function evaluation on the same arguments is repeated before returning cached values. Useful for stochastic functions only. If the model training is very noisy, set this number to higher values to reduce variance.

    TYPE: int DEFAULT: 3

    ignore_args

    Do not take these keyword arguments into account when hashing the wrapped function for usage as key in memcached. This allows sharing the cache among different jobs for the same experiment run if the callable happens to have \"nuisance\" parameters like job_id which do not affect the result of the computation.

    TYPE: Optional[Iterable[str]] DEFAULT: None

    RETURNS DESCRIPTION Callable[[Callable[..., T], bytes | None], Callable[..., T]]

    A wrapped function

    Source code in src/pydvl/utils/caching.py
    def memcached(\nclient_config: Optional[MemcachedClientConfig] = None,\ntime_threshold: float = 0.3,\nallow_repeated_evaluations: bool = False,\nrtol_stderr: float = 0.1,\nmin_repetitions: int = 3,\nignore_args: Optional[Iterable[str]] = None,\n) -> Callable[[Callable[..., T], bytes | None], Callable[..., T]]:\n\"\"\"\n    Transparent, distributed memoization of function calls.\n    Given a function and its signature, memcached uses a distributed cache\n    that, for each set of inputs, keeps track of the average returned value,\n    with variance and number of times it was calculated.\n    If the function is deterministic, i.e. same input corresponds to the same\n    exact output, set `allow_repeated_evaluations` to `False`. If instead the\n    function is stochastic (like the training of a model depending on random\n    initializations), memcached() allows to set a minimum number of evaluations\n    to compute a running average, and a tolerance after which the function will\n    not be called anymore. In other words, the function will be recomputed\n    until the value has stabilized with a standard error smaller than\n    `rtol_stderr * running average`.\n    !!! Warning\n        Do not cache functions with state! See [Cache reuse](cache-reuse)\n    ??? 
Example\n        ```python\n        cached_fun = memcached(**asdict(cache_options))(heavy_computation)\n        ```\n    Args:\n        client_config: configuration for pymemcache's\n            [Client][pymemcache.client.base.Client].\n            Will be merged on top of the default configuration (see below).\n        time_threshold: computations taking less time than this many seconds are\n            not cached.\n        allow_repeated_evaluations: If `True`, repeated calls to a function\n            with the same arguments will be allowed and outputs averaged until the\n            running standard deviation of the mean stabilizes below\n            `rtol_stderr * mean`.\n        rtol_stderr: relative tolerance for repeated evaluations. More precisely,\n            [memcached()][pydvl.utils.caching.memcached] will stop evaluating the function once the\n            standard deviation of the mean is smaller than `rtol_stderr * mean`.\n        min_repetitions: minimum number of times that a function evaluation\n            on the same arguments is repeated before returning cached values. Useful\n            for stochastic functions only. If the model training is very noisy, set\n            this number to higher values to reduce variance.\n        ignore_args: Do not take these keyword arguments into account when\n            hashing the wrapped function for usage as key in memcached. 
This allows\n            sharing the cache among different jobs for the same experiment run if\n            the callable happens to have \"nuisance\" parameters like `job_id` which\n            do not affect the result of the computation.\n    Returns:\n        A wrapped function\n    \"\"\"\nif ignore_args is None:\nignore_args = []\n# Do I really need this?\ndef connect(config: MemcachedClientConfig):\n\"\"\"First tries to establish a connection, then tries setting and\n        getting a value.\"\"\"\ntry:\nclient = RetryingClient(\nClient(**asdict(config)),\nattempts=3,\nretry_delay=0.1,\nretry_for=[MemcacheUnexpectedCloseError],\n)\ntemp_key = str(uuid.uuid4())\nclient.set(temp_key, 7)\nassert client.get(temp_key) == 7\nclient.delete(temp_key, 0)\nreturn client\nexcept ConnectionRefusedError as e:\nlogger.error(  # type: ignore\nf\"@memcached: Timeout connecting \"\nf\"to {config.server} after \"\nf\"{config.connect_timeout} seconds: {str(e)}. Did you start memcached?\"\n)\nraise e\nexcept AssertionError as e:\nlogger.error(  # type: ignore\nf\"@memcached: Failure saving dummy value \"\nf\"to {config.server}: {str(e)}\"\n)\ndef wrapper(fun: Callable[..., T], signature: Optional[bytes] = None):\nif signature is None:\nsignature = serialize((fun.__code__.co_code, fun.__code__.co_consts))\n@wraps(fun, updated=[])  # don't try to use update() for a class\nclass Wrapped:\nconfig: MemcachedClientConfig\nstats: CacheStats\nclient: RetryingClient\ndef __init__(self, config: MemcachedClientConfig):\nself.config = config\nself.stats = CacheStats()\nself.client = connect(self.config)\nself._signature = signature\ndef __call__(self, *args, **kwargs) -> T:\nkey_kwargs = {k: v for k, v in kwargs.items() if k not in ignore_args}  # type: ignore\narg_signature: bytes = serialize((args, list(key_kwargs.items())))\nkey = blake2b(self._signature + arg_signature).hexdigest().encode(\"ASCII\")  # type: ignore\nresult_dict: Dict[str, float] = self.get_key_value(key)\nif result_dict 
is None:\nresult_dict = {}\nstart = time()\nvalue = fun(*args, **kwargs)\nend = time()\nresult_dict[\"value\"] = value\nif end - start >= time_threshold or allow_repeated_evaluations:\nresult_dict[\"count\"] = 1\nresult_dict[\"variance\"] = 0\nself.client.set(key, result_dict, noreply=True)\nself.stats.sets += 1\nself.stats.misses += 1\nelif allow_repeated_evaluations:\nself.stats.hits += 1\nvalue = result_dict[\"value\"]\ncount = result_dict[\"count\"]\nvariance = result_dict[\"variance\"]\nerror_on_average = (variance / count) ** (1 / 2)\nif (\nerror_on_average > rtol_stderr * value\nor count <= min_repetitions\n):\nnew_value = fun(*args, **kwargs)\nnew_avg, new_var = running_moments(\nvalue, variance, int(count), cast(float, new_value)\n)\nresult_dict[\"value\"] = new_avg\nresult_dict[\"count\"] = count + 1\nresult_dict[\"variance\"] = new_var\nself.client.set(key, result_dict, noreply=True)\nself.stats.sets += 1\nelse:\nself.stats.hits += 1\nreturn result_dict[\"value\"]  # type: ignore\ndef __getstate__(self):\n\"\"\"Enables pickling after a socket has been opened to the\n                memcached server, by removing the client from the stored\n                data.\"\"\"\nodict = self.__dict__.copy()\ndel odict[\"client\"]\nreturn odict\ndef __setstate__(self, d: dict):\n\"\"\"Restores a client connection after loading from a pickle.\"\"\"\nself.config = d[\"config\"]\nself.stats = d[\"stats\"]\nself.client = Client(**asdict(self.config))\nself._signature = signature\ndef get_key_value(self, key: bytes):\nresult = None\ntry:\nresult = self.client.get(key)\nexcept socket.timeout as e:\nself.stats.timeouts += 1\nwarnings.warn(f\"{type(self).__name__}: {str(e)}\", RuntimeWarning)\nexcept OSError as e:\nself.stats.errors += 1\nwarnings.warn(f\"{type(self).__name__}: {str(e)}\", RuntimeWarning)\nexcept AttributeError as e:\n# FIXME: this depends on _recv() failing on invalid sockets\n# See pymemcache.base.py,\nself.stats.reconnects += 
1\nwarnings.warn(f\"{type(self).__name__}: {str(e)}\", RuntimeWarning)\nself.client = connect(self.config)\nreturn result\nWrapped.__doc__ = (\nf\"A wrapper around {fun.__name__}() with remote caching enabled.\\n\"\n+ (Wrapped.__doc__ or \"\")\n)\nWrapped.__name__ = f\"memcached_{fun.__name__}\"\npath = list(reversed(fun.__qualname__.split(\".\")))\npatched = [f\"memcached_{path[0]}\"] + path[1:]\nWrapped.__qualname__ = \".\".join(reversed(patched))\n# TODO: pick from some config file or something\nreturn Wrapped(client_config or MemcachedClientConfig())\nreturn wrapper\n
    "},{"location":"api/pydvl/utils/config/","title":"Config","text":""},{"location":"api/pydvl/utils/config/#pydvl.utils.config.ParallelConfig","title":"ParallelConfig dataclass","text":"

    Configuration for parallel computation backend.

    PARAMETER DESCRIPTION backend

    Type of backend to use. Defaults to 'joblib'

    TYPE: Literal['joblib', 'ray'] DEFAULT: 'joblib'

    address

    Address of existing remote or local cluster to use.

    TYPE: Optional[Union[str, Tuple[str, int]]] DEFAULT: None

    n_cpus_local

    Number of CPUs to use when creating a local ray cluster. This has no effect when using an existing ray cluster.

    TYPE: Optional[int] DEFAULT: None

    logging_level

    Logging level for the parallel backend's worker.

    TYPE: int DEFAULT: WARNING

    wait_timeout

    Timeout in seconds for waiting on futures.

    TYPE: float DEFAULT: 1.0

    "},{"location":"api/pydvl/utils/config/#pydvl.utils.config.MemcachedClientConfig","title":"MemcachedClientConfig dataclass","text":"

    Configuration of the memcached client.

    PARAMETER DESCRIPTION server

    A tuple of (IP|domain name, port).

    TYPE: Tuple[str, int] DEFAULT: ('localhost', 11211)

    connect_timeout

    How many seconds to wait before raising ConnectionRefusedError on failure to connect.

    TYPE: float DEFAULT: 1.0

    timeout

    seconds to wait for send or recv calls on the socket connected to memcached.

    TYPE: float DEFAULT: 1.0

    no_delay

    set the TCP_NODELAY flag, which may help with performance in some cases.

    TYPE: bool DEFAULT: True

    serde

    a serializer / deserializer (\"serde\"). The default PickleSerde should work in most cases. See pymemcache's documentation for details.

    TYPE: PickleSerde DEFAULT: PickleSerde(pickle_version=PICKLE_VERSION)

    "},{"location":"api/pydvl/utils/config/#pydvl.utils.config.MemcachedConfig","title":"MemcachedConfig dataclass","text":"

    Configuration for memcached(), providing memoization of function calls.

    Instances of this class are typically used as arguments for the construction of a Utility.

    PARAMETER DESCRIPTION client_config

    Configuration for the connection to the memcached server.

    TYPE: MemcachedClientConfig DEFAULT: field(default_factory=MemcachedClientConfig)

    time_threshold

    computations taking less time than this many seconds are not cached.

    TYPE: float DEFAULT: 0.3

    allow_repeated_evaluations

    If True, repeated calls to a function with the same arguments will be allowed and outputs averaged until the running standard deviation of the mean stabilizes below rtol_stderr * mean.

    TYPE: bool DEFAULT: False

    rtol_stderr

    relative tolerance for repeated evaluations. More precisely, memcached() will stop evaluating the function once the standard deviation of the mean is smaller than rtol_stderr * mean.

    TYPE: float DEFAULT: 0.1

    min_repetitions

    minimum number of times that a function evaluation on the same arguments is repeated before returning cached values. Useful for stochastic functions only. If the model training is very noisy, set this number to higher values to reduce variance.

    TYPE: int DEFAULT: 3

    ignore_args

    Do not take these keyword arguments into account when hashing the wrapped function for usage as key in memcached.

    TYPE: Optional[Iterable[str]] DEFAULT: None

    "},{"location":"api/pydvl/utils/dataset/","title":"Dataset","text":"

    This module contains convenience classes to handle data and groups thereof.

    Shapley and Least Core value computations require evaluation of a scoring function (the utility). This is typically the performance of the model on a test set (as an approximation to its true expected performance). It is therefore convenient to keep both the training data and the test data together to be passed around to methods in shapley and least_core. This is done with Dataset.

    This abstraction layer also allows seamlessly grouping data points together if one is interested in computing their value as a group, see GroupedDataset.

    Objects of both types are used to construct a Utility object.

    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset","title":"Dataset(x_train, y_train, x_test, y_test, feature_names=None, target_names=None, data_names=None, description=None, is_multi_output=False)","text":"

    A convenience class to handle datasets.

    It holds a dataset, split into training and test data, together with several labels on feature names, data point names and a description.

    PARAMETER DESCRIPTION x_train

    training data

    TYPE: Union[NDArray, DataFrame]

    y_train

    labels for training data

    TYPE: Union[NDArray, DataFrame]

    x_test

    test data

    TYPE: Union[NDArray, DataFrame]

    y_test

    labels for test data

    TYPE: Union[NDArray, DataFrame]

    feature_names

    name of the features of input data

    TYPE: Optional[Sequence[str]] DEFAULT: None

    target_names

    names of the features of target data

    TYPE: Optional[Sequence[str]] DEFAULT: None

    data_names

    names assigned to data points. For example, if the dataset is a time series, each entry can be a timestamp which can be referenced directly instead of using a row number.

    TYPE: Optional[Sequence[str]] DEFAULT: None

    description

    A textual description of the dataset.

    TYPE: Optional[str] DEFAULT: None

    is_multi_output

    set to False if labels are scalars, or to True if they are vectors of dimension > 1.

    TYPE: bool DEFAULT: False

    Source code in src/pydvl/utils/dataset.py
    def __init__(\nself,\nx_train: Union[NDArray, pd.DataFrame],\ny_train: Union[NDArray, pd.DataFrame],\nx_test: Union[NDArray, pd.DataFrame],\ny_test: Union[NDArray, pd.DataFrame],\nfeature_names: Optional[Sequence[str]] = None,\ntarget_names: Optional[Sequence[str]] = None,\ndata_names: Optional[Sequence[str]] = None,\ndescription: Optional[str] = None,\n# FIXME: use same parameter name as in check_X_y()\nis_multi_output: bool = False,\n):\n\"\"\"Constructs a Dataset from data and labels.\n    Args:\n        x_train: training data\n        y_train: labels for training data\n        x_test: test data\n        y_test: labels for test data\n        feature_names: name of the features of input data\n        target_names: names of the features of target data\n        data_names: names assigned to data points.\n            For example, if the dataset is a time series, each entry can be a\n            timestamp which can be referenced directly instead of using a row\n            number.\n        description: A textual description of the dataset.\n        is_multi_output: set to `False` if labels are scalars, or to\n            `True` if they are vectors of dimension > 1.\n    \"\"\"\nself.x_train, self.y_train = check_X_y(\nx_train, y_train, multi_output=is_multi_output\n)\nself.x_test, self.y_test = check_X_y(\nx_test, y_test, multi_output=is_multi_output\n)\nif x_train.shape[-1] != x_test.shape[-1]:\nraise ValueError(\nf\"Mismatching number of features: \"\nf\"{x_train.shape[-1]} and {x_test.shape[-1]}\"\n)\nif x_train.shape[0] != y_train.shape[0]:\nraise ValueError(\nf\"Mismatching number of samples: \"\nf\"{x_train.shape[-1]} and {x_test.shape[-1]}\"\n)\nif x_test.shape[0] != y_test.shape[0]:\nraise ValueError(\nf\"Mismatching number of samples: \"\nf\"{x_test.shape[-1]} and {y_test.shape[-1]}\"\n)\ndef make_names(s: str, a: np.ndarray) -> List[str]:\nn = a.shape[1] if len(a.shape) > 1 else 1\nreturn [f\"{s}{i:0{1 + int(np.log10(n))}d}\" for i in range(1, n + 
1)]\nself.feature_names = feature_names\nself.target_names = target_names\nif self.feature_names is None:\nif isinstance(x_train, pd.DataFrame):\nself.feature_names = x_train.columns.tolist()\nelse:\nself.feature_names = make_names(\"x\", x_train)\nif self.target_names is None:\nif isinstance(y_train, pd.DataFrame):\nself.target_names = y_train.columns.tolist()\nelse:\nself.target_names = make_names(\"y\", y_train)\nif len(self.x_train.shape) > 1:\nif (\nlen(self.feature_names) != self.x_train.shape[-1]\nor len(self.feature_names) != self.x_test.shape[-1]\n):\nraise ValueError(\"Mismatching number of features and names\")\nif len(self.y_train.shape) > 1:\nif (\nlen(self.target_names) != self.y_train.shape[-1]\nor len(self.target_names) != self.y_test.shape[-1]\n):\nraise ValueError(\"Mismatching number of targets and names\")\nself.description = description or \"No description\"\nself._indices = np.arange(len(self.x_train), dtype=np.int_)\nself._data_names = (\nnp.array(data_names, dtype=object)\nif data_names is not None\nelse self._indices.astype(object)\n)\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.indices","title":"indices: NDArray[np.int_] property","text":"

    Index of positions in data.x_train.

    Contiguous integers from 0 to len(x_train) - 1.

    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.data_names","title":"data_names: NDArray[np.object_] property","text":"

    Names of each individual datapoint.

    Used for reporting Shapley values.

    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.dim","title":"dim: int property","text":"

    Returns the number of dimensions of a sample.

    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.get_training_data","title":"get_training_data(indices=None)","text":"

    Given a set of indices, returns the training data that refer to those indices.

    This is used mainly by Utility to retrieve subsets of the data from indices. It is typically not needed in algorithms.

    PARAMETER DESCRIPTION indices

    Optional indices that will be used to select points from the training data. If None, the entire training data will be returned.

    TYPE: Optional[Iterable[int]] DEFAULT: None

    RETURNS DESCRIPTION Tuple[NDArray, NDArray]

    If indices is not None, the selected x and y arrays from the training data. Otherwise, the entire dataset.

    Source code in src/pydvl/utils/dataset.py
    def get_training_data(\nself, indices: Optional[Iterable[int]] = None\n) -> Tuple[NDArray, NDArray]:\n\"\"\"Given a set of indices, returns the training data that refer to those\n    indices.\n    This is used mainly by [Utility][pydvl.utils.utility.Utility] to retrieve\n    subsets of the data from indices. It is typically **not needed in\n    algorithms**.\n    Args:\n        indices: Optional indices that will be used to select points from\n            the training data. If `None`, the entire training data will be\n            returned.\n    Returns:\n        If `indices` is not `None`, the selected x and y arrays from the\n            training data. Otherwise, the entire dataset.\n    \"\"\"\nif indices is None:\nreturn self.x_train, self.y_train\nx = self.x_train[indices]\ny = self.y_train[indices]\nreturn x, y\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.get_test_data","title":"get_test_data(indices=None)","text":"

    Returns the entire test set regardless of the passed indices.

    The passed indices will not be used because for data valuation we generally want to score the trained model on the entire test data.

    Additionally, the way this method is used in the Utility class, the passed indices will be those of the training data and would not work on the test data.

    There may be cases where it is desired to use parts of the test data. In those cases, it is recommended to inherit from Dataset and override get_test_data().

    For example, the following snippet shows how one could go about mapping the training data indices into test data indices inside get_test_data():

    Example
    >>> from pydvl.utils import Dataset\n>>> import numpy as np\n>>> class DatasetWithTestDataIndices(Dataset):\n...    def get_test_data(self, indices=None):\n...        if indices is None:\n...            return self.x_test, self.y_test\n...        fraction = len(list(indices)) / len(self)\n...        mapped_indices = len(self.x_test) / len(self) * np.asarray(indices)\n...        mapped_indices = np.unique(mapped_indices.astype(int))\n...        return self.x_test[mapped_indices], self.y_test[mapped_indices]\n...\n>>> X = np.random.rand(100, 10)\n>>> y = np.random.randint(0, 2, 100)\n>>> dataset = DatasetWithTestDataIndices.from_arrays(X, y)\n>>> indices = np.random.choice(dataset.indices, 30, replace=False)\n>>> _ = dataset.get_training_data(indices)\n>>> _ = dataset.get_test_data(indices)\n
    PARAMETER DESCRIPTION indices

    Optional indices into the test data. This argument is unused and is left only for compatibility with get_training_data().

    TYPE: Optional[Iterable[int]] DEFAULT: None

    RETURNS DESCRIPTION Tuple[NDArray, NDArray]

    The entire test data.

    Source code in src/pydvl/utils/dataset.py
    def get_test_data(\nself, indices: Optional[Iterable[int]] = None\n) -> Tuple[NDArray, NDArray]:\n\"\"\"Returns the entire test set regardless of the passed indices.\n    The passed indices will not be used because for data valuation\n    we generally want to score the trained model on the entire test data.\n    Additionally, the way this method is used in the\n    [Utility][pydvl.utils.utility.Utility] class, the passed indices will\n    be those of the training data and would not work on the test data.\n    There may be cases where it is desired to use parts of the test data.\n    In those cases, it is recommended to inherit from\n    [Dataset][pydvl.utils.dataset.Dataset] and override\n    [get_test_data()][pydvl.utils.dataset.Dataset.get_test_data].\n    For example, the following snippet shows how one could go about\n    mapping the training data indices into test data indices\n    inside [get_test_data()][pydvl.utils.dataset.Dataset.get_test_data]:\n    ??? Example\n        ```pycon\n        >>> from pydvl.utils import Dataset\n        >>> import numpy as np\n        >>> class DatasetWithTestDataIndices(Dataset):\n        ...    def get_test_data(self, indices=None):\n        ...        if indices is None:\n        ...            return self.x_test, self.y_test\n        ...        fraction = len(list(indices)) / len(self)\n        ...        mapped_indices = len(self.x_test) / len(self) * np.asarray(indices)\n        ...        mapped_indices = np.unique(mapped_indices.astype(int))\n        ...        
return self.x_test[mapped_indices], self.y_test[mapped_indices]\n        ...\n        >>> X = np.random.rand(100, 10)\n        >>> y = np.random.randint(0, 2, 100)\n        >>> dataset = DatasetWithTestDataIndices.from_arrays(X, y)\n        >>> indices = np.random.choice(dataset.indices, 30, replace=False)\n        >>> _ = dataset.get_training_data(indices)\n        >>> _ = dataset.get_test_data(indices)\n        ```\n    Args:\n        indices: Optional indices into the test data. This argument is\n            unused left for compatibility with\n            [get_training_data()][pydvl.utils.dataset.Dataset.get_training_data].\n    Returns:\n        The entire test data.\n    \"\"\"\nreturn self.x_test, self.y_test\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.from_sklearn","title":"from_sklearn(data, train_size=0.8, random_state=None, stratify_by_target=False, **kwargs) classmethod","text":"

    Constructs a Dataset object from a sklearn.utils.Bunch, as returned by the load_* functions in scikit-learn toy datasets.

    Example
    >>> from pydvl.utils import Dataset\n>>> from sklearn.datasets import load_boston\n>>> dataset = Dataset.from_sklearn(load_boston())\n
    PARAMETER DESCRIPTION data

    scikit-learn Bunch object. The following attributes are supported:

    • data: covariates.
    • target: target variables (labels).
    • feature_names (optional): the feature names.
    • target_names (optional): the target names.
    • DESCR (optional): a description.

    TYPE: Bunch

    train_size

    size of the training dataset. Used in train_test_split

    TYPE: float DEFAULT: 0.8

    random_state

    seed for train / test split

    TYPE: Optional[int] DEFAULT: None

    stratify_by_target

    If True, data is split in a stratified fashion, using the target variable as labels. Read more in scikit-learn's user guide.

    TYPE: bool DEFAULT: False

    kwargs

    Additional keyword arguments to pass to the Dataset constructor. Use this to pass e.g. is_multi_output.

    DEFAULT: {}

    RETURNS DESCRIPTION Dataset

    Object with the sklearn dataset

    Changed in version 0.6.0

    Added kwargs to pass to the Dataset constructor.

    Source code in src/pydvl/utils/dataset.py
    @classmethod\ndef from_sklearn(\ncls,\ndata: Bunch,\ntrain_size: float = 0.8,\nrandom_state: Optional[int] = None,\nstratify_by_target: bool = False,\n**kwargs,\n) -> \"Dataset\":\n\"\"\"Constructs a [Dataset][pydvl.utils.Dataset] object from a\n    [sklearn.utils.Bunch][], as returned by the `load_*`\n    functions in [scikit-learn toy datasets](https://scikit-learn.org/stable/datasets/toy_dataset.html).\n    ??? Example\n        ```pycon\n        >>> from pydvl.utils import Dataset\n        >>> from sklearn.datasets import load_boston\n        >>> dataset = Dataset.from_sklearn(load_boston())\n        ```\n    Args:\n        data: scikit-learn Bunch object. The following attributes are supported:\n            - `data`: covariates.\n            - `target`: target variables (labels).\n            - `feature_names` (**optional**): the feature names.\n            - `target_names` (**optional**): the target names.\n            - `DESCR` (**optional**): a description.\n        train_size: size of the training dataset. Used in `train_test_split`\n        random_state: seed for train / test split\n        stratify_by_target: If `True`, data is split in a stratified\n            fashion, using the target variable as labels. Read more in\n            [scikit-learn's user guide](https://scikit-learn.org/stable/modules/cross_validation.html#stratification).\n        kwargs: Additional keyword arguments to pass to the\n            [Dataset][pydvl.utils.Dataset] constructor. Use this to pass e.g. `is_multi_output`.\n    Returns:\n        Object with the sklearn dataset\n    !!! 
tip \"Changed in version 0.6.0\"\n        Added kwargs to pass to the [Dataset][pydvl.utils.Dataset] constructor.\n    \"\"\"\nx_train, x_test, y_train, y_test = train_test_split(\ndata.data,\ndata.target,\ntrain_size=train_size,\nrandom_state=random_state,\nstratify=data.target if stratify_by_target else None,\n)\nreturn cls(\nx_train,\ny_train,\nx_test,\ny_test,\nfeature_names=data.get(\"feature_names\"),\ntarget_names=data.get(\"target_names\"),\ndescription=data.get(\"DESCR\"),\n**kwargs,\n)\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.Dataset.from_arrays","title":"from_arrays(X, y, train_size=0.8, random_state=None, stratify_by_target=False, **kwargs) classmethod","text":"

    Constructs a Dataset object from X and y numpy arrays as returned by the make_* functions in sklearn generated datasets.

    Example
    >>> from pydvl.utils import Dataset\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression()\n>>> dataset = Dataset.from_arrays(X, y)\n
    PARAMETER DESCRIPTION X

    numpy array of shape (n_samples, n_features)

    TYPE: NDArray

    y

    numpy array of shape (n_samples,)

    TYPE: NDArray

    train_size

    size of the training dataset. Used in train_test_split

    TYPE: float DEFAULT: 0.8

    random_state

    seed for train / test split

    TYPE: Optional[int] DEFAULT: None

    stratify_by_target

    If True, data is split in a stratified fashion, using the y variable as labels. Read more in sklearn's user guide.

    TYPE: bool DEFAULT: False

    kwargs

    Additional keyword arguments to pass to the Dataset constructor. Use this to pass e.g. feature_names or target_names.

    DEFAULT: {}

    RETURNS DESCRIPTION Dataset

    Object with the passed X and y arrays split across training and test sets.

    New in version 0.4.0

    Changed in version 0.6.0

    Added kwargs to pass to the Dataset constructor.

    Source code in src/pydvl/utils/dataset.py
    @classmethod\ndef from_arrays(\ncls,\nX: NDArray,\ny: NDArray,\ntrain_size: float = 0.8,\nrandom_state: Optional[int] = None,\nstratify_by_target: bool = False,\n**kwargs,\n) -> \"Dataset\":\n\"\"\"Constructs a [Dataset][pydvl.utils.Dataset] object from X and y numpy arrays  as\n    returned by the `make_*` functions in [sklearn generated datasets](https://scikit-learn.org/stable/datasets/sample_generators.html).\n    ??? Example\n        ```pycon\n        >>> from pydvl.utils import Dataset\n        >>> from sklearn.datasets import make_regression\n        >>> X, y = make_regression()\n        >>> dataset = Dataset.from_arrays(X, y)\n        ```\n    Args:\n        X: numpy array of shape (n_samples, n_features)\n        y: numpy array of shape (n_samples,)\n        train_size: size of the training dataset. Used in `train_test_split`\n        random_state: seed for train / test split\n        stratify_by_target: If `True`, data is split in a stratified fashion,\n            using the y variable as labels. Read more in [sklearn's user\n            guide](https://scikit-learn.org/stable/modules/cross_validation.html#stratification).\n        kwargs: Additional keyword arguments to pass to the\n            [Dataset][pydvl.utils.Dataset] constructor. Use this to pass e.g. `feature_names`\n            or `target_names`.\n    Returns:\n        Object with the passed X and y arrays split across training and test sets.\n    !!! tip \"New in version 0.4.0\"\n    !!! tip \"Changed in version 0.6.0\"\n        Added kwargs to pass to the [Dataset][pydvl.utils.Dataset] constructor.\n    \"\"\"\nx_train, x_test, y_train, y_test = train_test_split(\nX,\ny,\ntrain_size=train_size,\nrandom_state=random_state,\nstratify=y if stratify_by_target else None,\n)\nreturn cls(x_train, y_train, x_test, y_test, **kwargs)\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset","title":"GroupedDataset(x_train, y_train, x_test, y_test, data_groups, feature_names=None, target_names=None, group_names=None, description=None, **kwargs)","text":"

    Bases: Dataset

    Used for calculating Shapley values of subsets of the data considered as logical units. For instance, one can group by value of a categorical feature, by bin into which a continuous feature falls, or by label.

    PARAMETER DESCRIPTION x_train

    training data

    TYPE: NDArray

    y_train

    labels of training data

    TYPE: NDArray

    x_test

    test data

    TYPE: NDArray

    y_test

    labels of test data

    TYPE: NDArray

    data_groups

    Iterable of the same length as x_train containing a group label for each training data point. The label can be of any type, e.g. str or int. Data points with the same label will then be grouped by this object and considered as one for the purposes of valuation.

    TYPE: Sequence

    feature_names

    names of the covariates' features.

    TYPE: Optional[Sequence[str]] DEFAULT: None

    target_names

    names of the labels or targets y

    TYPE: Optional[Sequence[str]] DEFAULT: None

    group_names

    names of the groups. If not provided, the labels from data_groups will be used.

    TYPE: Optional[Sequence[str]] DEFAULT: None

    description

    A textual description of the dataset

    TYPE: Optional[str] DEFAULT: None

    kwargs

    Additional keyword arguments to pass to the Dataset constructor.

    DEFAULT: {}

    Changed in version 0.6.0

    Added group_names and forwarding of kwargs

    Source code in src/pydvl/utils/dataset.py
    def __init__(\nself,\nx_train: NDArray,\ny_train: NDArray,\nx_test: NDArray,\ny_test: NDArray,\ndata_groups: Sequence,\nfeature_names: Optional[Sequence[str]] = None,\ntarget_names: Optional[Sequence[str]] = None,\ngroup_names: Optional[Sequence[str]] = None,\ndescription: Optional[str] = None,\n**kwargs,\n):\n\"\"\"Class for grouping datasets.\n    Used for calculating Shapley values of subsets of the data considered\n    as logical units. For instance, one can group by value of a categorical\n    feature, by bin into which a continuous feature falls, or by label.\n    Args:\n        x_train: training data\n        y_train: labels of training data\n        x_test: test data\n        y_test: labels of test data\n        data_groups: Iterable of the same length as `x_train` containing\n            a group label for each training data point. The label can be of any\n            type, e.g. `str` or `int`. Data points with the same label will\n            then be grouped by this object and considered as one for effects of\n            valuation.\n        feature_names: names of the covariates' features.\n        target_names: names of the labels or targets y\n        group_names: names of the groups. If not provided, the labels\n            from `data_groups` will be used.\n        description: A textual description of the dataset\n        kwargs: Additional keyword arguments to pass to the\n            [Dataset][pydvl.utils.Dataset] constructor.\n    !!! 
tip \"Changed in version 0.6.0\"\n    Added `group_names` and forwarding of `kwargs`\n    \"\"\"\nsuper().__init__(\nx_train=x_train,\ny_train=y_train,\nx_test=x_test,\ny_test=y_test,\nfeature_names=feature_names,\ntarget_names=target_names,\ndescription=description,\n**kwargs,\n)\nif len(data_groups) != len(x_train):\nraise ValueError(\nf\"data_groups and x_train must have the same length.\"\nf\"Instead got {len(data_groups)=} and {len(x_train)=}\"\n)\nself.groups: OrderedDict[Any, List[int]] = OrderedDict(\n{k: [] for k in set(data_groups)}\n)\nfor idx, group in enumerate(data_groups):\nself.groups[group].append(idx)\nself.group_items = list(self.groups.items())\nself._indices = np.arange(len(self.groups.keys()))\nself._data_names = (\nnp.array(group_names, dtype=object)\nif group_names is not None\nelse np.array(list(self.groups.keys()), dtype=object)\n)\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset.indices","title":"indices property","text":"

    Indices of the groups.

    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset.data_names","title":"data_names property","text":"

    Names of the groups.

    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset.get_training_data","title":"get_training_data(indices=None)","text":"

    Returns the data and labels of all samples in the given groups.

    PARAMETER DESCRIPTION indices

    group indices whose elements to return. If None, all data from all groups are returned.

    TYPE: Optional[Iterable[int]] DEFAULT: None

    RETURNS DESCRIPTION Tuple[NDArray, NDArray]

    Tuple of training data x and labels y.

    Source code in src/pydvl/utils/dataset.py
    def get_training_data(\nself, indices: Optional[Iterable[int]] = None\n) -> Tuple[NDArray, NDArray]:\n\"\"\"Returns the data and labels of all samples in the given groups.\n    Args:\n        indices: group indices whose elements to return. If `None`,\n            all data from all groups are returned.\n    Returns:\n        Tuple of training data x and labels y.\n    \"\"\"\nif indices is None:\nindices = self.indices\ndata_indices = [\nidx for group_id in indices for idx in self.group_items[group_id][1]\n]\nreturn super().get_training_data(data_indices)\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset.from_sklearn","title":"from_sklearn(data, train_size=0.8, random_state=None, stratify_by_target=False, data_groups=None, **kwargs) classmethod","text":"

    Constructs a GroupedDataset object from a sklearn.utils.Bunch as returned by the load_* functions in scikit-learn toy datasets and groups it.

    Example
    >>> from sklearn.datasets import load_iris\n>>> from pydvl.utils import GroupedDataset\n>>> iris = load_iris()\n>>> data_groups = iris.data[:, 0] // 0.5\n>>> dataset = GroupedDataset.from_sklearn(iris, data_groups=data_groups)\n
    PARAMETER DESCRIPTION data

    scikit-learn Bunch object. The following attributes are supported:

    • data: covariates.
    • target: target variables (labels).
    • feature_names (optional): the feature names.
    • target_names (optional): the target names.
    • DESCR (optional): a description.

    TYPE: Bunch

    train_size

    size of the training dataset. Used in train_test_split.

    TYPE: float DEFAULT: 0.8

    random_state

    seed for train / test split.

    TYPE: Optional[int] DEFAULT: None

    stratify_by_target

    If True, data is split in a stratified fashion, using the target variable as labels. Read more in sklearn's user guide.

    TYPE: bool DEFAULT: False

    data_groups

    an array holding the group index or name for each data point. The length of this array must be equal to the number of data points in the dataset.

    TYPE: Optional[Sequence] DEFAULT: None

    kwargs

    Additional keyword arguments to pass to the Dataset constructor.

    DEFAULT: {}

    RETURNS DESCRIPTION GroupedDataset

    Dataset with the selected sklearn data

    Source code in src/pydvl/utils/dataset.py
    @classmethod\ndef from_sklearn(\ncls,\ndata: Bunch,\ntrain_size: float = 0.8,\nrandom_state: Optional[int] = None,\nstratify_by_target: bool = False,\ndata_groups: Optional[Sequence] = None,\n**kwargs,\n) -> \"GroupedDataset\":\n\"\"\"Constructs a [GroupedDataset][pydvl.utils.GroupedDataset] object from a\n    [sklearn.utils.Bunch][sklearn.utils.Bunch] as returned by the `load_*` functions in\n    [scikit-learn toy datasets](https://scikit-learn.org/stable/datasets/toy_dataset.html) and groups\n    it.\n    ??? Example\n        ```pycon\n        >>> from sklearn.datasets import load_iris\n        >>> from pydvl.utils import GroupedDataset\n        >>> iris = load_iris()\n        >>> data_groups = iris.data[:, 0] // 0.5\n        >>> dataset = GroupedDataset.from_sklearn(iris, data_groups=data_groups)\n        ```\n    Args:\n        data: scikit-learn Bunch object. The following attributes are supported:\n            - `data`: covariates.\n            - `target`: target variables (labels).\n            - `feature_names` (**optional**): the feature names.\n            - `target_names` (**optional**): the target names.\n            - `DESCR` (**optional**): a description.\n        train_size: size of the training dataset. Used in `train_test_split`.\n        random_state: seed for train / test split.\n        stratify_by_target: If `True`, data is split in a stratified\n            fashion, using the target variable as labels. Read more in\n            [sklearn's user guide](https://scikit-learn.org/stable/modules/cross_validation.html#stratification).\n        data_groups: an array holding the group index or name for each\n            data point. 
The length of this array must be equal to the number of\n            data points in the dataset.\n        kwargs: Additional keyword arguments to pass to the\n            [Dataset][pydvl.utils.Dataset] constructor.\n    Returns:\n        Dataset with the selected sklearn data\n    \"\"\"\nif data_groups is None:\nraise ValueError(\n\"data_groups must be provided when constructing a GroupedDataset\"\n)\nx_train, x_test, y_train, y_test, data_groups_train, _ = train_test_split(\ndata.data,\ndata.target,\ndata_groups,\ntrain_size=train_size,\nrandom_state=random_state,\nstratify=data.target if stratify_by_target else None,\n)\ndataset = Dataset(\nx_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test, **kwargs\n)\nreturn cls.from_dataset(dataset, data_groups_train)  # type: ignore\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset.from_arrays","title":"from_arrays(X, y, train_size=0.8, random_state=None, stratify_by_target=False, data_groups=None, **kwargs) classmethod","text":"

    Constructs a GroupedDataset object from X and y numpy arrays as returned by the make_* functions in scikit-learn generated datasets.

    Example
    >>> from sklearn.datasets import make_classification\n>>> from pydvl.utils import GroupedDataset\n>>> X, y = make_classification(\n...     n_samples=100,\n...     n_features=4,\n...     n_informative=2,\n...     n_redundant=0,\n...     random_state=0,\n...     shuffle=False\n... )\n>>> data_groups = X[:, 0] // 0.5\n>>> dataset = GroupedDataset.from_arrays(X, y, data_groups=data_groups)\n
    PARAMETER DESCRIPTION X

    array of shape (n_samples, n_features)

    TYPE: NDArray

    y

    array of shape (n_samples,)

    TYPE: NDArray

    train_size

    size of the training dataset. Used in train_test_split.

    TYPE: float DEFAULT: 0.8

    random_state

    seed for train / test split.

    TYPE: Optional[int] DEFAULT: None

    stratify_by_target

    If True, data is split in a stratified fashion, using the y variable as labels. Read more in sklearn's user guide.

    TYPE: bool DEFAULT: False

    data_groups

    an array holding the group index or name for each data point. The length of this array must be equal to the number of data points in the dataset.

    TYPE: Optional[Sequence] DEFAULT: None

    kwargs

    Additional keyword arguments that will be passed to the Dataset constructor.

    DEFAULT: {}

    RETURNS DESCRIPTION Dataset

    Dataset with the passed X and y arrays split across training and test sets.

    New in version 0.4.0

    Changed in version 0.6.0

    Added kwargs to pass to the Dataset constructor.

    Source code in src/pydvl/utils/dataset.py
    @classmethod\ndef from_arrays(\ncls,\nX: NDArray,\ny: NDArray,\ntrain_size: float = 0.8,\nrandom_state: Optional[int] = None,\nstratify_by_target: bool = False,\ndata_groups: Optional[Sequence] = None,\n**kwargs,\n) -> \"Dataset\":\n\"\"\"Constructs a [GroupedDataset][pydvl.utils.GroupedDataset] object from X and y numpy arrays\n    as returned by the `make_*` functions in\n    [scikit-learn generated datasets](https://scikit-learn.org/stable/datasets/sample_generators.html).\n    ??? Example\n        ```pycon\n        >>> from sklearn.datasets import make_classification\n        >>> from pydvl.utils import GroupedDataset\n        >>> X, y = make_classification(\n        ...     n_samples=100,\n        ...     n_features=4,\n        ...     n_informative=2,\n        ...     n_redundant=0,\n        ...     random_state=0,\n        ...     shuffle=False\n        ... )\n        >>> data_groups = X[:, 0] // 0.5\n        >>> dataset = GroupedDataset.from_arrays(X, y, data_groups=data_groups)\n        ```\n    Args:\n        X: array of shape (n_samples, n_features)\n        y: array of shape (n_samples,)\n        train_size: size of the training dataset. Used in `train_test_split`.\n        random_state: seed for train / test split.\n        stratify_by_target: If `True`, data is split in a stratified\n            fashion, using the y variable as labels. Read more in\n            [sklearn's user guide](https://scikit-learn.org/stable/modules/cross_validation.html#stratification).\n        data_groups: an array holding the group index or name for each data\n            point. The length of this array must be equal to the number of\n            data points in the dataset.\n        kwargs: Additional keyword arguments that will be passed to the\n            [Dataset][pydvl.utils.Dataset] constructor.\n    Returns:\n        Dataset with the passed X and y arrays split across training and\n            test sets.\n    !!! tip \"New in version 0.4.0\"\n    !!! 
tip \"Changed in version 0.6.0\"\n        Added kwargs to pass to the [Dataset][pydvl.utils.Dataset] constructor.\n    \"\"\"\nif data_groups is None:\nraise ValueError(\n\"data_groups must be provided when constructing a GroupedDataset\"\n)\nx_train, x_test, y_train, y_test, data_groups_train, _ = train_test_split(\nX,\ny,\ndata_groups,\ntrain_size=train_size,\nrandom_state=random_state,\nstratify=y if stratify_by_target else None,\n)\ndataset = Dataset(\nx_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test, **kwargs\n)\nreturn cls.from_dataset(dataset, data_groups_train)\n
    "},{"location":"api/pydvl/utils/dataset/#pydvl.utils.dataset.GroupedDataset.from_dataset","title":"from_dataset(dataset, data_groups) classmethod","text":"

    Creates a GroupedDataset object from the data of a Dataset object and a mapping of data groups.

    Example
    >>> import numpy as np\n>>> from pydvl.utils import Dataset, GroupedDataset\n>>> dataset = Dataset.from_arrays(\n...     X=np.asarray([[1, 2], [3, 4], [5, 6], [7, 8]]),\n...     y=np.asarray([0, 1, 0, 1]),\n... )\n>>> dataset = GroupedDataset.from_dataset(dataset, data_groups=[0, 0, 1, 1])\n
    PARAMETER DESCRIPTION dataset

    The original data.

    TYPE: Dataset

    data_groups

    An array holding the group index or name for each data point. The length of this array must be equal to the number of data points in the dataset.

    TYPE: Sequence[Any]

    RETURNS DESCRIPTION GroupedDataset

    A GroupedDataset with the initial Dataset grouped by data_groups.

    Source code in src/pydvl/utils/dataset.py
    @classmethod\ndef from_dataset(\ncls, dataset: Dataset, data_groups: Sequence[Any]\n) -> \"GroupedDataset\":\n\"\"\"Creates a [GroupedDataset][pydvl.utils.GroupedDataset] object from the data a\n    [Dataset][pydvl.utils.Dataset] object and a mapping of data groups.\n    ??? Example\n        ```pycon\n        >>> import numpy as np\n        >>> from pydvl.utils import Dataset, GroupedDataset\n        >>> dataset = Dataset.from_arrays(\n        ...     X=np.asarray([[1, 2], [3, 4], [5, 6], [7, 8]]),\n        ...     y=np.asarray([0, 1, 0, 1]),\n        ... )\n        >>> dataset = GroupedDataset.from_dataset(dataset, data_groups=[0, 0, 1, 1])\n        ```\n    Args:\n        dataset: The original data.\n        data_groups: An array holding the group index or name for each data\n            point. The length of this array must be equal to the number of\n            data points in the dataset.\n    Returns:\n        A [GroupedDataset][pydvl.utils.GroupedDataset] with the initial\n            [Dataset][pydvl.utils.Dataset] grouped by data_groups.\n    \"\"\"\nreturn cls(\nx_train=dataset.x_train,\ny_train=dataset.y_train,\nx_test=dataset.x_test,\ny_test=dataset.y_test,\ndata_groups=data_groups,\nfeature_names=dataset.feature_names,\ntarget_names=dataset.target_names,\ndescription=dataset.description,\n)\n
    "},{"location":"api/pydvl/utils/functional/","title":"Functional","text":"

    Supporting utilities for manipulating arguments of functions.

    "},{"location":"api/pydvl/utils/functional/#pydvl.utils.functional.free_arguments","title":"free_arguments(fun)","text":"

    Computes the set of free arguments for a function or functools.partial object.

    All arguments of a function are considered free unless they are set by a partial. For example, if f = partial(g, a=1), then a is not a free argument of f.

    PARAMETER DESCRIPTION fun

    A callable or a functools.partial object.

    TYPE: Union[Callable, partial]

    RETURNS DESCRIPTION Set[str]

    The set of free arguments of fun.

    New in version 0.7.0

    Source code in src/pydvl/utils/functional.py
    def free_arguments(fun: Union[Callable, partial]) -> Set[str]:\n\"\"\"Computes the set of free arguments for a function or\n    [functools.partial][] object.\n    All arguments of a function are considered free unless they are set by a\n    partial. For example, if `f = partial(g, a=1)`, then `a` is not a free\n    argument of `f`.\n    Args:\n        fun: A callable or a [partial object][].\n    Returns:\n        The set of free arguments of `fun`.\n    !!! tip \"New in version 0.7.0\"\n    \"\"\"\nargs_set_by_partial: Set[str] = set()\ndef _rec_unroll_partial_function_args(g: Union[Callable, partial]) -> Callable:\n\"\"\"Stores arguments and recursively call itself if `g` is a\n        [functools.partial][] object. In the end, returns the initially wrapped\n        function.\n        This handles the construct `partial(_accept_additional_argument, *args,\n        **kwargs)` that is used by `maybe_add_argument`.\n        Args:\n            g: A partial or a function to unroll.\n        Returns:\n            Initial wrapped function.\n        \"\"\"\nnonlocal args_set_by_partial\nif isinstance(g, partial) and g.func == _accept_additional_argument:\narg = g.keywords[\"arg\"]\nif arg in args_set_by_partial:\nargs_set_by_partial.remove(arg)\nreturn _rec_unroll_partial_function_args(g.keywords[\"fun\"])\nelif isinstance(g, partial):\nargs_set_by_partial.update(g.keywords.keys())\nargs_set_by_partial.update(g.args)\nreturn _rec_unroll_partial_function_args(g.func)\nelse:\nreturn g\nwrapped_fn = _rec_unroll_partial_function_args(fun)\nsig = inspect.signature(wrapped_fn)\nreturn args_set_by_partial | set(sig.parameters.keys())\n
    "},{"location":"api/pydvl/utils/functional/#pydvl.utils.functional.maybe_add_argument","title":"maybe_add_argument(fun, new_arg)","text":"

    Wraps a function to accept the given keyword parameter if it doesn't already.

    If fun already takes a keyword parameter of name new_arg, then it is returned as is. Otherwise, a wrapper is returned which merely ignores the argument.

    PARAMETER DESCRIPTION fun

    The function to wrap

    TYPE: Callable

    new_arg

    The name of the argument that the new function will accept (and ignore).

    TYPE: str

    RETURNS DESCRIPTION Callable

    A new function accepting one more keyword argument.

    Changed in version 0.7.0

    Ability to work with partials.

    Source code in src/pydvl/utils/functional.py
    def maybe_add_argument(fun: Callable, new_arg: str) -> Callable:\n\"\"\"Wraps a function to accept the given keyword parameter if it doesn't\n    already.\n    If `fun` already takes a keyword parameter of name `new_arg`, then it is\n    returned as is. Otherwise, a wrapper is returned which merely ignores the\n    argument.\n    Args:\n        fun: The function to wrap\n        new_arg: The name of the argument that the new function will accept\n            (and ignore).\n    Returns:\n        A new function accepting one more keyword argument.\n    !!! tip \"Changed in version 0.7.0\"\n        Ability to work with partials.\n    \"\"\"\nif new_arg in free_arguments(fun):\nreturn fun\nreturn partial(_accept_additional_argument, fun=fun, arg=new_arg)\n
    "},{"location":"api/pydvl/utils/numeric/","title":"Numeric","text":"

    This module contains routines for numerical computations used across the library.

    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.powerset","title":"powerset(s)","text":"

    Returns an iterator for the power set of the argument.

    Subsets are generated in sequence by growing size. See random_powerset() for random sampling.

    Example
    >>> import numpy as np\n>>> from pydvl.utils.numeric import powerset\n>>> list(powerset(np.array((1,2))))\n[(), (1,), (2,), (1, 2)]\n
    PARAMETER DESCRIPTION s

    The set to use

    TYPE: NDArray[T]

    RETURNS DESCRIPTION Iterator[Collection[T]]

    An iterator over all subsets of the set of indices s.

    Source code in src/pydvl/utils/numeric.py
    def powerset(s: NDArray[T]) -> Iterator[Collection[T]]:\n\"\"\"Returns an iterator for the power set of the argument.\n     Subsets are generated in sequence by growing size. See\n     [random_powerset()][pydvl.utils.numeric.random_powerset] for random\n     sampling.\n    ??? Example\n        ``` pycon\n        >>> import numpy as np\n        >>> from pydvl.utils.numeric import powerset\n        >>> list(powerset(np.array((1,2))))\n        [(), (1,), (2,), (1, 2)]\n        ```\n    Args:\n         s: The set to use\n    Returns:\n        An iterator over all subsets of the set of indices `s`.\n    \"\"\"\nreturn chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.num_samples_permutation_hoeffding","title":"num_samples_permutation_hoeffding(eps, delta, u_range)","text":"

    Lower bound on the number of samples required for Monte Carlo Shapley to obtain an (\u03b5,\u03b4)-approximation.

    That is: with probability 1-\u03b4, the estimated value for one data point will be \u03b5-close to the true quantity, if at least this many permutations are sampled.

    PARAMETER DESCRIPTION eps

    \u03b5 > 0

    TYPE: float

    delta

    0 < \u03b4 <= 1

    TYPE: float

    u_range

    Range of the Utility function

    TYPE: float

    RETURNS DESCRIPTION int

    Number of permutations required to guarantee \u03b5-correct Shapley values with probability 1-\u03b4

    Source code in src/pydvl/utils/numeric.py
    def num_samples_permutation_hoeffding(eps: float, delta: float, u_range: float) -> int:\n\"\"\"Lower bound on the number of samples required for MonteCarlo Shapley to\n    obtain an (\u03b5,\u03b4)-approximation.\n    That is: with probability 1-\u03b4, the estimated value for one data point will\n    be \u03b5-close to the true quantity, if at least this many permutations are\n    sampled.\n    Args:\n        eps: \u03b5 > 0\n        delta: 0 < \u03b4 <= 1\n        u_range: Range of the [Utility][pydvl.utils.utility.Utility] function\n    Returns:\n        Number of _permutations_ required to guarantee \u03b5-correct Shapley\n            values with probability 1-\u03b4\n    \"\"\"\nreturn int(np.ceil(np.log(2 / delta) * 2 * u_range**2 / eps**2))\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.random_subset","title":"random_subset(s, q=0.5, seed=None)","text":"

    Returns one subset at random from s.

    PARAMETER DESCRIPTION s

    set to sample from

    TYPE: NDArray[T]

    q

    Sampling probability for elements. The default 0.5 yields a uniform distribution over the power set of s.

    TYPE: float DEFAULT: 0.5

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    RETURNS DESCRIPTION NDArray[T]

    The subset

    Source code in src/pydvl/utils/numeric.py
    def random_subset(\ns: NDArray[T], q: float = 0.5, seed: Optional[Seed] = None\n) -> NDArray[T]:\n\"\"\"Returns one subset at random from ``s``.\n    Args:\n        s: set to sample from\n        q: Sampling probability for elements. The default 0.5 yields a\n            uniform distribution over the power set of s.\n        seed: Either an instance of a numpy random number generator or a seed\n            for it.\n    Returns:\n        The subset\n    \"\"\"\nrng = np.random.default_rng(seed)\nselection = rng.uniform(size=len(s)) > q\nreturn s[selection]\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.random_powerset","title":"random_powerset(s, n_samples=None, q=0.5, seed=None)","text":"

    Samples subsets from the power set of the argument, without pre-generating all subsets and in no particular order.

    See powerset if you wish to deterministically generate all subsets.

    To generate subsets, len(s) Bernoulli draws with probability q are made. The default value of q = 0.5 provides a uniform distribution over the power set of s. Other choices can be used e.g. to implement owen_sampling_shapley.

    PARAMETER DESCRIPTION s

    set to sample from

    TYPE: NDArray[T]

    n_samples

    if set, stop the generator after this many steps. Defaults to np.iinfo(np.int32).max

    TYPE: Optional[int] DEFAULT: None

    q

    Sampling probability for elements. The default 0.5 yields a uniform distribution over the power set of s.

    TYPE: float DEFAULT: 0.5

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    RETURNS DESCRIPTION Generator[NDArray[T], None, None]

    Samples from the power set of s.

    RAISES DESCRIPTION ValueError

    if the element sampling probability is not in [0,1]

    Source code in src/pydvl/utils/numeric.py
    def random_powerset(\ns: NDArray[T],\nn_samples: Optional[int] = None,\nq: float = 0.5,\nseed: Optional[Seed] = None,\n) -> Generator[NDArray[T], None, None]:\n\"\"\"Samples subsets from the power set of the argument, without\n    pre-generating all subsets and in no order.\n    See [powerset][pydvl.utils.numeric.powerset] if you wish to deterministically generate all subsets.\n    To generate subsets, `len(s)` Bernoulli draws with probability `q` are\n    drawn. The default value of `q = 0.5` provides a uniform distribution over\n    the power set of `s`. Other choices can be used e.g. to implement\n    [owen_sampling_shapley][pydvl.value.shapley.owen.owen_sampling_shapley].\n    Args:\n        s: set to sample from\n        n_samples: if set, stop the generator after this many steps.\n            Defaults to `np.iinfo(np.int32).max`\n        q: Sampling probability for elements. The default 0.5 yields a\n            uniform distribution over the power set of s.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n    Returns:\n        Samples from the power set of `s`.\n    Raises:\n        ValueError: if the element sampling probability is not in [0,1]\n    \"\"\"\nif q < 0 or q > 1:\nraise ValueError(\"Element sampling probability must be in [0,1]\")\nrng = np.random.default_rng(seed)\ntotal = 1\nif n_samples is None:\nn_samples = np.iinfo(np.int32).max\nwhile total <= n_samples:\nyield random_subset(s, q, seed=rng)\ntotal += 1\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.random_subset_of_size","title":"random_subset_of_size(s, size, seed=None)","text":"

    Samples a random subset of given size uniformly from the powerset of s.

    PARAMETER DESCRIPTION s

    Set to sample from

    TYPE: NDArray[T]

    size

    Size of the subset to generate

    TYPE: int

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    RETURNS DESCRIPTION NDArray[T]

    The subset

    Raises ValueError: If size > len(s)

    Source code in src/pydvl/utils/numeric.py
    def random_subset_of_size(\ns: NDArray[T], size: int, seed: Optional[Seed] = None\n) -> NDArray[T]:\n\"\"\"Samples a random subset of given size uniformly from the powerset\n    of `s`.\n    Args:\n        s: Set to sample from\n        size: Size of the subset to generate\n        seed: Either an instance of a numpy random number generator or a seed for it.\n    Returns:\n        The subset\n    Raises\n        ValueError: If size > len(s)\n    \"\"\"\nif size > len(s):\nraise ValueError(\"Cannot sample subset larger than set\")\nrng = np.random.default_rng(seed)\nreturn rng.choice(s, size=size, replace=False)\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.random_matrix_with_condition_number","title":"random_matrix_with_condition_number(n, condition_number, seed=None)","text":"

    Constructs a square matrix with a given condition number.

    Taken from: https://gist.github.com/bstellato/23322fe5d87bb71da922fbc41d658079#file-random_mat_condition_number-py

    Also see: https://math.stackexchange.com/questions/1351616/condition-number-of-ata.

    PARAMETER DESCRIPTION n

    size of the matrix

    TYPE: int

    condition_number

    The target condition number of the generated matrix; must be greater than 1.

    TYPE: float

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    RETURNS DESCRIPTION NDArray

    An (n,n) matrix with the requested condition number.

    Source code in src/pydvl/utils/numeric.py
    def random_matrix_with_condition_number(\nn: int, condition_number: float, seed: Optional[Seed] = None\n) -> NDArray:\n\"\"\"Constructs a square matrix with a given condition number.\n    Taken from:\n    [https://gist.github.com/bstellato/23322fe5d87bb71da922fbc41d658079#file-random_mat_condition_number-py](\n    https://gist.github.com/bstellato/23322fe5d87bb71da922fbc41d658079#file-random_mat_condition_number-py)\n    Also see:\n    [https://math.stackexchange.com/questions/1351616/condition-number-of-ata](\n    https://math.stackexchange.com/questions/1351616/condition-number-of-ata).\n    Args:\n        n: size of the matrix\n        condition_number: duh\n        seed: Either an instance of a numpy random number generator or a seed for it.\n    Returns:\n        An (n,n) matrix with the requested condition number.\n    \"\"\"\nif n < 2:\nraise ValueError(\"Matrix size must be at least 2\")\nif condition_number <= 1:\nraise ValueError(\"Condition number must be greater than 1\")\nrng = np.random.default_rng(seed)\nlog_condition_number = np.log(condition_number)\nexp_vec = np.arange(\n-log_condition_number / 4.0,\nlog_condition_number * (n + 1) / (4 * (n - 1)),\nlog_condition_number / (2.0 * (n - 1)),\n)\nexp_vec = exp_vec[:n]\ns: np.ndarray = np.exp(exp_vec)\nS = np.diag(s)\nU, _ = np.linalg.qr((rng.uniform(size=(n, n)) - 5.0) * 200)\nV, _ = np.linalg.qr((rng.uniform(size=(n, n)) - 5.0) * 200)\nP: np.ndarray = U.dot(S).dot(V.T)\nP = P.dot(P.T)\nreturn P\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.running_moments","title":"running_moments(previous_avg, previous_variance, count, new_value)","text":"

    Uses Welford's algorithm to calculate the running average and variance of a set of numbers.

    See Welford's algorithm in wikipedia

    Warning

    This is not really using Welford's correction for numerical stability for the variance. (FIXME)

    Todo

    This could be generalised to arbitrary moments. See this paper

    PARAMETER DESCRIPTION previous_avg

    average value at previous step

    TYPE: float | NDArray[float_]

    previous_variance

    variance at previous step

    TYPE: float | NDArray[float_]

    count

    number of points seen so far

    TYPE: int

    new_value

    new value in the series of numbers

    TYPE: float | NDArray[float_]

    RETURNS DESCRIPTION Tuple[float | NDArray[float_], float | NDArray[float_]]

    new_average, new_variance, calculated with the new count

    Source code in src/pydvl/utils/numeric.py
    def running_moments(\nprevious_avg: float | NDArray[np.float_],\nprevious_variance: float | NDArray[np.float_],\ncount: int,\nnew_value: float | NDArray[np.float_],\n) -> Tuple[float | NDArray[np.float_], float | NDArray[np.float_]]:\n\"\"\"Uses Welford's algorithm to calculate the running average and variance of\n     a set of numbers.\n    See [Welford's algorithm in wikipedia](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm)\n    !!! Warning\n        This is not really using Welford's correction for numerical stability\n        for the variance. (FIXME)\n    !!! Todo\n        This could be generalised to arbitrary moments. See [this paper](https://www.osti.gov/biblio/1028931)\n    Args:\n        previous_avg: average value at previous step\n        previous_variance: variance at previous step\n        count: number of points seen so far\n        new_value: new value in the series of numbers\n    Returns:\n        new_average, new_variance, calculated with the new count\n    \"\"\"\n# broadcasted operations seem not to be supported by mypy, so we ignore the type\nnew_average = (new_value + count * previous_avg) / (count + 1)  # type: ignore\nnew_variance = previous_variance + (\n(new_value - previous_avg) * (new_value - new_average) - previous_variance\n) / (count + 1)\nreturn new_average, new_variance\n
    "},{"location":"api/pydvl/utils/numeric/#pydvl.utils.numeric.top_k_value_accuracy","title":"top_k_value_accuracy(y_true, y_pred, k=3)","text":"

    Computes the top-k accuracy for the estimated values by comparing indices of the highest k values.

    PARAMETER DESCRIPTION y_true

    Exact/true value

    TYPE: NDArray[float_]

    y_pred

    Predicted/estimated value

    TYPE: NDArray[float_]

    k

    Number of the highest values taken into account

    TYPE: int DEFAULT: 3

    RETURNS DESCRIPTION float

    Accuracy

    Source code in src/pydvl/utils/numeric.py
    def top_k_value_accuracy(\ny_true: NDArray[np.float_], y_pred: NDArray[np.float_], k: int = 3\n) -> float:\n\"\"\"Computes the top-k accuracy for the estimated values by comparing indices\n    of the highest k values.\n    Args:\n        y_true: Exact/true value\n        y_pred: Predicted/estimated value\n        k: Number of the highest values taken into account\n    Returns:\n        Accuracy\n    \"\"\"\ntop_k_exact_values = np.argsort(y_true)[-k:]\ntop_k_pred_values = np.argsort(y_pred)[-k:]\ntop_k_accuracy = len(np.intersect1d(top_k_exact_values, top_k_pred_values)) / k\nreturn top_k_accuracy\n
    "},{"location":"api/pydvl/utils/parallel/","title":"Parallel","text":""},{"location":"api/pydvl/utils/parallel/#pydvl.utils.parallel--this-module-is-deprecated","title":"This module is deprecated","text":"

    Redirects

    Imports from this module will be redirected to pydvl.parallel only until v0.9.0. Please update your imports.

    "},{"location":"api/pydvl/utils/progress/","title":"Progress","text":"

    Warning

    This module is deprecated and will be removed in a future release. It implements a wrapper for the tqdm progress bar iterator for easy toggling, but this functionality is already provided by the disable argument of tqdm.

    "},{"location":"api/pydvl/utils/progress/#pydvl.utils.progress.MockProgress","title":"MockProgress(iterator)","text":"

    Bases: Iterator

    A naive mock class to use with maybe_progress and tqdm. Mocked methods don't support return values. Mocked properties don't do anything.

    Source code in src/pydvl/utils/progress.py
    def __init__(self, iterator: Union[Iterator, Iterable]):\n# Since there is no _it in __dict__ at this point, doing here\n# self._it = iterator\n# results in a call to __getattr__() and the assignment fails, so we\n# use __dict__ instead\nself.__dict__[\"_it\"] = iterator\n
    "},{"location":"api/pydvl/utils/progress/#pydvl.utils.progress.maybe_progress","title":"maybe_progress(it, display=False, **kwargs)","text":"

    Returns either a tqdm progress bar or a mock object which wraps the iterator as well, but ignores any accesses to methods or properties.

    PARAMETER DESCRIPTION it

    the iterator to wrap

    TYPE: Union[int, Iterable, Iterator]

    display

    set to True to return a tqdm bar

    TYPE: bool DEFAULT: False

    kwargs

    Keyword arguments that will be forwarded to tqdm

    DEFAULT: {}

    Source code in src/pydvl/utils/progress.py
    def maybe_progress(\nit: Union[int, Iterable, Iterator], display: bool = False, **kwargs\n) -> Union[tqdm, MockProgress]:\n\"\"\"Returns either a tqdm progress bar or a mock object which wraps the\n    iterator as well, but ignores any accesses to methods or properties.\n    Args:\n        it: the iterator to wrap\n        display: set to True to return a tqdm bar\n        kwargs: Keyword arguments that will be forwarded to tqdm\n    \"\"\"\nif isinstance(it, int):\nit = range(it)  # type: ignore\nreturn tqdm(it, **kwargs) if display else MockProgress(it)\n
    "},{"location":"api/pydvl/utils/score/","title":"Score","text":"

    This module provides a Scorer class that wraps scoring functions with additional information.

    Scorers are the fundamental building block of many data valuation methods. They are typically used by the Utility class to evaluate the quality of a model when trained on subsets of the training data.

    Scorers can be constructed in the same way as in scikit-learn: either from known strings or from a callable. Greater values must be better. If they are not, a negated version can be used, see scikit-learn's make_scorer().

    Scorer provides additional information about the scoring function, like its range and default values, which can be used by some data valuation methods (like group_testing_shapley()) to estimate the number of samples required for a certain quality of approximation.

    "},{"location":"api/pydvl/utils/score/#pydvl.utils.score.squashed_r2","title":"squashed_r2 = compose_score(Scorer('r2'), _sigmoid, (0, 1), 'squashed r2') module-attribute","text":"

    A scorer that squashes the R\u00b2 score into the range [0, 1] using a sigmoid.

    "},{"location":"api/pydvl/utils/score/#pydvl.utils.score.squashed_variance","title":"squashed_variance = compose_score(Scorer('explained_variance'), _sigmoid, (0, 1), 'squashed explained variance') module-attribute","text":"

    A scorer that squashes the explained variance score into the range [0, 1] using a sigmoid.

    "},{"location":"api/pydvl/utils/score/#pydvl.utils.score.ScorerCallable","title":"ScorerCallable","text":"

    Bases: Protocol

    Signature for a scorer

    "},{"location":"api/pydvl/utils/score/#pydvl.utils.score.Scorer","title":"Scorer(scoring, default=np.nan, range=(-np.inf, np.inf), name=None)","text":"

    A scoring callable that takes a model, data, and labels and returns a scalar.

    PARAMETER DESCRIPTION scoring

    Either a string or callable that can be passed to get_scorer.

    TYPE: Union[str, ScorerCallable]

    default

    score to be used when a model cannot be fit, e.g. when too little data is passed, or errors arise.

    TYPE: float DEFAULT: nan

    range

    numerical range of the score function. Some Monte Carlo methods can use this to estimate the number of samples required for a certain quality of approximation. If not provided, it can be read from the scoring object if it provides it, for instance if it was constructed with compose_score().

    TYPE: Tuple DEFAULT: (-inf, inf)

    name

    The name of the scorer. If not provided, the name of the function passed will be used.

    TYPE: Optional[str] DEFAULT: None

    New in version 0.5.0

    Source code in src/pydvl/utils/score.py
    def __init__(\nself,\nscoring: Union[str, ScorerCallable],\ndefault: float = np.nan,\nrange: Tuple = (-np.inf, np.inf),\nname: Optional[str] = None,\n):\nif name is None and isinstance(scoring, str):\nname = scoring\nself._scorer = get_scorer(scoring)\nself.default = default\n# TODO: auto-fill from known scorers ?\nself.range = np.array(range)\nself._name = getattr(self._scorer, \"__name__\", name or \"scorer\")\n
    "},{"location":"api/pydvl/utils/score/#pydvl.utils.score.compose_score","title":"compose_score(scorer, transformation, range, name)","text":"

    Composes a scoring function with an arbitrary scalar transformation.

    Useful to squash unbounded scores into ranges manageable by data valuation methods.

    Example:

    sigmoid = lambda x: 1/(1+np.exp(-x))\ncompose_score(Scorer(\"r2\"), sigmoid, range=(0,1), name=\"squashed r2\")\n
    PARAMETER DESCRIPTION scorer

    The object to be composed.

    TYPE: Scorer

    transformation

    A scalar transformation

    TYPE: Callable[[float], float]

    range

    The range of the transformation. This will be used e.g. by Utility for the range of the composed scorer.

    TYPE: Tuple[float, float]

    name

    A string representation for the composition, for str().

    TYPE: str

    RETURNS DESCRIPTION Scorer

    The composite Scorer.

    Source code in src/pydvl/utils/score.py
    def compose_score(\nscorer: Scorer,\ntransformation: Callable[[float], float],\nrange: Tuple[float, float],\nname: str,\n) -> Scorer:\n\"\"\"Composes a scoring function with an arbitrary scalar transformation.\n    Useful to squash unbounded scores into ranges manageable by data valuation\n    methods.\n    Example:\n    ```python\n    sigmoid = lambda x: 1/(1+np.exp(-x))\n    compose_score(Scorer(\"r2\"), sigmoid, range=(0,1), name=\"squashed r2\")\n    ```\n    Args:\n        scorer: The object to be composed.\n        transformation: A scalar transformation\n        range: The range of the transformation. This will be used e.g. by\n            [Utility][pydvl.utils.utility.Utility] for the range of the composed.\n        name: A string representation for the composition, for `str()`.\n    Returns:\n        The composite [Scorer][pydvl.utils.score.Scorer].\n    \"\"\"\nclass CompositeScorer(Scorer):\ndef __call__(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float:\nscore = self._scorer(model=model, X=X, y=y)\nreturn transformation(score)\nreturn CompositeScorer(scorer, range=range, name=name)\n
    "},{"location":"api/pydvl/utils/status/","title":"Status","text":""},{"location":"api/pydvl/utils/status/#pydvl.utils.status.Status","title":"Status","text":"

    Bases: Enum

    Status of a computation.

    Statuses can be combined using bitwise or (|) and bitwise and (&) to get the status of a combined computation. For example, if we have two computations, one that has converged and one that has failed, then the combined status is Status.Converged | Status.Failed == Status.Converged, but Status.Converged & Status.Failed == Status.Failed.

    "},{"location":"api/pydvl/utils/status/#pydvl.utils.status.Status--or","title":"OR","text":"

    The result of bitwise or-ing two valuation statuses with | is given by the following table:

    |   | P | C | F |
    |---|---|---|---|
    | P | P | C | P |
    | C | C | C | C |
    | F | P | C | F |

    where P = Pending, C = Converged, F = Failed.

    "},{"location":"api/pydvl/utils/status/#pydvl.utils.status.Status--and","title":"AND","text":"

    The result of bitwise and-ing two valuation statuses with & is given by the following table:

    |   | P | C | F |
    |---|---|---|---|
    | P | P | P | F |
    | C | P | C | F |
    | F | F | F | F |

    where P = Pending, C = Converged, F = Failed.

    "},{"location":"api/pydvl/utils/status/#pydvl.utils.status.Status--not","title":"NOT","text":"

    The result of bitwise negation of a Status with ~ is Failed if the status is Converged, or Converged otherwise:

    ~P == C, ~C == F, ~F == C\n
    "},{"location":"api/pydvl/utils/status/#pydvl.utils.status.Status--boolean-casting","title":"Boolean casting","text":"

    A Status evaluates to True iff it's Converged or Failed:

    bool(Status.Pending) == False\nbool(Status.Converged) == True\nbool(Status.Failed) == True\n

    Warning

    These truth values are inconsistent with the usual boolean operations. In particular the XOR of two instances of Status is not the same as the XOR of their boolean values.

    "},{"location":"api/pydvl/utils/types/","title":"Types","text":"

    This module contains types, protocols, decorators and generic function transformations. Some of it probably belongs elsewhere.

    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.SupervisedModel","title":"SupervisedModel","text":"

    Bases: Protocol

    This is the minimal Protocol that valuation methods require from models in order to work.

    All that is needed are the standard sklearn methods fit(), predict() and score().

    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.SupervisedModel.fit","title":"fit(x, y)","text":"

    Fit the model to the data

    PARAMETER DESCRIPTION x

    Independent variables

    TYPE: NDArray

    y

    Dependent variable

    TYPE: NDArray

    Source code in src/pydvl/utils/types.py
    def fit(self, x: NDArray, y: NDArray):\n\"\"\"Fit the model to the data\n    Args:\n        x: Independent variables\n        y: Dependent variable\n    \"\"\"\npass\n
    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.SupervisedModel.predict","title":"predict(x)","text":"

    Compute predictions for the input

    PARAMETER DESCRIPTION x

    Independent variables for which to compute predictions

    TYPE: NDArray

    RETURNS DESCRIPTION NDArray

    Predictions for the input

    Source code in src/pydvl/utils/types.py
    def predict(self, x: NDArray) -> NDArray:\n\"\"\"Compute predictions for the input\n    Args:\n        x: Independent variables for which to compute predictions\n    Returns:\n        Predictions for the input\n    \"\"\"\npass\n
    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.SupervisedModel.score","title":"score(x, y)","text":"

    Compute the score of the model given test data

    PARAMETER DESCRIPTION x

    Independent variables

    TYPE: NDArray

    y

    Dependent variable

    TYPE: NDArray

    RETURNS DESCRIPTION float

    The score of the model on (x, y)

    Source code in src/pydvl/utils/types.py
    def score(self, x: NDArray, y: NDArray) -> float:\n\"\"\"Compute the score of the model given test data\n    Args:\n        x: Independent variables\n        y: Dependent variable\n    Returns:\n        The score of the model on `(x, y)`\n    \"\"\"\npass\n
    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.NoPublicConstructor","title":"NoPublicConstructor","text":"

    Bases: ABCMeta

    Metaclass that ensures a private constructor

    If a class uses this metaclass like this:

    class SomeClass(metaclass=NoPublicConstructor):\n    pass\n

    If you try to instantiate your class (SomeClass()), a TypeError will be thrown.

    Taken almost verbatim from: https://stackoverflow.com/a/64682734

    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.NoPublicConstructor.create","title":"create(*args, **kwargs)","text":"

    Create an instance of the class

    Source code in src/pydvl/utils/types.py
    def create(cls, *args: Any, **kwargs: Any):\n\"\"\"Create an instance of the class\"\"\"\nreturn super().__call__(*args, **kwargs)\n
    "},{"location":"api/pydvl/utils/types/#pydvl.utils.types.ensure_seed_sequence","title":"ensure_seed_sequence(seed=None)","text":"

    If the passed seed is a SeedSequence object then it is returned as is. If it is a Generator the internal protected seed sequence from the generator gets extracted. Otherwise, a new SeedSequence object is created from the passed (optional) seed.

    PARAMETER DESCRIPTION seed

    Either an int, a Generator object, a SeedSequence object or None.

    TYPE: Optional[Union[Seed, SeedSequence]] DEFAULT: None

    RETURNS DESCRIPTION SeedSequence

    A SeedSequence object.

    New in version 0.7.0

    Source code in src/pydvl/utils/types.py
    def ensure_seed_sequence(\nseed: Optional[Union[Seed, SeedSequence]] = None\n) -> SeedSequence:\n\"\"\"\n    If the passed seed is a SeedSequence object then it is returned as is. If it is\n    a Generator the internal protected seed sequence from the generator gets extracted.\n    Otherwise, a new SeedSequence object is created from the passed (optional) seed.\n    Args:\n        seed: Either an int, a Generator object a SeedSequence object or None.\n    Returns:\n        A SeedSequence object.\n    !!! tip \"New in version 0.7.0\"\n    \"\"\"\nif isinstance(seed, SeedSequence):\nreturn seed\nelif isinstance(seed, Generator):\nreturn cast(SeedSequence, seed.bit_generator.seed_seq)  # type: ignore\nelse:\nreturn SeedSequence(seed)\n
    "},{"location":"api/pydvl/utils/utility/","title":"Utility","text":"

    This module contains classes to manage and learn utility functions for the computation of values. Please see the documentation on Computing Data Values for more information.

    Utility holds information about model, data and scoring function (the latter being what one usually understands under utility in the general definition of Shapley value). It is automatically cached across machines when the cache is configured and it is enabled upon construction.

    DataUtilityLearning adds support for learning the scoring function to avoid repeated re-training of the model to compute the score.

    This module also contains derived Utility classes for toy games that are used for testing and for demonstration purposes.

    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility--references","title":"References","text":"
    1. Wang, T., Yang, Y. and Jia, R., 2021. Improving cooperative game theory-based data valuation via data utility learning. arXiv preprint arXiv:2107.06336.\u00a0\u21a9

    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.Utility","title":"Utility(model, data, scorer=None, *, default_score=0.0, score_range=(-np.inf, np.inf), catch_errors=True, show_warnings=False, enable_cache=False, cache_options=None, clone_before_fit=True)","text":"

    Convenience wrapper with configurable memoization of the scoring function.

    An instance of Utility holds the triple of model, dataset and scoring function which determines the value of data points. This is used for the computation of all game-theoretic values like Shapley values and the Least Core.

    The Utility expects the model to fulfill the SupervisedModel interface, i.e. to have fit(), predict(), and score() methods.

    When calling the utility, the model will be cloned if it is a scikit-learn model; otherwise a copy is created using copy.deepcopy.

    Since evaluating the scoring function requires retraining the model and that can be time-consuming, this class wraps it and caches the results of each execution. Caching is available both locally and across nodes, but must always be enabled for your project first, see Setting up the cache.

    ATTRIBUTE DESCRIPTION model

    The supervised model.

    TYPE: SupervisedModel

    data

    An object containing the split data.

    TYPE: Dataset

    scorer

    A scoring function. If None, the score() method of the model will be used. See score for ways to create and compose scorers, in particular how to set default values and ranges.

    TYPE: Scorer

    PARAMETER DESCRIPTION model

    Any supervised model. Typical choices can be found in the scikit-learn documentation (https://scikit-learn.org/stable/supervised_learning.html).

    TYPE: SupervisedModel

    data

    Dataset or GroupedDataset instance.

    TYPE: Dataset

    scorer

    A scoring object. If None, the score() method of the model will be used. See score for ways to create and compose scorers, in particular how to set default values and ranges. For convenience, a string can be passed, which will be used to construct a Scorer.

    TYPE: Optional[Union[str, Scorer]] DEFAULT: None

    default_score

    As a convenience when no scorer object is passed (where a default value can be provided), this argument also allows setting the default score for models that have not been fit, e.g. when too little data is passed, or errors arise.

    TYPE: float DEFAULT: 0.0

    score_range

    As with default_score, this is a convenience argument for when no scorer argument is provided, to set the numerical range of the score function. Some Monte Carlo methods can use this to estimate the number of samples required for a certain quality of approximation.

    TYPE: Tuple[float, float] DEFAULT: (-inf, inf)

    catch_errors

    Set to True to catch the errors when fit() fails. This could happen in several steps of the pipeline, e.g. when too little training data is passed, which happens often during Shapley value calculations. When this happens, the default_score is returned as a score and computation continues.

    TYPE: bool DEFAULT: True

    show_warnings

    Set to False to suppress warnings thrown by fit().

    TYPE: bool DEFAULT: False

    enable_cache

    If True, use memcached for memoization.

    TYPE: bool DEFAULT: False

    cache_options

    Optional configuration object for memcached.

    TYPE: Optional[MemcachedConfig] DEFAULT: None

    clone_before_fit

    If True, the model will be cloned before calling fit().

    TYPE: bool DEFAULT: True

    Example
    >>> from pydvl.utils import Utility, DataUtilityLearning, Dataset\n>>> from sklearn.linear_model import LinearRegression, LogisticRegression\n>>> from sklearn.datasets import load_iris\n>>> dataset = Dataset.from_sklearn(load_iris(), random_state=16)\n>>> u = Utility(LogisticRegression(random_state=16), dataset)\n>>> u(dataset.indices)\n0.9\n
    Source code in src/pydvl/utils/utility.py
    def __init__(\nself,\nmodel: SupervisedModel,\ndata: Dataset,\nscorer: Optional[Union[str, Scorer]] = None,\n*,\ndefault_score: float = 0.0,\nscore_range: Tuple[float, float] = (-np.inf, np.inf),\ncatch_errors: bool = True,\nshow_warnings: bool = False,\nenable_cache: bool = False,\ncache_options: Optional[MemcachedConfig] = None,\nclone_before_fit: bool = True,\n):\nself.model = self._clone_model(model)\nself.data = data\nif isinstance(scorer, str):\nscorer = Scorer(scorer, default=default_score, range=score_range)\nself.scorer = check_scoring(self.model, scorer)\nself.default_score = scorer.default if scorer is not None else default_score\n# TODO: auto-fill from known scorers ?\nself.score_range = scorer.range if scorer is not None else np.array(score_range)\nself.catch_errors = catch_errors\nself.show_warnings = show_warnings\nself.enable_cache = enable_cache\nself.cache_options: MemcachedConfig = cache_options or MemcachedConfig()\nself.clone_before_fit = clone_before_fit\nself._signature = serialize((hash(self.model), hash(data), hash(scorer)))\nself._initialize_utility_wrapper()\n
    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.Utility.signature","title":"signature property","text":"

    Signature used for caching model results.

    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.Utility.cache_stats","title":"cache_stats: Optional[CacheStats] property","text":"

    Cache statistics are gathered when cache is enabled. See CacheStats for all fields returned.

    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.Utility.__call__","title":"__call__(indices)","text":"PARAMETER DESCRIPTION indices

    a subset of valid indices for the x_train attribute of Dataset.

    TYPE: Iterable[int]

    Source code in src/pydvl/utils/utility.py
    def __call__(self, indices: Iterable[int]) -> float:\n\"\"\"\n    Args:\n        indices: a subset of valid indices for the\n            `x_train` attribute of [Dataset][pydvl.utils.dataset.Dataset].\n    \"\"\"\nutility: float = self._utility_wrapper(frozenset(indices))\nreturn utility\n
    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.DataUtilityLearning","title":"DataUtilityLearning(u, training_budget, model)","text":"

    Implementation of Data Utility Learning (Wang et al., 2021) [1].

    This object wraps a Utility and delegates calls to it, up until a given budget (number of iterations). Every tuple of input and output (a so-called utility sample) is stored. Once the budget is exhausted, DataUtilityLearning fits the given model to the utility samples. Subsequent calls will use the learned model to predict the utility instead of delegating.

    PARAMETER DESCRIPTION u

    The Utility to learn.

    TYPE: Utility

    training_budget

    Number of utility samples to collect before fitting the given model.

    TYPE: int

    model

    A supervised regression model

    TYPE: SupervisedModel

    Example
    >>> from pydvl.utils import Utility, DataUtilityLearning, Dataset\n>>> from sklearn.linear_model import LinearRegression, LogisticRegression\n>>> from sklearn.datasets import load_iris\n>>> dataset = Dataset.from_sklearn(load_iris())\n>>> u = Utility(LogisticRegression(), dataset)\n>>> wrapped_u = DataUtilityLearning(u, 3, LinearRegression())\n... # First 3 calls will be computed normally\n>>> for i in range(3):\n...     _ = wrapped_u((i,))\n>>> wrapped_u((1, 2, 3)) # Subsequent calls will be computed using the fit model for DUL\n0.0\n
    Source code in src/pydvl/utils/utility.py
    def __init__(\nself, u: Utility, training_budget: int, model: SupervisedModel\n) -> None:\nself.utility = u\nself.training_budget = training_budget\nself.model = model\nself._current_iteration = 0\nself._is_model_fit = False\nself._utility_samples: Dict[FrozenSet, Tuple[NDArray[np.bool_], float]] = {}\n
    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.DataUtilityLearning.data","title":"data: Dataset property","text":"

    Returns the wrapped utility's Dataset.

    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.MinerGameUtility","title":"MinerGameUtility(n_miners, **kwargs)","text":"

    Bases: Utility

    Toy game utility that is used for testing and demonstration purposes.

    Consider a group of n miners, who have discovered large bars of gold.

    If two miners can carry one piece of gold, then the payoff of a coalition \\(S\\) is:

    \\[{ v(S) = \\left\\{\\begin{array}{lll} \\mid S \\mid / 2 & \\text{, if} & \\mid S \\mid \\text{ is even} \\\\ ( \\mid S \\mid - 1)/2 & \\text{, if} & \\mid S \\mid \\text{ is odd} \\end{array}\\right. }\\]

    If there are more than two miners and there is an even number of miners, then the core consists of the single payoff where each miner gets 1/2.

    If there is an odd number of miners, then the core is empty.

    Taken from Wikipedia

    PARAMETER DESCRIPTION n_miners

    Number of miners that participate in the game.

    TYPE: int

    Source code in src/pydvl/utils/utility.py
    def __init__(self, n_miners: int, **kwargs):\nif n_miners <= 2:\nraise ValueError(f\"n_miners, {n_miners} should be > 2\")\nself.n_miners = n_miners\nx = np.arange(n_miners)[..., np.newaxis]\n# The y values don't matter here\ny = np.zeros_like(x)\nself.data = Dataset(x_train=x, y_train=y, x_test=x, y_test=y)\n
    "},{"location":"api/pydvl/utils/utility/#pydvl.utils.utility.GlovesGameUtility","title":"GlovesGameUtility(left, right, **kwargs)","text":"

    Bases: Utility

    Toy game utility that is used for testing and demonstration purposes.

    In this game, some players have a left glove and others a right glove. Single gloves have a worth of zero while pairs have a worth of 1.

    The payoff of a coalition \\(S\\) is:

    \\[{ v(S) = \\min( \\mid S \\cap L \\mid, \\mid S \\cap R \\mid ) }\\]

    Where \\(L\\), respectively \\(R\\), is the set of players with left gloves, respectively right gloves.

    PARAMETER DESCRIPTION left

    Number of players with a left glove.

    TYPE: int

    right

    Number of players with a right glove.

    TYPE: int

    Source code in src/pydvl/utils/utility.py
    def __init__(self, left: int, right: int, **kwargs):\nself.left = left\nself.right = right\nx = np.empty(left + right)[..., np.newaxis]\n# The y values don't matter here\ny = np.zeros_like(x)\nself.data = Dataset(x_train=x, y_train=y, x_test=x, y_test=y)\n
    "},{"location":"api/pydvl/value/","title":"Value","text":"

    This module implements algorithms for the exact and approximate computation of values and semi-values.

    See Data valuation for an introduction to the concepts and methods implemented here.

    "},{"location":"api/pydvl/value/result/","title":"Result","text":"

    This module collects types and methods for the inspection of the results of valuation algorithms.

    The most important class is ValuationResult, which provides access to raw values, as well as convenient behaviour as a Sequence with extended indexing and updating abilities, and conversion to pandas DataFrames.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result--operating-on-results","title":"Operating on results","text":"

    Results can be added together with the standard + operator. Because values are typically running averages of iterative algorithms, addition behaves like a weighted average of the two results, with the weights being the number of updates in each result: adding two results is the same as generating one result with the mean of the values of the two results as values. The variances are updated accordingly. See ValuationResult for details.

    Results can also be sorted by value, variance or number of updates, see sort(). The arrays of ValuationResult.values, ValuationResult.variances, ValuationResult.counts, ValuationResult.indices, ValuationResult.names are sorted in the same way.

    Indexing and slicing of results is supported and ValueItem objects are returned. These objects can be compared with the usual operators, which take only the ValueItem.value into account.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result--creating-result-objects","title":"Creating result objects","text":"

    The most commonly used factory method is ValuationResult.zeros(), which creates a result object with all values, variances and counts set to zero. ValuationResult.empty() creates an empty result object, which can be used as a starting point for adding results together. Empty results are discarded when added to other results. Finally, ValuationResult.from_random() samples random values uniformly.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValueItem","title":"ValueItem dataclass","text":"

    Bases: Generic[IndexT, NameT]

    The result of a value computation for one datum.

    ValueItems can be compared with the usual operators, forming a total order. Comparisons take only the value into account.

    Todo

    Maybe have a mode of comparing similar to np.isclose, or taking the variance into account.

    ATTRIBUTE DESCRIPTION index

    Index of the sample with this value in the original Dataset

    TYPE: IndexT

    name

    Name of the sample if it was provided. Otherwise, str(index)

    TYPE: NameT

    value

    The value

    TYPE: float

    variance

    Variance of the value if it was computed with an approximate method

    TYPE: Optional[float]

    count

    Number of updates for this value

    TYPE: Optional[int]

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValueItem.stderr","title":"stderr: Optional[float] property","text":"

    Standard error of the value.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult","title":"ValuationResult(*, values, variances=None, counts=None, indices=None, data_names=None, algorithm='', status=Status.Pending, sort=False, **extra_values)","text":"

    Bases: Sequence, Iterable[ValueItem[IndexT, NameT]], Generic[IndexT, NameT]

    Objects of this class hold the results of valuation algorithms.

    These include indices in the original Dataset, any data names (e.g. group names in GroupedDataset), the values themselves, and variance of the computation in the case of Monte Carlo methods. ValuationResults can be iterated over like any Sequence: iter(valuation_result) returns a generator of ValueItem in the order in which the object is sorted.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult--indexing","title":"Indexing","text":"

    Indexing can be position-based, when accessing any of the attributes values, variances, counts and indices, as well as when iterating over the object, or using the item access operator, both getter and setter. The \"position\" is either the original sequence in which the data was passed to the constructor, or the sequence in which the object is sorted, see below.

    Alternatively, indexing can be data-based, i.e. using the indices in the original dataset. This is the case for the methods get() and update().

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult--sorting","title":"Sorting","text":"

    Results can be sorted in-place with sort(), or alternatively using Python's standard sorted() and reversed(). Note that sorting values affects how iterators and the object itself as Sequence behave: values[0] returns a ValueItem with the highest or lowest ranking point if this object is sorted by descending or ascending value, respectively. If unsorted, values[0] returns the ValueItem at position 0, which has data index indices[0] in the Dataset.

    The same applies to direct indexing of the ValuationResult: the index is positional, according to the sorting. It does not refer to the \"data index\". To sort according to data index, use sort() with key=\"index\".

    In order to access ValueItem objects by their data index, use get().

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult--operating-on-results","title":"Operating on results","text":"

    Results can be added to each other with the + operator. Means and variances are correctly updated, using the counts attribute.

    Results can also be updated with new values using update(). Means and variances are updated accordingly using the Welford algorithm.

    Empty objects behave in a special way, see empty().

    PARAMETER DESCRIPTION values

    An array of values. If omitted, defaults to an empty array or to an array of zeros if indices are given.

    TYPE: NDArray[float_]

    indices

    An optional array of indices in the original dataset. If omitted, defaults to np.arange(len(values)). Warning: It is common to pass the indices of a Dataset here. Attention must be paid in a parallel context to copy them to the local process. Just do indices=np.copy(data.indices).

    TYPE: Optional[NDArray[IndexT]] DEFAULT: None

    variances

    An optional array of variances in the computation of each value.

    TYPE: Optional[NDArray[float_]] DEFAULT: None

    counts

    An optional array with the number of updates for each value. Defaults to an array of ones.

    TYPE: Optional[NDArray[int_]] DEFAULT: None

    data_names

    Names for the data points. Defaults to index numbers if not set.

    TYPE: Optional[Sequence[NameT] | NDArray[NameT]] DEFAULT: None

    algorithm

    The method used.

    TYPE: str DEFAULT: ''

    status

    The end status of the algorithm.

    TYPE: Status DEFAULT: Pending

    sort

    Whether to sort the indices by ascending value. See above how this affects usage as an iterable or sequence.

    TYPE: bool DEFAULT: False

    extra_values

    Additional values that can be passed as keyword arguments. This can contain, for example, the least core value.

    DEFAULT: {}

    RAISES DESCRIPTION ValueError

    If input arrays have mismatching lengths.

    Source code in src/pydvl/value/result.py
    def __init__(\nself,\n*,\nvalues: NDArray[np.float_],\nvariances: Optional[NDArray[np.float_]] = None,\ncounts: Optional[NDArray[np.int_]] = None,\nindices: Optional[NDArray[IndexT]] = None,\ndata_names: Optional[Sequence[NameT] | NDArray[NameT]] = None,\nalgorithm: str = \"\",\nstatus: Status = Status.Pending,\nsort: bool = False,\n**extra_values,\n):\nif variances is not None and len(variances) != len(values):\nraise ValueError(\"Lengths of values and variances do not match\")\nif data_names is not None and len(data_names) != len(values):\nraise ValueError(\"Lengths of values and data_names do not match\")\nif indices is not None and len(indices) != len(values):\nraise ValueError(\"Lengths of values and indices do not match\")\nself._algorithm = algorithm\nself._status = Status(status)  # Just in case we are given a string\nself._values = values\nself._variances = np.zeros_like(values) if variances is None else variances\nself._counts = np.ones_like(values) if counts is None else counts\nself._sort_order = None\nself._extra_values = extra_values or {}\n# Yuk...\nif data_names is None:\nif indices is not None:\nself._names = np.copy(indices)\nelse:\nself._names = np.arange(len(self._values), dtype=np.int_)\nelif not isinstance(data_names, np.ndarray):\nself._names = np.array(data_names)\nelse:\nself._names = data_names.copy()\nif len(np.unique(self._names)) != len(self._names):\nraise ValueError(\"Data names must be unique\")\nif indices is None:\nindices = np.arange(len(self._values), dtype=np.int_)\nself._indices = indices\nself._positions = {idx: pos for pos, idx in enumerate(indices)}\nself._sort_positions: NDArray[np.int_] = np.arange(\nlen(self._values), dtype=np.int_\n)\nif sort:\nself.sort()\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.values","title":"values: NDArray[np.float_] property","text":"

    The values, possibly sorted.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.variances","title":"variances: NDArray[np.float_] property","text":"

    The variances, possibly sorted.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.stderr","title":"stderr: NDArray[np.float_] property","text":"

    The raw standard errors, possibly sorted.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.counts","title":"counts: NDArray[np.int_] property","text":"

    The raw counts, possibly sorted.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.indices","title":"indices: NDArray[IndexT] property","text":"

    The indices for the values, possibly sorted.

    If the object is unsorted, then these are the same as declared at construction or np.arange(len(values)) if none were passed.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.names","title":"names: NDArray[NameT] property","text":"

    The names for the values, possibly sorted. If the object is unsorted, then these are the same as declared at construction or np.arange(len(values)) if none were passed.

    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.sort","title":"sort(reverse=False, key='value')","text":"

    Sorts the indices in place by key.

    Once sorted, iteration over the results, and indexing of all the properties ValuationResult.values, ValuationResult.variances, ValuationResult.counts, ValuationResult.indices and ValuationResult.names will follow the same order.

    PARAMETER DESCRIPTION reverse

    Whether to sort in descending order of the chosen key.

    TYPE: bool DEFAULT: False

    key

    The key to sort by. Defaults to ValueItem.value.

    TYPE: Literal['value', 'variance', 'index', 'name'] DEFAULT: 'value'

    Source code in src/pydvl/value/result.py
    def sort(\nself,\nreverse: bool = False,\n# Need a \"Comparable\" type here\nkey: Literal[\"value\", \"variance\", \"index\", \"name\"] = \"value\",\n) -> None:\n\"\"\"Sorts the indices in place by `key`.\n    Once sorted, iteration over the results, and indexing of all the\n    properties\n    [ValuationResult.values][pydvl.value.result.ValuationResult.values],\n    [ValuationResult.variances][pydvl.value.result.ValuationResult.variances],\n    [ValuationResult.counts][pydvl.value.result.ValuationResult.counts],\n    [ValuationResult.indices][pydvl.value.result.ValuationResult.indices]\n    and [ValuationResult.names][pydvl.value.result.ValuationResult.names]\n    will follow the same order.\n    Args:\n        reverse: Whether to sort in descending order by value.\n        key: The key to sort by. Defaults to\n            [ValueItem.value][pydvl.value.result.ValueItem].\n    \"\"\"\nkeymap = {\n\"index\": \"_indices\",\n\"value\": \"_values\",\n\"variance\": \"_variances\",\n\"name\": \"_names\",\n}\nself._sort_positions = np.argsort(getattr(self, keymap[key]))\nif reverse:\nself._sort_positions = self._sort_positions[::-1]\nself._sort_order = reverse\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.__getattr__","title":"__getattr__(attr)","text":"

    Allows access to extra values as if they were properties of the instance.

    Source code in src/pydvl/value/result.py
    def __getattr__(self, attr: str) -> Any:\n\"\"\"Allows access to extra values as if they were properties of the instance.\"\"\"\n# This is here to avoid a RecursionError when copying or pickling the object\nif attr == \"_extra_values\":\nraise AttributeError()\ntry:\nreturn self._extra_values[attr]\nexcept KeyError as e:\nraise AttributeError(\nf\"{self.__class__.__name__} object has no attribute {attr}\"\n) from e\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.__iter__","title":"__iter__()","text":"

    Iterate over the results returning ValueItem objects. To sort in place before iteration, use sort().

    Source code in src/pydvl/value/result.py
    def __iter__(self) -> Iterator[ValueItem[IndexT, NameT]]:\n\"\"\"Iterate over the results returning [ValueItem][pydvl.value.result.ValueItem] objects.\n    To sort in place before iteration, use [sort()][pydvl.value.result.ValuationResult.sort].\n    \"\"\"\nfor pos in self._sort_positions:\nyield ValueItem(\nself._indices[pos],\nself._names[pos],\nself._values[pos],\nself._variances[pos],\nself._counts[pos],\n)\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.__add__","title":"__add__(other)","text":"

    Adds two ValuationResults.

    The values must have been computed with the same algorithm. An exception to this is if one argument has empty values, in which case the other argument is returned.

    Warning

    Abusing this will introduce numerical errors.

    Means and standard errors are correctly handled. Statuses are added with bit-wise &, see Status. data_names are taken from the left summand, or if unavailable from the right one. The algorithm string is carried over if both terms have the same one or concatenated.

    It is possible to add ValuationResults of different lengths, and with different or overlapping indices. The result will have the union of the indices; for indices present in both summands, the values, variances and counts are combined.

    Warning

    FIXME: Arbitrary extra_values aren't handled.

    Source code in src/pydvl/value/result.py
    def __add__(\nself, other: ValuationResult[IndexT, NameT]\n) -> ValuationResult[IndexT, NameT]:\n\"\"\"Adds two ValuationResults.\n    The values must have been computed with the same algorithm. An exception\n    to this is if one argument has empty values, in which case the other\n    argument is returned.\n    !!! Warning\n        Abusing this will introduce numerical errors.\n    Means and standard errors are correctly handled. Statuses are added with\n    bit-wise `&`, see [Status][pydvl.value.result.Status].\n    `data_names` are taken from the left summand, or if unavailable from\n    the right one. The `algorithm` string is carried over if both terms\n    have the same one or concatenated.\n    It is possible to add ValuationResults of different lengths, and with\n    different or overlapping indices. The result will have the union of\n    indices, and the values.\n    !!! Warning\n        FIXME: Arbitrary `extra_values` aren't handled.\n    \"\"\"\n# empty results\nif len(self.values) == 0:\nreturn other\nif len(other.values) == 0:\nreturn self\nself._check_compatible(other)\nindices = np.union1d(self._indices, other._indices).astype(self._indices.dtype)\nthis_pos = np.searchsorted(indices, self._indices)\nother_pos = np.searchsorted(indices, other._indices)\nn: NDArray[np.int_] = np.zeros_like(indices, dtype=int)\nm: NDArray[np.int_] = np.zeros_like(indices, dtype=int)\nxn: NDArray[np.int_] = np.zeros_like(indices, dtype=float)\nxm: NDArray[np.int_] = np.zeros_like(indices, dtype=float)\nvn: NDArray[np.int_] = np.zeros_like(indices, dtype=float)\nvm: NDArray[np.int_] = np.zeros_like(indices, dtype=float)\nn[this_pos] = self._counts\nxn[this_pos] = self._values\nvn[this_pos] = self._variances\nm[other_pos] = other._counts\nxm[other_pos] = other._values\nvm[other_pos] = other._variances\n# Sample mean of n+m samples from two means of n and m samples\nxnm = (n * xn + m * xm) / (n + m)\n# Sample variance of n+m samples from two sample variances of n and m 
samples\nvnm = (n * (vn + xn**2) + m * (vm + xm**2)) / (n + m) - xnm**2\nif np.any(vnm < 0):\nif np.any(vnm < -1e-6):\nlogger.warning(\n\"Numerical error in variance computation. \"\nf\"Negative sample variances clipped to 0 in {vnm}\"\n)\nvnm[np.where(vnm < 0)] = 0\n# Merging of names:\n# If an index has the same name in both results, it must be the same.\n# If an index has a name in one result but not the other, the name is\n# taken from the result with the name.\nif self._names.dtype != other._names.dtype:\nif np.can_cast(other._names.dtype, self._names.dtype, casting=\"safe\"):\nother._names = other._names.astype(self._names.dtype)\nlogger.warning(\nf\"Casting ValuationResult.names from {other._names.dtype} to {self._names.dtype}\"\n)\nelse:\nraise TypeError(\nf\"Cannot cast ValuationResult.names from \"\nf\"{other._names.dtype} to {self._names.dtype}\"\n)\nboth_pos = np.intersect1d(this_pos, other_pos)\nif len(both_pos) > 0:\nthis_names: NDArray = np.empty_like(indices, dtype=object)\nother_names: NDArray = np.empty_like(indices, dtype=object)\nthis_names[this_pos] = self._names\nother_names[other_pos] = other._names\nthis_shared_names = np.take(this_names, both_pos)\nother_shared_names = np.take(other_names, both_pos)\nif np.any(this_shared_names != other_shared_names):\nraise ValueError(f\"Mismatching names in ValuationResults\")\nnames = np.empty_like(indices, dtype=self._names.dtype)\nnames[this_pos] = self._names\nnames[other_pos] = other._names\nreturn ValuationResult(\nalgorithm=self.algorithm or other.algorithm or \"\",\nstatus=self.status & other.status,\nindices=indices,\nvalues=xnm,\nvariances=vnm,\ncounts=n + m,\ndata_names=names,\n# FIXME: What to do with extra_values? This is not commutative:\n# extra_values=self._extra_values.update(other._extra_values),\n)\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.update","title":"update(idx, new_value)","text":"

    Updates the result in place with a new value, using running mean and variance.

    PARAMETER DESCRIPTION idx

    Data index of the value to update.

    TYPE: int

    new_value

    New value to add to the result.

    TYPE: float

    RETURNS DESCRIPTION ValuationResult[IndexT, NameT]

    A reference to the same, modified result.

    RAISES DESCRIPTION IndexError

    If the index is not found.

    Source code in src/pydvl/value/result.py
    def update(self, idx: int, new_value: float) -> ValuationResult[IndexT, NameT]:\n\"\"\"Updates the result in place with a new value, using running mean\n    and variance.\n    Args:\n        idx: Data index of the value to update.\n        new_value: New value to add to the result.\n    Returns:\n        A reference to the same, modified result.\n    Raises:\n        IndexError: If the index is not found.\n    \"\"\"\ntry:\npos = self._positions[idx]\nexcept KeyError:\nraise IndexError(f\"Index {idx} not found in ValuationResult\")\nval, var = running_moments(\nself._values[pos], self._variances[pos], self._counts[pos], new_value\n)\nself[pos] = ValueItem(\nindex=cast(IndexT, idx),  # FIXME\nname=self._names[pos],\nvalue=val,\nvariance=var,\ncount=self._counts[pos] + 1,\n)\nreturn self\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.get","title":"get(idx)","text":"

    Retrieves a ValueItem by data index, as opposed to the indexing operator, which uses the sort index (position).

    RAISES DESCRIPTION IndexError

    If the index is not found.

    Source code in src/pydvl/value/result.py
    def get(self, idx: Integral) -> ValueItem:\n\"\"\"Retrieves a ValueItem by data index, as opposed to sort index, like\n    the indexing operator.\n    Raises:\n         IndexError: If the index is not found.\n    \"\"\"\ntry:\npos = self._positions[idx]\nexcept KeyError:\nraise IndexError(f\"Index {idx} not found in ValuationResult\")\nreturn ValueItem(\nself._indices[pos],\nself._names[pos],\nself._values[pos],\nself._variances[pos],\nself._counts[pos],\n)\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.to_dataframe","title":"to_dataframe(column=None, use_names=False)","text":"

    Returns values as a dataframe.

    PARAMETER DESCRIPTION column

    Name for the column holding the data value. Defaults to the name of the algorithm used.

    TYPE: Optional[str] DEFAULT: None

    use_names

    Whether to use data names instead of indices for the DataFrame's index.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION DataFrame

    A dataframe with two columns, one for the values, with name given as explained in column, and another with standard errors for approximate algorithms. The latter will be named column+'_stderr'.

    Raises: ImportError: If pandas is not installed

    Source code in src/pydvl/value/result.py
    def to_dataframe(\nself, column: Optional[str] = None, use_names: bool = False\n) -> pandas.DataFrame:\n\"\"\"Returns values as a dataframe.\n    Args:\n        column: Name for the column holding the data value. Defaults to\n            the name of the algorithm used.\n        use_names: Whether to use data names instead of indices for the\n            DataFrame's index.\n    Returns:\n        A dataframe with two columns, one for the values, with name\n            given as explained in `column`, and another with standard errors for\n            approximate algorithms. The latter will be named `column+'_stderr'`.\n    Raises:\n         ImportError: If pandas is not installed\n    \"\"\"\nif not pandas:\nraise ImportError(\"Pandas required for DataFrame export\")\ncolumn = column or self._algorithm\ndf = pandas.DataFrame(\nself._values[self._sort_positions],\nindex=self._names[self._sort_positions]\nif use_names\nelse self._indices[self._sort_positions],\ncolumns=[column],\n)\ndf[column + \"_stderr\"] = self.stderr[self._sort_positions]\nreturn df\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.from_random","title":"from_random(size, total=None, seed=None, **kwargs) classmethod","text":"

    Creates a ValuationResult object and fills it with an array of random values from a uniform distribution in [-1,1]. The values can be made to sum up to a given total number (doing so will change their range).

    PARAMETER DESCRIPTION size

    Number of values to generate

    TYPE: int

    total

    If set, the values are normalized to sum to this number (\"efficiency\" property of Shapley values).

    TYPE: Optional[float] DEFAULT: None

    kwargs

    Additional options to pass to the constructor of ValuationResult. Use to override status, names, etc.

    DEFAULT: {}

    RETURNS DESCRIPTION 'ValuationResult'

    A valuation result with its status set to Status.Converged by default.

    RAISES DESCRIPTION ValueError

    If size is less than 1.

    Changed in version 0.6.0

    Added parameter total. Added a check for zero size.

    Source code in src/pydvl/value/result.py
    @classmethod\ndef from_random(\ncls,\nsize: int,\ntotal: Optional[float] = None,\nseed: Optional[Seed] = None,\n**kwargs,\n) -> \"ValuationResult\":\n\"\"\"Creates a [ValuationResult][pydvl.value.result.ValuationResult] object and fills it with an array\n    of random values from a uniform distribution in [-1,1]. The values can\n    be made to sum up to a given total number (doing so will change their range).\n    Args:\n        size: Number of values to generate\n        total: If set, the values are normalized to sum to this number\n            (\"efficiency\" property of Shapley values).\n        kwargs: Additional options to pass to the constructor of\n            [ValuationResult][pydvl.value.result.ValuationResult]. Use to override status, names, etc.\n    Returns:\n        A valuation result with its status set to\n        [Status.Converged][pydvl.utils.status.Status] by default.\n    Raises:\n         ValueError: If `size` is less than 1.\n    !!! tip \"Changed in version 0.6.0\"\n        Added parameter `total`. Check for zero size\n    \"\"\"\nif size < 1:\nraise ValueError(\"Size must be a positive integer\")\nrng = np.random.default_rng(seed)\nvalues = rng.uniform(low=-1, high=1, size=size)\nif total is not None:\nvalues *= total / np.sum(values)\noptions = dict(values=values, status=Status.Converged, algorithm=\"random\")\noptions.update(kwargs)\nreturn cls(**options)  # type: ignore\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.empty","title":"empty(algorithm='', indices=None, data_names=None, n_samples=0) classmethod","text":"

    Creates an empty ValuationResult object.

    Empty results are characterised by having an empty array of values. When another result is added to an empty one, the empty one is discarded.

    PARAMETER DESCRIPTION algorithm

    Name of the algorithm used to compute the values

    TYPE: str DEFAULT: ''

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Source code in src/pydvl/value/result.py
    @classmethod\n@deprecated(\ntarget=True,\ndeprecated_in=\"0.6.0\",\nremove_in=\"0.8.0\",\nargs_mapping=dict(indices=None, data_names=None, n_samples=None),\ntemplate_mgs=\"`%(source_name)s` is deprecated for generating zero-filled \"\n\"results, use `ValuationResult.zeros()` instead.\",\n)\ndef empty(\ncls,\nalgorithm: str = \"\",\nindices: Optional[Sequence[IndexT] | NDArray[IndexT]] = None,\ndata_names: Optional[Sequence[NameT] | NDArray[NameT]] = None,\nn_samples: int = 0,\n) -> ValuationResult:\n\"\"\"Creates an empty [ValuationResult][pydvl.value.result.ValuationResult] object.\n    Empty results are characterised by having an empty array of values. When\n    another result is added to an empty one, the empty one is discarded.\n    Args:\n        algorithm: Name of the algorithm used to compute the values\n    Returns:\n        Object with the results.\n    \"\"\"\nif indices is not None or data_names is not None or n_samples != 0:\nreturn cls.zeros(\nalgorithm=algorithm,\nindices=indices,\ndata_names=data_names,\nn_samples=n_samples,\n)\nreturn cls(algorithm=algorithm, status=Status.Pending, values=np.array([]))\n
    "},{"location":"api/pydvl/value/result/#pydvl.value.result.ValuationResult.zeros","title":"zeros(algorithm='', indices=None, data_names=None, n_samples=0) classmethod","text":"

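    Creates a ValuationResult object with all values, variances and counts set to zero.

    If neither indices nor a positive n_samples are given, the result has an empty array of values, i.e. it is an empty result. Empty results are characterised by having an empty array of values. When another result is added to an empty one, the empty one is ignored.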

    PARAMETER DESCRIPTION algorithm

    Name of the algorithm used to compute the values

    TYPE: str DEFAULT: ''

    indices

    Data indices to use. A copy will be made. If not given, the indices will be set to the range [0, n_samples).

    TYPE: Optional[Sequence[IndexT] | NDArray[IndexT]] DEFAULT: None

    data_names

    Data names to use. A copy will be made. If not given, the names will be set to the string representation of the indices.

    TYPE: Optional[Sequence[NameT] | NDArray[NameT]] DEFAULT: None

    n_samples

    Number of data points whose values are computed. If not given, the length of indices will be used.

    TYPE: int DEFAULT: 0

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Source code in src/pydvl/value/result.py
    @classmethod\ndef zeros(\ncls,\nalgorithm: str = \"\",\nindices: Optional[Sequence[IndexT] | NDArray[IndexT]] = None,\ndata_names: Optional[Sequence[NameT] | NDArray[NameT]] = None,\nn_samples: int = 0,\n) -> ValuationResult:\n\"\"\"Creates an empty [ValuationResult][pydvl.value.result.ValuationResult] object.\n    Empty results are characterised by having an empty array of values. When\n    another result is added to an empty one, the empty one is ignored.\n    Args:\n        algorithm: Name of the algorithm used to compute the values\n        indices: Data indices to use. A copy will be made. If not given,\n            the indices will be set to the range `[0, n_samples)`.\n        data_names: Data names to use. A copy will be made. If not given,\n            the names will be set to the string representation of the indices.\n        n_samples: Number of data points whose values are computed. If\n            not given, the length of `indices` will be used.\n    Returns:\n        Object with the results.\n    \"\"\"\nif indices is None:\nindices = np.arange(n_samples, dtype=np.int_)\nelse:\nindices = np.array(indices, dtype=np.int_)\nreturn cls(\nalgorithm=algorithm,\nstatus=Status.Pending,\nindices=indices,\ndata_names=np.array(data_names, dtype=object)\nif data_names is not None\nelse np.empty_like(indices, dtype=object),\nvalues=np.zeros(len(indices)),\nvariances=np.zeros(len(indices)),\ncounts=np.zeros(len(indices), dtype=np.int_),\n)\n
    "},{"location":"api/pydvl/value/sampler/","title":"Sampler","text":"

    Samplers iterate over subsets of indices.

    The classes in this module are used to iterate over an index set \\(I\\) and over subsets of the complement of each index in the whole set, as required for the computation of marginal utility for semi-values. The elements returned when iterating over any subclass of PowersetSampler are tuples of the form \\((i, S)\\), i.e. (idx, subset), where \\(i\\) is an index of interest (the index of the element being added to the subset), and \\(S \\subset I \\setminus \\{i\\}\\) is a subset of the complement of \\(i\\).

    The iteration happens in two nested loops. An outer loop iterates over \\(I\\), and an inner loop iterates over the powerset of \\(I \\setminus \\{i\\}\\). The outer iteration can be either sequential or at random.

    Note

    This is the natural mode of iteration for the combinatorial definition of semi-values, in particular Shapley value. For the computation using permutations, adhering to this interface is not ideal, but we stick to it for consistency.

    The samplers are used in the semivalues module to compute any semi-value, in particular Shapley and Beta values, and Banzhaf indices.

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler--slicing-of-samplers","title":"Slicing of samplers","text":"

    The samplers can be sliced for parallel computation. For those which are embarrassingly parallel, this is done by slicing the set of \"outer\" indices and returning new samplers over those slices. This includes all truly powerset-based samplers, such as DeterministicUniformSampler and UniformSampler. In contrast, slicing a PermutationSampler creates a new sampler which iterates over the same indices.

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PowersetSampler","title":"PowersetSampler(indices, index_iteration=IndexIteration.Sequential, outer_indices=None)","text":"

    Bases: ABC, Iterable[SampleT], Generic[IndexT]

    Samplers are custom iterables over subsets of indices.

    Calling iter() on a sampler returns an iterator over tuples of the form \\((i, S)\\), where \\(i\\) is an index of interest, and \\(S \\subset I \\setminus \\{i\\}\\) is a subset of the complement of \\(i\\).

    This is done in two nested loops, where the outer loop iterates over the set of indices, and the inner loop iterates over subsets of the complement of the current index. The outer iteration can be either sequential or at random.

    Note

    Samplers are not iterators themselves, so that each call to iter() e.g. in a for loop creates a new iterator.

    Example
    >>>for idx, s in DeterministicUniformSampler(np.arange(1, 3)):\n>>>    print(s, end=\"\")\n[][2,][][1,]\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PowersetSampler--methods-required-in-subclasses","title":"Methods required in subclasses","text":"

    Samplers must implement a weight() function to be used as a multiplier in Monte Carlo sums, so that the limit expectation coincides with the semi-value.

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PowersetSampler--slicing-of-samplers","title":"Slicing of samplers","text":"

    The samplers can be sliced for parallel computation. For those which are embarrassingly parallel, this is done by slicing the set of \"outer\" indices and returning new samplers over those slices.

    indices: The set of items (indices) to sample from.\nindex_iteration: the order in which indices are iterated over\nouter_indices: The set of items (indices) over which to iterate\nwhen sampling. Subsets are taken from the complement of each index\nin succession. For embarrassingly parallel computations, this set\nis sliced and the samplers are used to iterate over the slices.\n
    Source code in src/pydvl/value/sampler.py
    def __init__(\nself,\nindices: NDArray[IndexT],\nindex_iteration: IndexIteration = IndexIteration.Sequential,\nouter_indices: NDArray[IndexT] | None = None,\n):\n\"\"\"\n    Args:\n        indices: The set of items (indices) to sample from.\n        index_iteration: the order in which indices are iterated over\n        outer_indices: The set of items (indices) over which to iterate\n            when sampling. Subsets are taken from the complement of each index\n            in succession. For embarrassingly parallel computations, this set\n            is sliced and the samplers are used to iterate over the slices.\n    \"\"\"\nself._indices = indices\nself._index_iteration = index_iteration\nself._outer_indices = outer_indices if outer_indices is not None else indices\nself._n = len(indices)\nself._n_samples = 0\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PowersetSampler.iterindices","title":"iterindices()","text":"

    Iterates over indices in the order specified at construction.

    FIXME: this is probably not very useful, but I couldn't decide which method is better.

    Source code in src/pydvl/value/sampler.py
    def iterindices(self) -> Iterator[IndexT]:\n\"\"\"Iterates over indices in the order specified at construction.\n    FIXME: this is probably not very useful, but I couldn't decide\n      which method is better\n    \"\"\"\nif self._index_iteration is PowersetSampler.IndexIteration.Sequential:\nfor idx in self._outer_indices:\nyield idx\nelif self._index_iteration is PowersetSampler.IndexIteration.Random:\nwhile True:\nyield np.random.choice(self._outer_indices, size=1).item()\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PowersetSampler.__len__","title":"__len__()","text":"

    Returns the number of outer indices over which the sampler iterates.

    Source code in src/pydvl/value/sampler.py
    def __len__(self) -> int:\n\"\"\"Returns the number of outer indices over which the sampler iterates.\"\"\"\nreturn len(self._outer_indices)\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PowersetSampler.weight","title":"weight(n, subset_len) abstractmethod classmethod","text":"

    Factor by which to multiply Monte Carlo samples, so that the mean converges to the desired expression.

    By the Law of Large Numbers, the sample mean of \\(\\delta_i(S_j)\\) converges to the expectation under the distribution from which \\(S_j\\) is sampled.

    \\[ \\frac{1}{m} \\sum_{j = 1}^m \\delta_i (S_j) c (S_j) \\longrightarrow \\underset{S \\sim \\mathcal{D}_{- i}}{\\mathbb{E}} [\\delta_i (S) c ( S)]\\]

    We add a factor \\(c(S_j)\\) in order to have this expectation coincide with the desired expression.

    Source code in src/pydvl/value/sampler.py
    @classmethod\n@abc.abstractmethod\ndef weight(cls, n: int, subset_len: int) -> float:\nr\"\"\"Factor by which to multiply Monte Carlo samples, so that the\n    mean converges to the desired expression.\n    By the Law of Large Numbers, the sample mean of $\\delta_i(S_j)$\n    converges to the expectation under the distribution from which $S_j$ is\n    sampled.\n    $$ \\frac{1}{m}  \\sum_{j = 1}^m \\delta_i (S_j) c (S_j) \\longrightarrow\n       \\underset{S \\sim \\mathcal{D}_{- i}}{\\mathbb{E}} [\\delta_i (S) c (\n       S)]$$\n    We add a factor $c(S_j)$ in order to have this expectation coincide with\n    the desired expression.\n    \"\"\"\n...\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.StochasticSamplerMixin","title":"StochasticSamplerMixin(*args, seed=None, **kwargs)","text":"

    Mixin class for samplers which use a random number generator.

    Source code in src/pydvl/value/sampler.py
    def __init__(self, *args, seed: Optional[Seed] = None, **kwargs):\nsuper().__init__(*args, **kwargs)\nself._rng = np.random.default_rng(seed)\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.DeterministicUniformSampler","title":"DeterministicUniformSampler(indices, *args, **kwargs)","text":"

    Bases: PowersetSampler[IndexT]

    For every index \\(i\\), each subset of the complement indices - {i} is returned.

    Note

    Indices are always iterated over sequentially, irrespective of the value of index_iteration upon construction.

    Example
    >>> for idx, s in DeterministicUniformSampler(np.arange(2)):\n>>>    print(f\"{idx} - {s}\", end=\", \")\n0 - [], 0 - [1], 1 - [], 1 - [0],\n
    PARAMETER DESCRIPTION indices

    The set of items (indices) to sample from.

    TYPE: NDArray[IndexT]

    Source code in src/pydvl/value/sampler.py
    def __init__(self, indices: NDArray[IndexT], *args, **kwargs):\n\"\"\"An iterator to perform uniform deterministic sampling of subsets.\n    For every index $i$, each subset of the complement `indices - {i}` is\n    returned.\n    !!! Note\n        Indices are always iterated over sequentially, irrespective of\n        the value of `index_iteration` upon construction.\n    ??? Example\n        ``` pycon\n        >>> for idx, s in DeterministicUniformSampler(np.arange(2)):\n        >>>    print(f\"{idx} - {s}\", end=\", \")\n        1 - [], 1 - [2], 2 - [], 2 - [1],\n        ```\n    Args:\n        indices: The set of items (indices) to sample from.\n    \"\"\"\n# Force sequential iteration\nkwargs.update({\"index_iteration\": PowersetSampler.IndexIteration.Sequential})\nsuper().__init__(indices, *args, **kwargs)\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.UniformSampler","title":"UniformSampler","text":"

    Bases: StochasticSamplerMixin, PowersetSampler[IndexT]

    An iterator to perform uniform random sampling of subsets.

    Iterating over every index \\(i\\), either in sequence or at random depending on the value of index_iteration, one subset of the complement indices - {i} is sampled with equal probability \\(1/2^{n-1}\\). The iterator never ends.

    Example

    The code

    for idx, s in UniformSampler(np.arange(5)):\nprint(f\"{idx} - {s}\", end=\", \")\n
    Produces the output:
    0 - [1 4], 1 - [2 3], 2 - [0 1 3], 3 - [], 4 - [2], 0 - [1 3 4], 1 - [0 2]\n(...)\n

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.UniformSampler.weight","title":"weight(n, subset_len) classmethod","text":"

    Correction coming from Monte Carlo integration so that the mean of the marginals converges to the value: the uniform distribution over the powerset of a set with n-1 elements has mass 1/2^{n-1} over each subset, so each sample is weighted by the inverse, 2^{n-1}.

    Source code in src/pydvl/value/sampler.py
    @classmethod\ndef weight(cls, n: int, subset_len: int) -> float:\n\"\"\"Correction coming from Monte Carlo integration so that the mean of\n    the marginals converges to the value: the uniform distribution over the\n    powerset of a set with n-1 elements has mass 2^{n-1} over each subset.\"\"\"\nreturn float(2 ** (n - 1)) if n > 0 else 1.0\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.AntitheticSampler","title":"AntitheticSampler","text":"

    Bases: StochasticSamplerMixin, PowersetSampler[IndexT]

    An iterator to perform uniform random sampling of subsets, and their complements.

    Works as UniformSampler, but for every tuple \\((i,S)\\), it subsequently returns \\((i,S^c)\\), where \\(S^c\\) is the complement of the set \\(S\\), including the index \\(i\\) itself.

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PermutationSampler","title":"PermutationSampler","text":"

    Bases: StochasticSamplerMixin, PowersetSampler[IndexT]

    Sample permutations of indices and iterate through each returning increasing subsets, as required for the permutation definition of semi-values.

    This sampler does not implement the two loops described in PowersetSampler. Instead, for a permutation (3,1,4,2), it returns in sequence the tuples of index and sets: (3, {}), (1, {3}), (4, {3,1}) and (2, {3,1,4}).

    Note that the full index set is never returned.

    Warning

    This sampler requires caching to be enabled or computation will be doubled wrt. a \"direct\" implementation of permutation MC

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.PermutationSampler.__getitem__","title":"__getitem__(key)","text":"

    Permutation samplers cannot be split across indices, so we return a copy of the full sampler.

    Source code in src/pydvl/value/sampler.py
    def __getitem__(self, key: slice | list[int]) -> PowersetSampler[IndexT]:\n\"\"\"Permutation samplers cannot be split across indices, so we return\n    a copy of the full sampler.\"\"\"\nreturn super().__getitem__(slice(None))\n
    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.DeterministicPermutationSampler","title":"DeterministicPermutationSampler","text":"

    Bases: PermutationSampler[IndexT]

    Samples all n! permutations of the indices deterministically, and iterates through them, returning sets as required for the permutation-based definition of semi-values.

    Warning

    This sampler requires caching to be enabled or computation will be doubled wrt. a \"direct\" implementation of permutation MC

    Warning

    This sampler is not parallelizable, as it always iterates over the whole set of permutations in the same order. Different processes would always return the same values for all indices.

    "},{"location":"api/pydvl/value/sampler/#pydvl.value.sampler.RandomHierarchicalSampler","title":"RandomHierarchicalSampler","text":"

    Bases: StochasticSamplerMixin, PowersetSampler[IndexT]

    For every index, sample a set size, then a set of that size.

    Todo

    This is unnecessary, but a step towards proper stratified sampling.

    "},{"location":"api/pydvl/value/semivalues/","title":"Semivalues","text":"

    This module provides the core functionality for the computation of generic semi-values. A semi-value is any valuation function with the form:

    \\[v_\\text{semi}(i) = \\sum_{k=1}^n w(k) \\sum_{S \\subset D_{-i}^{(k)}} [U(S_{+i})-U(S)],\\]

    where the coefficients \\(w(k)\\) satisfy the property:

    \\[\\sum_{k=1}^n w(k) = 1.\\] Note

    For implementation consistency, we slightly depart from the common definition of semi-values, which includes a factor \\(1/n\\) in the sum over subsets. Instead, we subsume this factor into the coefficient \\(w(k)\\).

    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues--main-components","title":"Main components","text":"

    The computation of a semi-value requires two components:

    1. A subset sampler that generates subsets of the set \\(D\\) of interest.
    2. A coefficient \\(w(k)\\) that assigns a weight to each subset size \\(k\\).

    Samplers can be found in sampler, and can be classified into two categories: powerset samplers and permutation samplers. Powerset samplers generate subsets of \\(D_{-i}\\), while the permutation sampler generates permutations of \\(D\\). The former conform to the above definition of semi-values, while the latter reformulates it as:

    \\[ v(i) = \\frac{1}{n!} \\sum_{\\sigma \\in \\Pi(n)} \\tilde{w}( | \\sigma_{:i} | )[U(\\sigma_{:i} \\cup \\{i\\}) \u2212 U(\\sigma_{:i})], \\]

    where \\(\\sigma_{:i}\\) denotes the set of indices in permutation \\(\\sigma\\) before the position where \\(i\\) appears (see Data valuation for details), and

    \\[ \\tilde{w} (k) = n \\binom{n - 1}{k} w (k) \\]

    is the weight correction due to the reformulation.

    Warning

    Both PermutationSampler and DeterministicPermutationSampler require caching to be enabled or computation will be doubled wrt. a 'direct' implementation of permutation MC.

    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues--computing-semi-values","title":"Computing semi-values","text":"

    Samplers and coefficients can be arbitrarily mixed by means of the main entry point of this module, compute_generic_semivalues. There are several pre-defined coefficients, including the Shapley value of (Ghorbani and Zou, 2019)1, the Banzhaf index of (Wang and Jia, 2022)3, and the Beta coefficient of (Kwon and Zou, 2022)2. For each of these methods, there is a convenience wrapper function. Respectively, these are: compute_shapley_semivalues, compute_banzhaf_semivalues, and compute_beta_shapley_semivalues.

    Parallelization and batching

    In order to ensure reproducibility and fine-grained control of parallelization, samples are generated in the main process and then distributed to worker processes for evaluation. For small sample sizes, this can lead to a significant overhead. To avoid this, we temporarily provide an additional argument batch_size to all methods which can improve performance with small models up to an order of magnitude. Note that this argument will be removed before version 1.0 in favour of a more general solution.

    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues--references","title":"References","text":"
    1. Ghorbani, A., Zou, J., 2019. Data Shapley: Equitable Valuation of Data for Machine Learning. In: Proceedings of the 36th International Conference on Machine Learning, PMLR, pp. 2242\u20132251.\u00a0\u21a9

    2. Kwon, Y. and Zou, J., 2022. Beta Shapley: A Unified and Noise-reduced Data Valuation Framework for Machine Learning. In: Proceedings of the 25th International Conference on Artificial Intelligence and Statistics (AISTATS) 2022, Vol. 151. PMLR, Valencia, Spain.\u00a0\u21a9

    3. Wang, J.T. and Jia, R., 2022. Data Banzhaf: A Robust Data Valuation Framework for Machine Learning. ArXiv preprint arXiv:2205.15466.\u00a0\u21a9

    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.SVCoefficient","title":"SVCoefficient","text":"

    Bases: Protocol

    The protocol that coefficients for the computation of semi-values must fulfill.

    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.SVCoefficient.__call__","title":"__call__(n, k)","text":"

    Computes the coefficient for a given subset size.

    PARAMETER DESCRIPTION n

    Total number of elements in the set.

    TYPE: int

    k

    Size of the subset for which the coefficient is being computed

    TYPE: int

    Source code in src/pydvl/value/semivalues.py
    def __call__(self, n: int, k: int) -> float:\n\"\"\"Computes the coefficient for a given subset size.\n    Args:\n        n: Total number of elements in the set.\n        k: Size of the subset for which the coefficient is being computed\n    \"\"\"\n...\n
    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.SemiValueMode","title":"SemiValueMode","text":"

    Bases: str, Enum

    Enumeration of semi-value modes.

    Deprecation notice

    This enum and the associated methods are deprecated and will be removed in 0.8.0.

    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.compute_generic_semivalues","title":"compute_generic_semivalues(sampler, u, coefficient, done, *, batch_size=1, n_jobs=1, config=ParallelConfig(), progress=False)","text":"

    Computes semi-values for a given utility function and subset sampler.

    PARAMETER DESCRIPTION sampler

    The subset sampler to use for utility computations.

    TYPE: PowersetSampler[IndexT]

    u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    coefficient

    The semi-value coefficient

    TYPE: SVCoefficient

    done

    Stopping criterion.

    TYPE: StoppingCriterion

    batch_size

    Number of marginal evaluations per single parallel job.

    TYPE: int DEFAULT: 1

    n_jobs

    Number of parallel jobs to use.

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display a progress bar.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Deprecation notice

    Parameter batch_size is for experimental use and will be removed in future versions.

    Source code in src/pydvl/value/semivalues.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.9.0\",\nargs_mapping={\"batch_size\": None},\ntemplate_mgs=\"batch_size is for experimental use and will be removed\"\n\"in future versions.\",\n)\ndef compute_generic_semivalues(\nsampler: PowersetSampler[IndexT],\nu: Utility,\ncoefficient: SVCoefficient,\ndone: StoppingCriterion,\n*,\nbatch_size: int = 1,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\n) -> ValuationResult:\n\"\"\"Computes semi-values for a given utility function and subset sampler.\n    Args:\n        sampler: The subset sampler to use for utility computations.\n        u: Utility object with model, data, and scoring function.\n        coefficient: The semi-value coefficient\n        done: Stopping criterion.\n        batch_size: Number of marginal evaluations per single parallel job.\n        n_jobs: Number of parallel jobs to use.\n        config: Object configuring parallel computation, with cluster\n            address, number of cpus, etc.\n        progress: Whether to display a progress bar.\n    Returns:\n        Object with the results.\n    !!! warning \"Deprecation notice\"\n        Parameter `batch_size` is for experimental use and will be removed in\n        future versions.\n    \"\"\"\nfrom concurrent.futures import FIRST_COMPLETED, Future, wait\nfrom pydvl.parallel import effective_n_jobs, init_executor, init_parallel_backend\nif isinstance(sampler, PermutationSampler) and not u.enable_cache:\nlog.warning(\n\"PermutationSampler requires caching to be enabled or computation \"\n\"will be doubled wrt. 
a 'direct' implementation of permutation MC\"\n)\nif batch_size != 1:\nwarnings.warn(\n\"Parameter `batch_size` is for experimental use and will be\"\n\" removed in future versions\",\nDeprecationWarning,\n)\nresult = ValuationResult.zeros(\nalgorithm=f\"semivalue-{str(sampler)}-{coefficient.__name__}\",  # type: ignore\nindices=u.data.indices,\ndata_names=u.data.data_names,\n)\nparallel_backend = init_parallel_backend(config)\nu = parallel_backend.put(u)\ncorrection = parallel_backend.put(\nlambda n, k: coefficient(n, k) * sampler.weight(n, k)\n)\nmax_workers = effective_n_jobs(n_jobs, config)\nn_submitted_jobs = 2 * max_workers  # number of jobs in the queue\nsampler_it = iter(sampler)\npbar = tqdm(disable=not progress, total=100, unit=\"%\")\nwith init_executor(\nmax_workers=max_workers, config=config, cancel_futures=True\n) as executor:\npending: set[Future] = set()\nwhile True:\npbar.n = 100 * done.completion()\npbar.refresh()\ncompleted, pending = wait(pending, timeout=1, return_when=FIRST_COMPLETED)\nfor future in completed:\nfor idx, marginal in future.result():\nresult.update(idx, marginal)\nif done(result):\nreturn result\n# Ensure that we always have n_submitted_jobs running\ntry:\nfor _ in range(n_submitted_jobs - len(pending)):\nsamples = tuple(islice(sampler_it, batch_size))\nif len(samples) == 0:\nraise StopIteration\npending.add(\nexecutor.submit(\n_marginal, u=u, coefficient=correction, samples=samples\n)\n)\nexcept StopIteration:\nif len(pending) == 0:\nreturn result\n
    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.compute_shapley_semivalues","title":"compute_shapley_semivalues(u, *, done=MaxUpdates(100), sampler_t=PermutationSampler, batch_size=1, n_jobs=1, config=ParallelConfig(), progress=False, seed=None)","text":"

    Computes Shapley values for a given utility function.

    This is a convenience wrapper for compute_generic_semivalues with the Shapley coefficient. Use compute_shapley_values for a more flexible interface and additional methods, including TMCS.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    done

    Stopping criterion.

    TYPE: StoppingCriterion DEFAULT: MaxUpdates(100)

    sampler_t

    The sampler type to use. See the sampler module for a list.

    TYPE: Type[StochasticSampler] DEFAULT: PermutationSampler

    batch_size

    Number of marginal evaluations per single parallel job.

    TYPE: int DEFAULT: 1

    n_jobs

    Number of parallel jobs to use.

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    progress

    Whether to display a progress bar.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Deprecation notice

    Parameter batch_size is for experimental use and will be removed in future versions.

    Source code in src/pydvl/value/semivalues.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.9.0\",\nargs_mapping={\"batch_size\": None},\ntemplate_mgs=\"batch_size is for experimental use and will be removed\"\n\"in future versions.\",\n)\ndef compute_shapley_semivalues(\nu: Utility,\n*,\ndone: StoppingCriterion = MaxUpdates(100),\nsampler_t: Type[StochasticSampler] = PermutationSampler,\nbatch_size: int = 1,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Optional[Seed] = None,\n) -> ValuationResult:\n\"\"\"Computes Shapley values for a given utility function.\n    This is a convenience wrapper for\n    [compute_generic_semivalues][pydvl.value.semivalues.compute_generic_semivalues]\n    with the Shapley coefficient. Use\n    [compute_shapley_values][pydvl.value.shapley.common.compute_shapley_values]\n    for a more flexible interface and additional methods, including TMCS.\n    Args:\n        u: Utility object with model, data, and scoring function.\n        done: Stopping criterion.\n        sampler_t: The sampler type to use. See the\n            [sampler][pydvl.value.sampler] module for a list.\n        batch_size: Number of marginal evaluations per single parallel job.\n        n_jobs: Number of parallel jobs to use.\n        config: Object configuring parallel computation, with cluster\n            address, number of cpus, etc.\n        seed: Either an instance of a numpy random number generator or a seed\n            for it.\n        progress: Whether to display a progress bar.\n    Returns:\n        Object with the results.\n    !!! 
warning \"Deprecation notice\"\n        Parameter `batch_size` is for experimental use and will be removed in\n        future versions.\n    \"\"\"\n# HACK: cannot infer return type because of useless IndexT, NameT\nreturn compute_generic_semivalues(  # type: ignore\nsampler_t(u.data.indices, seed=seed),\nu,\nshapley_coefficient,\ndone,\nbatch_size=batch_size,\nn_jobs=n_jobs,\nconfig=config,\nprogress=progress,\n)\n
    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.compute_banzhaf_semivalues","title":"compute_banzhaf_semivalues(u, *, done=MaxUpdates(100), sampler_t=PermutationSampler, batch_size=1, n_jobs=1, config=ParallelConfig(), progress=False, seed=None)","text":"

    Computes Banzhaf values for a given utility function.

    This is a convenience wrapper for compute_generic_semivalues with the Banzhaf coefficient.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    done

    Stopping criterion.

    TYPE: StoppingCriterion DEFAULT: MaxUpdates(100)

    sampler_t

    The sampler type to use. See the sampler module for a list.

    TYPE: Type[StochasticSampler] DEFAULT: PermutationSampler

    batch_size

    Number of marginal evaluations per single parallel job.

    TYPE: int DEFAULT: 1

    n_jobs

    Number of parallel jobs to use.

    TYPE: int DEFAULT: 1

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display a progress bar.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Deprecation notice

    Parameter batch_size is for experimental use and will be removed in future versions.

    Source code in src/pydvl/value/semivalues.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.9.0\",\nargs_mapping={\"batch_size\": None},\ntemplate_mgs=\"batch_size is for experimental use and will be removed\"\n\"in future versions.\",\n)\ndef compute_banzhaf_semivalues(\nu: Utility,\n*,\ndone: StoppingCriterion = MaxUpdates(100),\nsampler_t: Type[StochasticSampler] = PermutationSampler,\nbatch_size: int = 1,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Optional[Seed] = None,\n) -> ValuationResult:\n\"\"\"Computes Banzhaf values for a given utility function.\n    This is a convenience wrapper for\n    [compute_generic_semivalues][pydvl.value.semivalues.compute_generic_semivalues]\n    with the Banzhaf coefficient.\n    Args:\n        u: Utility object with model, data, and scoring function.\n        done: Stopping criterion.\n        sampler_t: The sampler type to use. See the\n            [sampler][pydvl.value.sampler] module for a list.\n        batch_size: Number of marginal evaluations per single parallel job.\n        n_jobs: Number of parallel jobs to use.\n        seed: Either an instance of a numpy random number generator or a seed\n            for it.\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        progress: Whether to display a progress bar.\n    Returns:\n        Object with the results.\n    !!! warning \"Deprecation notice\"\n        Parameter `batch_size` is for experimental use and will be removed in\n        future versions.\n    \"\"\"\n# HACK: cannot infer return type because of useless IndexT, NameT\nreturn compute_generic_semivalues(  # type: ignore\nsampler_t(u.data.indices, seed=seed),\nu,\nbanzhaf_coefficient,\ndone,\nbatch_size=batch_size,\nn_jobs=n_jobs,\nconfig=config,\nprogress=progress,\n)\n
    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.compute_beta_shapley_semivalues","title":"compute_beta_shapley_semivalues(u, *, alpha=1, beta=1, done=MaxUpdates(100), sampler_t=PermutationSampler, batch_size=1, n_jobs=1, config=ParallelConfig(), progress=False, seed=None)","text":"

    Computes Beta Shapley values for a given utility function.

    This is a convenience wrapper for compute_generic_semivalues with the Beta Shapley coefficient.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    alpha

    Alpha parameter of the Beta distribution.

    TYPE: float DEFAULT: 1

    beta

    Beta parameter of the Beta distribution.

    TYPE: float DEFAULT: 1

    done

    Stopping criterion.

    TYPE: StoppingCriterion DEFAULT: MaxUpdates(100)

    sampler_t

    The sampler type to use. See the sampler module for a list.

    TYPE: Type[StochasticSampler] DEFAULT: PermutationSampler

    batch_size

    Number of marginal evaluations per (parallelized) task.

    TYPE: int DEFAULT: 1

    n_jobs

    Number of parallel jobs to use.

    TYPE: int DEFAULT: 1

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display a progress bar.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Deprecation notice

    Parameter batch_size is for experimental use and will be removed in future versions.

    Source code in src/pydvl/value/semivalues.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.9.0\",\nargs_mapping={\"batch_size\": None},\ntemplate_mgs=\"batch_size is for experimental use and will be removed\"\n\"in future versions.\",\n)\ndef compute_beta_shapley_semivalues(\nu: Utility,\n*,\nalpha: float = 1,\nbeta: float = 1,\ndone: StoppingCriterion = MaxUpdates(100),\nsampler_t: Type[StochasticSampler] = PermutationSampler,\nbatch_size: int = 1,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Optional[Seed] = None,\n) -> ValuationResult:\n\"\"\"Computes Beta Shapley values for a given utility function.\n    This is a convenience wrapper for\n    [compute_generic_semivalues][pydvl.value.semivalues.compute_generic_semivalues]\n    with the Beta Shapley coefficient.\n    Args:\n        u: Utility object with model, data, and scoring function.\n        alpha: Alpha parameter of the Beta distribution.\n        beta: Beta parameter of the Beta distribution.\n        done: Stopping criterion.\n        sampler_t: The sampler type to use. See the\n            [sampler][pydvl.value.sampler] module for a list.\n        batch_size: Number of marginal evaluations per (parallelized) task.\n        n_jobs: Number of parallel jobs to use.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n        config: Object configuring parallel computation, with cluster address, number of\n            cpus, etc.\n        progress: Whether to display a progress bar.\n    Returns:\n        Object with the results.\n    !!! 
warning \"Deprecation notice\"\n        Parameter `batch_size` is for experimental use and will be removed in\n        future versions.\n    \"\"\"\n# HACK: cannot infer return type because of useless IndexT, NameT\nreturn compute_generic_semivalues(  # type: ignore\nsampler_t(u.data.indices, seed=seed),\nu,\nbeta_coefficient(alpha, beta),\ndone,\nbatch_size=batch_size,\nn_jobs=n_jobs,\nconfig=config,\nprogress=progress,\n)\n
    "},{"location":"api/pydvl/value/semivalues/#pydvl.value.semivalues.compute_semivalues","title":"compute_semivalues(u, *, done=MaxUpdates(100), mode=SemiValueMode.Shapley, sampler_t=PermutationSampler[IndexT], batch_size=1, n_jobs=1, seed=None, **kwargs)","text":"

    Convenience entry point for most common semi-value computations.

    Deprecation warning

    This method is deprecated and will be replaced in 0.8.0 by the more general implementation of compute_generic_semivalues. Use compute_shapley_semivalues, compute_banzhaf_semivalues, or compute_beta_shapley_semivalues instead.

    The modes supported with this interface are the following. For greater flexibility use compute_generic_semivalues directly.

    • SemiValueMode.Shapley: Shapley values.
    • SemiValueMode.BetaShapley: Implements the Beta Shapley semi-value as introduced in (Kwon and Zou, 2022)1. Pass additional keyword arguments alpha and beta to set the parameters of the Beta distribution (both default to 1).
    • SemiValueMode.Banzhaf: Implements the Banzhaf semi-value as introduced in (Wang and Jia, 2022)1.

    See Data valuation for an overview of valuation.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    done

    Stopping criterion.

    TYPE: StoppingCriterion DEFAULT: MaxUpdates(100)

    mode

    The semi-value mode to use. See SemiValueMode for a list.

    TYPE: SemiValueMode DEFAULT: Shapley

    sampler_t

    The sampler type to use. See sampler for a list.

    TYPE: Type[StochasticSampler[IndexT]] DEFAULT: PermutationSampler[IndexT]

    batch_size

    Number of marginal evaluations per (parallelized) task.

    TYPE: int DEFAULT: 1

    n_jobs

    Number of parallel jobs to use.

    TYPE: int DEFAULT: 1

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    kwargs

    Additional keyword arguments passed to compute_generic_semivalues.

    DEFAULT: {}

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Deprecation notice

    Parameter batch_size is for experimental use and will be removed in future versions.

    Source code in src/pydvl/value/semivalues.py
    @deprecated(target=True, deprecated_in=\"0.7.0\", remove_in=\"0.8.0\")\ndef compute_semivalues(\nu: Utility,\n*,\ndone: StoppingCriterion = MaxUpdates(100),\nmode: SemiValueMode = SemiValueMode.Shapley,\nsampler_t: Type[StochasticSampler[IndexT]] = PermutationSampler[IndexT],\nbatch_size: int = 1,\nn_jobs: int = 1,\nseed: Optional[Seed] = None,\n**kwargs,\n) -> ValuationResult:\n\"\"\"Convenience entry point for most common semi-value computations.\n    !!! warning \"Deprecation warning\"\n        This method is deprecated and will be replaced in 0.8.0 by the more\n        general implementation of\n        [compute_generic_semivalues][pydvl.value.semivalues.compute_generic_semivalues].\n        Use\n        [compute_shapley_semivalues][pydvl.value.semivalues.compute_shapley_semivalues],\n        [compute_banzhaf_semivalues][pydvl.value.semivalues.compute_banzhaf_semivalues],\n        or\n        [compute_beta_shapley_semivalues][pydvl.value.semivalues.compute_beta_shapley_semivalues]\n        instead.\n    The modes supported with this interface are the following. 
For greater\n    flexibility use\n    [compute_generic_semivalues][pydvl.value.semivalues.compute_generic_semivalues]\n    directly.\n    - [SemiValueMode.Shapley][pydvl.value.semivalues.SemiValueMode]:\n      Shapley values.\n    - [SemiValueMode.BetaShapley][pydvl.value.semivalues.SemiValueMode.BetaShapley]:\n      Implements the Beta Shapley semi-value as introduced in\n      (Kwon and Zou, 2022)<sup><a href=\"#kwon_beta_2022\">1</a></sup>.\n      Pass additional keyword arguments `alpha` and `beta` to set the\n      parameters of the Beta distribution (both default to 1).\n    - [SemiValueMode.Banzhaf][SemiValueMode.Banzhaf]: Implements the Banzhaf\n      semi-value as introduced in (Wang and Jia, 2022)<sup><a href=\"#wang_data_2022\">1</a></sup>.\n    See [[data-valuation]] for an overview of valuation.\n    - [SemiValueMode.Banzhaf][pydvl.value.semivalues.SemiValueMode]: Implements\n      the Banzhaf semi-value as introduced in [@wang_data_2022].\n    Args:\n        u: Utility object with model, data, and scoring function.\n        done: Stopping criterion.\n        mode: The semi-value mode to use. See\n            [SemiValueMode][pydvl.value.semivalues.SemiValueMode] for a list.\n        sampler_t: The sampler type to use. See [sampler][pydvl.value.sampler]\n            for a list.\n        batch_size: Number of marginal evaluations per (parallelized) task.\n        n_jobs: Number of parallel jobs to use.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n        kwargs: Additional keyword arguments passed to\n            [compute_generic_semivalues][pydvl.value.semivalues.compute_generic_semivalues].\n    Returns:\n        Object with the results.\n    !!! 
warning \"Deprecation notice\"\n        Parameter `batch_size` is for experimental use and will be removed in\n        future versions.\n    \"\"\"\nif mode == SemiValueMode.Shapley:\ncoefficient = shapley_coefficient\nelif mode == SemiValueMode.BetaShapley:\nalpha = kwargs.pop(\"alpha\", 1)\nbeta = kwargs.pop(\"beta\", 1)\ncoefficient = beta_coefficient(alpha, beta)\nelif mode == SemiValueMode.Banzhaf:\ncoefficient = banzhaf_coefficient\nelse:\nraise ValueError(f\"Unknown mode {mode}\")\ncoefficient = cast(SVCoefficient, coefficient)\n# HACK: cannot infer return type because of useless IndexT, NameT\nreturn compute_generic_semivalues(  # type: ignore\nsampler_t(u.data.indices, seed=seed),\nu,\ncoefficient,\ndone,\nn_jobs=n_jobs,\nbatch_size=batch_size,\n**kwargs,\n)\n
    "},{"location":"api/pydvl/value/stopping/","title":"Stopping","text":"

    Stopping criteria for value computations.

    This module provides a basic set of stopping criteria, like MaxUpdates, MaxTime, or HistoryDeviation among others. These can behave in different ways depending on the context. For example, MaxUpdates limits the number of updates to values, which depending on the algorithm may mean a different number of utility evaluations or imply other computations like solving a linear or quadratic program.

    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping--creating-stopping-criteria","title":"Creating stopping criteria","text":"

    The easiest way is to declare a function implementing the interface StoppingCriterionCallable and wrap it with make_criterion(). This creates a StoppingCriterion object that can be composed with other stopping criteria.

    Alternatively, and in particular if reporting of completion is required, one can inherit from StoppingCriterion, implement the abstract method _check, and override completion.

    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping--composing-stopping-criteria","title":"Composing stopping criteria","text":"

    Objects of type StoppingCriterion can be composed with the binary operators & (and), and | (or), following the truth tables of Status. The unary operator ~ (not) is also supported. See StoppingCriterion for details on how these operations affect the behavior of the stopping criteria.

    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping--references","title":"References","text":"
    1. Ghorbani, A., Zou, J., 2019. Data Shapley: Equitable Valuation of Data for Machine Learning. In: Proceedings of the 36th International Conference on Machine Learning, PMLR, pp. 2242\u20132251.\u00a0\u21a9

    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.StoppingCriterionCallable","title":"StoppingCriterionCallable","text":"

    Bases: Protocol

    Signature for a stopping criterion

    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.StoppingCriterion","title":"StoppingCriterion(modify_result=True)","text":"

    Bases: ABC

    A composable callable object to determine whether a computation must stop.

    A StoppingCriterion is a callable taking a ValuationResult and returning a Status. It also keeps track of individual convergence of values with converged, and reports the overall completion of the computation with completion.

    Instances of StoppingCriterion can be composed with the binary operators & (and), and | (or), following the truth tables of Status. The unary operator ~ (not) is also supported. These boolean operations act according to the following rules:

    • The results of _check are combined with the operator. See Status for the truth tables.
    • The results of converged are combined with the operator (returning another boolean array).
    • The completion method returns the min, max, or the complement to 1 of the completions of the operands, for AND, OR and NOT respectively. This is required for cases where one of the criteria does not keep track of the convergence of single values, e.g. MaxUpdates, because completion by default returns the mean of the boolean convergence array.
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.StoppingCriterion--subclassing","title":"Subclassing","text":"

    Subclassing this class requires implementing a _check method that returns a Status object based on a given ValuationResult. This method should update the attribute _converged, which is a boolean array indicating whether the value for each index has converged. When this does not make sense for a particular stopping criterion, completion should be overridden to provide an overall completion value, since its default implementation attempts to compute the mean of _converged.

    PARAMETER DESCRIPTION modify_result

    If True the status of the input ValuationResult is modified in place after the call.

    TYPE: bool DEFAULT: True

    Source code in src/pydvl/value/stopping.py
    def __init__(self, modify_result: bool = True):\nself.modify_result = modify_result\nself._converged = np.full(0, False)\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.StoppingCriterion.converged","title":"converged: NDArray[np.bool_] property","text":"

    Returns a boolean array indicating whether the values have converged for each data point.

    Inheriting classes must set the _converged attribute in their _check.

    RETURNS DESCRIPTION NDArray[bool_]

    A boolean array indicating whether the values have converged for each data point.

    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.StoppingCriterion.completion","title":"completion()","text":"

    Returns a value between 0 and 1 indicating the completion of the computation.

    Source code in src/pydvl/value/stopping.py
    def completion(self) -> float:\n\"\"\"Returns a value between 0 and 1 indicating the completion of the\n    computation.\n    \"\"\"\nif self.converged.size == 0:\nreturn 0.0\nreturn float(np.mean(self.converged).item())\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.StoppingCriterion.__call__","title":"__call__(result)","text":"

    Calls _check, maybe updating the result.

    Source code in src/pydvl/value/stopping.py
    def __call__(self, result: ValuationResult) -> Status:\n\"\"\"Calls [_check][pydvl.value.stopping.StoppingCriterion._check], maybe updating the result.\"\"\"\nif len(result) == 0:\nlogger.warning(\n\"At least one iteration finished but no results where generated. \"\n\"Please check that your scorer and utility return valid numbers.\"\n)\nstatus = self._check(result)\nif self.modify_result:  # FIXME: this is not nice\nresult._status = status\nreturn status\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.AbsoluteStandardError","title":"AbsoluteStandardError(threshold, fraction=1.0, burn_in=4, modify_result=True)","text":"

    Bases: StoppingCriterion

    Determine convergence based on the standard error of the values.

    If \\(s_i\\) is the standard error for datum \\(i\\) and \\(v_i\\) its value, then this criterion returns Converged if \\(s_i < \\epsilon\\) for all \\(i\\) and a threshold value \\(\\epsilon \\gt 0\\).

    PARAMETER DESCRIPTION threshold

    A value is considered to have converged if the standard error is below this value. A way of choosing it is to pick some percentage of the range of the values. For Shapley values this is the difference between the maximum and minimum of the utility function (to see this substitute the maximum and minimum values of the utility into the marginal contribution formula).

    TYPE: float

    fraction

    The fraction of values that must have converged for the criterion to return Converged.

    TYPE: float DEFAULT: 1.0

    burn_in

    The number of iterations to ignore before checking for convergence. This is required because computations typically start with zero variance, as a result of using empty(). The default is set to an arbitrary minimum which is usually enough but may need to be increased.

    TYPE: int DEFAULT: 4

    Source code in src/pydvl/value/stopping.py
    def __init__(\nself,\nthreshold: float,\nfraction: float = 1.0,\nburn_in: int = 4,\nmodify_result: bool = True,\n):\nsuper().__init__(modify_result=modify_result)\nself.threshold = threshold\nself.fraction = fraction\nself.burn_in = burn_in\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.MaxChecks","title":"MaxChecks(n_checks, modify_result=True)","text":"

    Bases: StoppingCriterion

    Terminate as soon as the number of checks exceeds the threshold.

    A \"check\" is one call to the criterion.

    PARAMETER DESCRIPTION n_checks

    Threshold: if None, no _check is performed, effectively creating a (never) stopping criterion that always returns Pending.

    TYPE: Optional[int]

    Source code in src/pydvl/value/stopping.py
    def __init__(self, n_checks: Optional[int], modify_result: bool = True):\nsuper().__init__(modify_result=modify_result)\nif n_checks is not None and n_checks < 1:\nraise ValueError(\"n_iterations must be at least 1 or None\")\nself.n_checks = n_checks\nself._count = 0\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.MaxUpdates","title":"MaxUpdates(n_updates, modify_result=True)","text":"

    Bases: StoppingCriterion

    Terminate as soon as the number of updates of any value reaches or exceeds the given threshold.

    Note

    If you want to ensure that all values have been updated, you probably want MinUpdates instead.

    This checks the counts field of a ValuationResult, i.e. the number of times that each index has been updated. For powerset samplers, the maximum of this number coincides with the maximum number of subsets sampled. For permutation samplers, it coincides with the number of permutations sampled.

    PARAMETER DESCRIPTION n_updates

    Threshold: if None, no _check is performed, effectively creating a (never) stopping criterion that always returns Pending.

    TYPE: Optional[int]

    Source code in src/pydvl/value/stopping.py
    def __init__(self, n_updates: Optional[int], modify_result: bool = True):\nsuper().__init__(modify_result=modify_result)\nif n_updates is not None and n_updates < 1:\nraise ValueError(\"n_updates must be at least 1 or None\")\nself.n_updates = n_updates\nself.last_max = 0\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.MinUpdates","title":"MinUpdates(n_updates, modify_result=True)","text":"

    Bases: StoppingCriterion

    Terminate as soon as all value updates exceed or equal the given threshold.

    This checks the counts field of a ValuationResult, i.e. the number of times that each index has been updated. For powerset samplers, the minimum of this number is a lower bound for the number of subsets sampled. For permutation samplers, it lower-bounds the number of permutations sampled.

    PARAMETER DESCRIPTION n_updates

    Threshold: if None, no _check is performed, effectively creating a (never) stopping criterion that always returns Pending.

    TYPE: Optional[int]

    Source code in src/pydvl/value/stopping.py
    def __init__(self, n_updates: Optional[int], modify_result: bool = True):\nsuper().__init__(modify_result=modify_result)\nself.n_updates = n_updates\nself.last_min = 0\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.MaxTime","title":"MaxTime(seconds, modify_result=True)","text":"

    Bases: StoppingCriterion

    Terminate if the computation time exceeds the given number of seconds.

    Checks the elapsed time since construction.

    PARAMETER DESCRIPTION seconds

    Threshold: The computation is terminated if the elapsed time between object construction and a _check exceeds this value. If None, no _check is performed, effectively creating a (never) stopping criterion that always returns Pending.

    TYPE: Optional[float]

    Source code in src/pydvl/value/stopping.py
    def __init__(self, seconds: Optional[float], modify_result: bool = True):\nsuper().__init__(modify_result=modify_result)\nself.max_seconds = seconds or np.inf\nif self.max_seconds <= 0:\nraise ValueError(\"Number of seconds for MaxTime must be positive or None\")\nself.start = time()\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.HistoryDeviation","title":"HistoryDeviation(n_steps, rtol, pin_converged=True, modify_result=True)","text":"

    Bases: StoppingCriterion

    A simple check for relative distance to a previous step in the computation.

    The method used by (Ghorbani and Zou, 2019)1 computes the relative distances between the current values \\(v_i^t\\) and the values at the previous checkpoint \\(v_i^{t-\\tau}\\). If the sum is below a given threshold, the computation is terminated.

    \\[\\sum_{i=1}^n \\frac{\\left| v_i^t - v_i^{t-\\tau} \\right|}{v_i^t} < \\epsilon.\\]

    When the denominator is zero, the summand is set to the value of \\(v_i^{ t-\\tau}\\).

    This implementation is slightly generalised to allow for a different number of updates to individual indices, as happens with powerset samplers instead of permutations. Every subset of indices that is found to converge can be pinned to that state. Once all indices have converged the method has converged.

    Warning

    This criterion is meant for the reproduction of the results in the paper, but we do not recommend using it in practice.

    PARAMETER DESCRIPTION n_steps

    Checkpoint values every so many updates and use these saved values to compare.

    TYPE: int

    rtol

    Relative tolerance for convergence (\\(\\epsilon\\) in the formula).

    TYPE: float

    pin_converged

    If True, once an index has converged, it is pinned to that state.

    TYPE: bool DEFAULT: True

    Source code in src/pydvl/value/stopping.py
    def __init__(\nself,\nn_steps: int,\nrtol: float,\npin_converged: bool = True,\nmodify_result: bool = True,\n):\nsuper().__init__(modify_result=modify_result)\nif n_steps < 1:\nraise ValueError(\"n_steps must be at least 1\")\nif rtol <= 0 or rtol >= 1:\nraise ValueError(\"rtol must be in (0, 1)\")\nself.n_steps = n_steps\nself.rtol = rtol\nself.update_op = np.logical_or if pin_converged else np.logical_and\nself._memory = None  # type: ignore\n
    "},{"location":"api/pydvl/value/stopping/#pydvl.value.stopping.make_criterion","title":"make_criterion(fun, converged=None, completion=None, name=None)","text":"

    Create a new StoppingCriterion from a function. Use this to enable simpler functions to be composed with bitwise operators.

    PARAMETER DESCRIPTION fun

    The callable to wrap.

    TYPE: StoppingCriterionCallable

    converged

    A callable that returns a boolean array indicating what values have converged.

    TYPE: Callable[[], NDArray[bool_]] | None DEFAULT: None

    completion

    A callable that returns a value between 0 and 1 indicating the rate of completion of the computation. If not provided, the fraction of converged values is used.

    TYPE: Callable[[], float] | None DEFAULT: None

    name

    The name of the new criterion. If None, the __name__ of the function is used.

    TYPE: str | None DEFAULT: None

    RETURNS DESCRIPTION Type[StoppingCriterion]

    A new subclass of StoppingCriterion.

    Source code in src/pydvl/value/stopping.py
    def make_criterion(\nfun: StoppingCriterionCallable,\nconverged: Callable[[], NDArray[np.bool_]] | None = None,\ncompletion: Callable[[], float] | None = None,\nname: str | None = None,\n) -> Type[StoppingCriterion]:\n\"\"\"Create a new [StoppingCriterion][pydvl.value.stopping.StoppingCriterion] from a function.\n    Use this to enable simpler functions to be composed with bitwise operators\n    Args:\n        fun: The callable to wrap.\n        converged: A callable that returns a boolean array indicating what\n            values have converged.\n        completion: A callable that returns a value between 0 and 1 indicating\n            the rate of completion of the computation. If not provided, the fraction\n            of converged values is used.\n        name: The name of the new criterion. If `None`, the `__name__` of\n            the function is used.\n    Returns:\n        A new subclass of [StoppingCriterion][pydvl.value.stopping.StoppingCriterion].\n    \"\"\"\nclass WrappedCriterion(StoppingCriterion):\ndef __init__(self, modify_result: bool = True):\nsuper().__init__(modify_result=modify_result)\nself._name = name or getattr(fun, \"__name__\", \"WrappedCriterion\")\ndef _check(self, result: ValuationResult) -> Status:\nreturn fun(result)\n@property\ndef converged(self) -> NDArray[np.bool_]:\nif converged is None:\nreturn super().converged\nreturn converged()\n@property\ndef name(self):\nreturn self._name\ndef completion(self) -> float:\nif completion is None:\nreturn super().completion()\nreturn completion()\nreturn WrappedCriterion\n
    "},{"location":"api/pydvl/value/least_core/","title":"Least core","text":"

    New in version 0.4.0

    This package holds all routines for the computation of Least Core data values.

    Please refer to Data valuation for an overview.

    In addition to the standard interface via compute_least_core_values(), because computing the Least Core values requires the solution of a linear and a quadratic problem after computing all the utility values, there is the possibility of performing each step separately. This is useful when running multiple experiments: use lc_prepare_problem() or mclc_prepare_problem() to prepare a list of problems to solve, then solve them in parallel with lc_solve_problems().

    Note that mclc_prepare_problem() is parallelized itself, so preparing the problems should be done in sequence in this case. The solution of the linear systems can then be done in parallel.

    "},{"location":"api/pydvl/value/least_core/#pydvl.value.least_core.LeastCoreMode","title":"LeastCoreMode","text":"

    Bases: Enum

    Available Least Core algorithms.

    "},{"location":"api/pydvl/value/least_core/#pydvl.value.least_core.compute_least_core_values","title":"compute_least_core_values(u, *, n_jobs=1, n_iterations=None, mode=LeastCoreMode.MonteCarlo, non_negative_subsidy=False, solver_options=None, **kwargs)","text":"

    Umbrella method to compute Least Core values with any of the available algorithms.

    See Data valuation for an overview.

    The following algorithms are available. Note that the exact method can only work with very small datasets and is thus intended only for testing.

    • exact: uses the complete powerset of the training set for the constraints. Implemented in exact_least_core().
    • montecarlo: uses the approximate Monte Carlo Least Core algorithm. Implemented in montecarlo_least_core().
    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    n_jobs

    Number of jobs to run in parallel. Only used for Monte Carlo Least Core.

    TYPE: int DEFAULT: 1

    n_iterations

    Number of subsets to sample and evaluate the utility on. Only used for Monte Carlo Least Core.

    TYPE: Optional[int] DEFAULT: None

    mode

    Algorithm to use. See LeastCoreMode for available options.

    TYPE: LeastCoreMode DEFAULT: MonteCarlo

    non_negative_subsidy

    If True, the least core subsidy \\(e\\) is constrained to be non-negative.

    TYPE: bool DEFAULT: False

    solver_options

    Optional dictionary of options passed to the solvers.

    TYPE: Optional[dict] DEFAULT: None

    RETURNS DESCRIPTION ValuationResult

    Object with the computed values.

    New in version 0.5.0

    Source code in src/pydvl/value/least_core/__init__.py
    def compute_least_core_values(\nu: Utility,\n*,\nn_jobs: int = 1,\nn_iterations: Optional[int] = None,\nmode: LeastCoreMode = LeastCoreMode.MonteCarlo,\nnon_negative_subsidy: bool = False,\nsolver_options: Optional[dict] = None,\n**kwargs,\n) -> ValuationResult:\n\"\"\"Umbrella method to compute Least Core values with any of the available\n    algorithms.\n    See [Data valuation][computing-data-values] for an overview.\n    The following algorithms are available. Note that the exact method can only\n    work with very small datasets and is thus intended only for testing.\n    - `exact`: uses the complete powerset of the training set for the constraints\n      [combinatorial_exact_shapley()][pydvl.value.shapley.naive.combinatorial_exact_shapley].\n    - `montecarlo`:  uses the approximate Monte Carlo Least Core algorithm.\n      Implemented in [montecarlo_least_core()][pydvl.value.least_core.montecarlo.montecarlo_least_core].\n    Args:\n        u: Utility object with model, data, and scoring function\n        n_jobs: Number of jobs to run in parallel. Only used for Monte Carlo\n            Least Core.\n        n_iterations: Number of subsets to sample and evaluate the utility on.\n            Only used for Monte Carlo Least Core.\n        mode: Algorithm to use. See\n            [LeastCoreMode][pydvl.value.least_core.LeastCoreMode] for available\n            options.\n        non_negative_subsidy: If True, the least core subsidy $e$ is constrained\n            to be non-negative.\n        solver_options: Optional dictionary of options passed to the solvers.\n    Returns:\n        Object with the computed values.\n    !!! tip \"New in version 0.5.0\"\n    \"\"\"\nprogress: bool = kwargs.pop(\"progress\", False)\n# TODO: remove this before releasing version 0.7.0\nif kwargs:\nwarnings.warn(\nDeprecationWarning(\n\"Passing solver options as kwargs was deprecated in 0.6.0, will \"\n\"be removed in 0.7.0. 
`Use solver_options` instead.\"\n)\n)\nif solver_options is None:\nsolver_options = kwargs\nelse:\nsolver_options.update(kwargs)\nif mode == LeastCoreMode.MonteCarlo:\n# TODO fix progress showing and maybe_progress in remote case\nprogress = False\nif n_iterations is None:\nraise ValueError(\"n_iterations cannot be None for Monte Carlo Least Core\")\nreturn montecarlo_least_core(\nu=u,\nn_iterations=n_iterations,\nn_jobs=n_jobs,\nprogress=progress,\nnon_negative_subsidy=non_negative_subsidy,\nsolver_options=solver_options,\n**kwargs,\n)\nelif mode == LeastCoreMode.Exact:\nreturn exact_least_core(\nu=u,\nprogress=progress,\nnon_negative_subsidy=non_negative_subsidy,\nsolver_options=solver_options,\n)\nraise ValueError(f\"Invalid value encountered in {mode=}\")\n
    "},{"location":"api/pydvl/value/least_core/common/","title":"Common","text":""},{"location":"api/pydvl/value/least_core/common/#pydvl.value.least_core.common.lc_solve_problem","title":"lc_solve_problem(problem, *, u, algorithm, non_negative_subsidy=False, solver_options=None, **options)","text":"

    Solves a linear problem as prepared by mclc_prepare_problem(). Useful for parallel execution of multiple experiments by running this as a remote task.

    See exact_least_core() or montecarlo_least_core() for argument descriptions.

    Source code in src/pydvl/value/least_core/common.py
    def lc_solve_problem(\nproblem: LeastCoreProblem,\n*,\nu: Utility,\nalgorithm: str,\nnon_negative_subsidy: bool = False,\nsolver_options: Optional[dict] = None,\n**options,\n) -> ValuationResult:\n\"\"\"Solves a linear problem as prepared by\n    [mclc_prepare_problem()][pydvl.value.least_core.montecarlo.mclc_prepare_problem].\n    Useful for parallel execution of multiple experiments by running this as a\n    remote task.\n    See [exact_least_core()][pydvl.value.least_core.naive.exact_least_core] or\n    [montecarlo_least_core()][pydvl.value.least_core.montecarlo.montecarlo_least_core] for\n    argument descriptions.\n    \"\"\"\nn = len(u.data)\nif np.any(np.isnan(problem.utility_values)):\nwarnings.warn(\nf\"Calculation returned \"\nf\"{np.sum(np.isnan(problem.utility_values))} NaN \"\nf\"values out of {problem.utility_values.size}\",\nRuntimeWarning,\n)\n# TODO: remove this before releasing version 0.7.0\nif options:\nwarnings.warn(\nDeprecationWarning(\n\"Passing solver options as kwargs was deprecated in \"\n\"0.6.0, will be removed in 0.7.0. 
`Use solver_options` \"\n\"instead.\"\n)\n)\nif solver_options is None:\nsolver_options = options\nelse:\nsolver_options.update(options)\nif solver_options is None:\nsolver_options = {}\nif \"solver\" not in solver_options:\nsolver_options[\"solver\"] = cp.SCS\nif \"max_iters\" not in solver_options and solver_options[\"solver\"] == cp.SCS:\nsolver_options[\"max_iters\"] = 10000\nlogger.debug(\"Removing possible duplicate values in lower bound array\")\nb_lb = problem.utility_values\nA_lb, unique_indices = np.unique(problem.A_lb, return_index=True, axis=0)\nb_lb = b_lb[unique_indices]\nlogger.debug(\"Building equality constraint\")\nA_eq = np.ones((1, n))\n# We might have already computed the total utility one or more times.\n# This is the index of the row(s) in A_lb with all ones.\ntotal_utility_indices = np.where(A_lb.sum(axis=1) == n)[0]\nif len(total_utility_indices) == 0:\nb_eq = np.array([u(u.data.indices)])\nelse:\nb_eq = b_lb[total_utility_indices]\n# Remove the row(s) corresponding to the total utility\n# from the lower bound constraints\n# because given the equality constraint\n# it is the same as using the constraint e >= 0\n# (i.e. setting non_negative_subsidy = True).\nmask: NDArray[np.bool_] = np.ones_like(b_lb, dtype=bool)\nmask[total_utility_indices] = False\nb_lb = b_lb[mask]\nA_lb = A_lb[mask]\n# Remove the row(s) corresponding to the empty subset\n# because, given u(\u2205) = (which is almost always the case,\n# it is the same as using the constraint e >= 0\n# (i.e. 
setting non_negative_subsidy = True).\nemptyset_utility_indices = np.where(A_lb.sum(axis=1) == 0)[0]\nif len(emptyset_utility_indices) > 0:\nmask = np.ones_like(b_lb, dtype=bool)\nmask[emptyset_utility_indices] = False\nb_lb = b_lb[mask]\nA_lb = A_lb[mask]\n_, subsidy = _solve_least_core_linear_program(\nA_eq=A_eq,\nb_eq=b_eq,\nA_lb=A_lb,\nb_lb=b_lb,\nnon_negative_subsidy=non_negative_subsidy,\nsolver_options=solver_options,\n)\nvalues: Optional[NDArray[np.float_]]\nif subsidy is None:\nlogger.debug(\"No values were found\")\nstatus = Status.Failed\nvalues = np.empty(n)\nvalues[:] = np.nan\nsubsidy = np.nan\nelse:\nvalues = _solve_egalitarian_least_core_quadratic_program(\nsubsidy,\nA_eq=A_eq,\nb_eq=b_eq,\nA_lb=A_lb,\nb_lb=b_lb,\nsolver_options=solver_options,\n)\nif values is None:\nlogger.debug(\"No values were found\")\nstatus = Status.Failed\nvalues = np.empty(n)\nvalues[:] = np.nan\nsubsidy = np.nan\nelse:\nstatus = Status.Converged\nreturn ValuationResult(\nalgorithm=algorithm,\nstatus=status,\nvalues=values,\nsubsidy=subsidy,\nstderr=None,\ndata_names=u.data.data_names,\n)\n
    "},{"location":"api/pydvl/value/least_core/common/#pydvl.value.least_core.common.lc_solve_problems","title":"lc_solve_problems(problems, u, algorithm, config=ParallelConfig(), n_jobs=1, non_negative_subsidy=True, solver_options=None, **options)","text":"

    Solves a list of linear problems in parallel.

    PARAMETER DESCRIPTION u

    Utility.

    TYPE: Utility

    problems

    Least Core problems to solve, as returned by mclc_prepare_problem().

    TYPE: Sequence[LeastCoreProblem]

    algorithm

    Name of the valuation algorithm.

    TYPE: str

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    n_jobs

    Number of parallel jobs to run.

    TYPE: int DEFAULT: 1

    non_negative_subsidy

    If True, the least core subsidy \\(e\\) is constrained to be non-negative.

    TYPE: bool DEFAULT: True

    solver_options

    Additional options to pass to the solver.

    TYPE: Optional[dict] DEFAULT: None

    RETURNS DESCRIPTION List[ValuationResult]

    List of solutions.

    Source code in src/pydvl/value/least_core/common.py
    def lc_solve_problems(\nproblems: Sequence[LeastCoreProblem],\nu: Utility,\nalgorithm: str,\nconfig: ParallelConfig = ParallelConfig(),\nn_jobs: int = 1,\nnon_negative_subsidy: bool = True,\nsolver_options: Optional[dict] = None,\n**options,\n) -> List[ValuationResult]:\n\"\"\"Solves a list of linear problems in parallel.\n    Args:\n        u: Utility.\n        problems: Least Core problems to solve, as returned by\n            [mclc_prepare_problem()][pydvl.value.least_core.montecarlo.mclc_prepare_problem].\n        algorithm: Name of the valuation algorithm.\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        n_jobs: Number of parallel jobs to run.\n        non_negative_subsidy: If True, the least core subsidy $e$ is constrained\n            to be non-negative.\n        solver_options: Additional options to pass to the solver.\n    Returns:\n        List of solutions.\n    \"\"\"\ndef _map_func(\nproblems: List[LeastCoreProblem], *args, **kwargs\n) -> List[ValuationResult]:\nreturn [lc_solve_problem(p, *args, **kwargs) for p in problems]\nmap_reduce_job: MapReduceJob[\n\"LeastCoreProblem\", \"List[ValuationResult]\"\n] = MapReduceJob(\ninputs=problems,\nmap_func=_map_func,\nmap_kwargs=dict(\nu=u,\nalgorithm=algorithm,\nnon_negative_subsidy=non_negative_subsidy,\nsolver_options=solver_options,\n**options,\n),\nreduce_func=lambda x: list(itertools.chain(*x)),\nconfig=config,\nn_jobs=n_jobs,\n)\nsolutions = map_reduce_job()\nreturn solutions\n
    "},{"location":"api/pydvl/value/least_core/montecarlo/","title":"Montecarlo","text":""},{"location":"api/pydvl/value/least_core/montecarlo/#pydvl.value.least_core.montecarlo.montecarlo_least_core","title":"montecarlo_least_core(u, n_iterations, *, n_jobs=1, config=ParallelConfig(), non_negative_subsidy=False, solver_options=None, options=None, progress=False)","text":"

    Computes approximate Least Core values using a Monte Carlo approach.

    \\[ \\begin{array}{lll} \\text{minimize} & \\displaystyle{e} & \\\\ \\text{subject to} & \\displaystyle\\sum_{i\\in N} x_{i} = v(N) & \\\\ & \\displaystyle\\sum_{i\\in S} x_{i} + e \\geq v(S) & , \\forall S \\in \\{S_1, S_2, \\dots, S_m \\overset{\\mathrm{iid}}{\\sim} U(2^N) \\} \\end{array} \\]

    Where:

    • \\(U(2^N)\\) is the uniform distribution over the powerset of \\(N\\).
    • \\(m\\) is the number of subsets that will be sampled and whose utility will be computed and used to compute the data values.
    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    n_iterations

    total number of iterations to use

    TYPE: int

    n_jobs

    number of jobs across which to distribute the computation

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    non_negative_subsidy

    If True, the least core subsidy \\(e\\) is constrained to be non-negative.

    TYPE: bool DEFAULT: False

    solver_options

    Dictionary of options that will be used to select a solver and to configure it. Refer to cvxpy's documentation for all possible options.

    TYPE: Optional[dict] DEFAULT: None

    options

    (Deprecated) Dictionary of solver options. Use solver_options instead.

    TYPE: Optional[dict] DEFAULT: None

    progress

    If True, shows a tqdm progress bar

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the data values and the least core value.

    Source code in src/pydvl/value/least_core/montecarlo.py
    def montecarlo_least_core(\nu: Utility,\nn_iterations: int,\n*,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nnon_negative_subsidy: bool = False,\nsolver_options: Optional[dict] = None,\noptions: Optional[dict] = None,\nprogress: bool = False,\n) -> ValuationResult:\nr\"\"\"Computes approximate Least Core values using a Monte Carlo approach.\n    $$\n    \\begin{array}{lll}\n    \\text{minimize} & \\displaystyle{e} & \\\\\n    \\text{subject to} & \\displaystyle\\sum_{i\\in N} x_{i} = v(N) & \\\\\n    & \\displaystyle\\sum_{i\\in S} x_{i} + e \\geq v(S) & ,\n    \\forall S \\in \\{S_1, S_2, \\dots, S_m \\overset{\\mathrm{iid}}{\\sim} U(2^N) \\}\n    \\end{array}\n    $$\n    Where:\n    * $U(2^N)$ is the uniform distribution over the powerset of $N$.\n    * $m$ is the number of subsets that will be sampled and whose utility will\n      be computed and used to compute the data values.\n    Args:\n        u: Utility object with model, data, and scoring function\n        n_iterations: total number of iterations to use\n        n_jobs: number of jobs across which to distribute the computation\n        config: Object configuring parallel computation, with cluster\n            address, number of cpus, etc.\n        non_negative_subsidy: If True, the least core subsidy $e$ is constrained\n            to be non-negative.\n        solver_options: Dictionary of options that will be used to select a solver\n            and to configure it. Refer to [cvxpy's\n            documentation](https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options)\n            for all possible options.\n        options: (Deprecated) Dictionary of solver options. 
Use solver_options\n            instead.\n        progress: If True, shows a tqdm progress bar\n    Returns:\n        Object with the data values and the least core value.\n    \"\"\"\n# TODO: remove this before releasing version 0.7.0\nif options:\nwarnings.warn(\nDeprecationWarning(\n\"Passing solver options as kwargs was deprecated in \"\n\"0.6.0, will be removed in 0.7.0. `Use solver_options` \"\n\"instead.\"\n)\n)\nif solver_options is None:\nsolver_options = options\nelse:\nsolver_options.update(options)\nproblem = mclc_prepare_problem(\nu, n_iterations, n_jobs=n_jobs, config=config, progress=progress\n)\nreturn lc_solve_problem(\nproblem,\nu=u,\nalgorithm=\"montecarlo_least_core\",\nnon_negative_subsidy=non_negative_subsidy,\nsolver_options=solver_options,\n)\n
    "},{"location":"api/pydvl/value/least_core/montecarlo/#pydvl.value.least_core.montecarlo.mclc_prepare_problem","title":"mclc_prepare_problem(u, n_iterations, *, n_jobs=1, config=ParallelConfig(), progress=False)","text":"

    Prepares a linear problem by sampling subsets of the data. Use this to separate the problem preparation from the solving with lc_solve_problem(). Useful for parallel execution of multiple experiments.

    See montecarlo_least_core for argument descriptions.

    Source code in src/pydvl/value/least_core/montecarlo.py
    def mclc_prepare_problem(\nu: Utility,\nn_iterations: int,\n*,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\n) -> LeastCoreProblem:\n\"\"\"Prepares a linear problem by sampling subsets of the data. Use this to\n    separate the problem preparation from the solving with\n    [lc_solve_problem()][pydvl.value.least_core.common.lc_solve_problem]. Useful\n    for parallel execution of multiple experiments.\n    See\n    [montecarlo_least_core][pydvl.value.least_core.montecarlo.montecarlo_least_core]\n    for argument descriptions.\n    \"\"\"\nn = len(u.data)\nif n_iterations < n:\nwarnings.warn(\nf\"Number of iterations '{n_iterations}' is smaller the size of the dataset '{n}'. \"\nf\"This is not optimal because in the worst case we need at least '{n}' constraints \"\n\"to satisfy the individual rationality condition.\"\n)\nif n_iterations > 2**n:\nwarnings.warn(\nf\"Passed n_iterations is greater than the number subsets! \"\nf\"Setting it to 2^{n}\",\nRuntimeWarning,\n)\nn_iterations = 2**n\niterations_per_job = max(1, n_iterations // effective_n_jobs(n_jobs, config))\nmap_reduce_job: MapReduceJob[\"Utility\", \"LeastCoreProblem\"] = MapReduceJob(\ninputs=u,\nmap_func=_montecarlo_least_core,\nreduce_func=_reduce_func,\nmap_kwargs=dict(n_iterations=iterations_per_job, progress=progress),\nn_jobs=n_jobs,\nconfig=config,\n)\nreturn map_reduce_job()\n
    "},{"location":"api/pydvl/value/least_core/naive/","title":"Naive","text":""},{"location":"api/pydvl/value/least_core/naive/#pydvl.value.least_core.naive.exact_least_core","title":"exact_least_core(u, *, non_negative_subsidy=False, solver_options=None, options=None, progress=True)","text":"

    Computes the exact Least Core values.

    Note

    If the training set contains more than 20 instances a warning is printed because the computation is very expensive. This method is mostly used for internal testing and simple use cases. Please refer to the Monte Carlo method for practical applications.

    The least core is the solution to the following Linear Programming problem:

    \\[ \\begin{array}{lll} \\text{minimize} & \\displaystyle{e} & \\\\ \\text{subject to} & \\displaystyle\\sum_{i\\in N} x_{i} = v(N) & \\\\ & \\displaystyle\\sum_{i\\in S} x_{i} + e \\geq v(S) &, \\forall S \\subseteq N \\\\ \\end{array} \\]

    Where \\(N = \\{1, 2, \\dots, n\\}\\) are the training set's indices.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    non_negative_subsidy

    If True, the least core subsidy \\(e\\) is constrained to be non-negative.

    TYPE: bool DEFAULT: False

    solver_options

    Dictionary of options that will be used to select a solver and to configure it. Refer to cvxpy's documentation for all possible options.

    TYPE: Optional[dict] DEFAULT: None

    options

    (Deprecated) Dictionary of solver options. Use solver_options instead.

    TYPE: Optional[dict] DEFAULT: None

    progress

    If True, shows a tqdm progress bar

    TYPE: bool DEFAULT: True

    RETURNS DESCRIPTION ValuationResult

    Object with the data values and the least core value.

    Source code in src/pydvl/value/least_core/naive.py
    def exact_least_core(\nu: Utility,\n*,\nnon_negative_subsidy: bool = False,\nsolver_options: Optional[dict] = None,\noptions: Optional[dict] = None,\nprogress: bool = True,\n) -> ValuationResult:\nr\"\"\"Computes the exact Least Core values.\n    !!! Note\n        If the training set contains more than 20 instances a warning is printed\n        because the computation is very expensive. This method is mostly used for\n        internal testing and simple use cases. Please refer to the\n        [Monte Carlo method][pydvl.value.least_core.montecarlo.montecarlo_least_core]\n        for practical applications.\n    The least core is the solution to the following Linear Programming problem:\n    $$\n    \\begin{array}{lll}\n    \\text{minimize} & \\displaystyle{e} & \\\\\n    \\text{subject to} & \\displaystyle\\sum_{i\\in N} x_{i} = v(N) & \\\\\n    & \\displaystyle\\sum_{i\\in S} x_{i} + e \\geq v(S) &, \\forall S \\subseteq N \\\\\n    \\end{array}\n    $$\n    Where $N = \\{1, 2, \\dots, n\\}$ are the training set's indices.\n    Args:\n        u: Utility object with model, data, and scoring function\n            non_negative_subsidy: If True, the least core subsidy $e$ is constrained\n            to be non-negative.\n        solver_options: Dictionary of options that will be used to select a solver\n            and to configure it. Refer to the [cvxpy's\n            documentation](https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options)\n            for all possible options.\n        options: (Deprecated) Dictionary of solver options. Use `solver_options`\n            instead.\n        progress: If True, shows a tqdm progress bar\n    Returns:\n        Object with the data values and the least core value.\n    \"\"\"\nn = len(u.data)\nif n > 20:  # Arbitrary choice, will depend on time required, caching, etc.\nwarnings.warn(f\"Large dataset! 
Computation requires 2^{n} calls to model.fit()\")\n# TODO: remove this before releasing version 0.7.0\nif options:\nwarnings.warn(\nDeprecationWarning(\n\"Passing solver options as kwargs was deprecated in \"\n\"0.6.0, will \"\n\"be removed in 0.7.0. `Use solver_options` instead.\"\n)\n)\nif solver_options is None:\nsolver_options = options\nelse:\nsolver_options.update(options)\nproblem = lc_prepare_problem(u, progress=progress)\nreturn lc_solve_problem(\nproblem=problem,\nu=u,\nalgorithm=\"exact_least_core\",\nnon_negative_subsidy=non_negative_subsidy,\nsolver_options=solver_options,\n)\n
    "},{"location":"api/pydvl/value/least_core/naive/#pydvl.value.least_core.naive.lc_prepare_problem","title":"lc_prepare_problem(u, progress=False)","text":"

    Prepares a linear problem with all subsets of the data. Use this to separate the problem preparation from the solving with lc_solve_problem(). Useful for parallel execution of multiple experiments.

    See exact_least_core() for argument descriptions.

    Source code in src/pydvl/value/least_core/naive.py
    def lc_prepare_problem(u: Utility, progress: bool = False) -> LeastCoreProblem:\n\"\"\"Prepares a linear problem with all subsets of the data\n    Use this to separate the problem preparation from the solving with\n    [lc_solve_problem()][pydvl.value.least_core.common.lc_solve_problem]. Useful for\n    parallel execution of multiple experiments.\n    See [exact_least_core()][pydvl.value.least_core.naive.exact_least_core] for argument\n    descriptions.\n    \"\"\"\nn = len(u.data)\nlogger.debug(\"Building vectors and matrices for linear programming problem\")\npowerset_size = 2**n\nA_lb = np.zeros((powerset_size, n))\nlogger.debug(\"Iterating over all subsets\")\nutility_values = np.zeros(powerset_size)\nfor i, subset in enumerate(\nmaybe_progress(\npowerset(u.data.indices), progress, total=powerset_size - 1, position=0\n)\n):\nindices: NDArray[np.bool_] = np.zeros(n, dtype=bool)\nindices[list(subset)] = True\nA_lb[i, indices] = 1\nutility_values[i] = u(subset)\nreturn LeastCoreProblem(utility_values, A_lb)\n
    "},{"location":"api/pydvl/value/loo/","title":"Loo","text":""},{"location":"api/pydvl/value/loo/loo/","title":"Loo","text":""},{"location":"api/pydvl/value/loo/loo/#pydvl.value.loo.loo.compute_loo","title":"compute_loo(u, *, n_jobs=1, config=ParallelConfig(), progress=True)","text":"

    Computes leave one out value:

    \\[v(i) = u(D) - u(D \\setminus \\{i\\}) \\] PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    progress

    If True, display a progress bar

    TYPE: bool DEFAULT: True

    n_jobs

    Number of parallel jobs to use

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    If True, display a progress bar

    TYPE: bool DEFAULT: True

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    New in version 0.7.0

    Renamed from naive_loo and added parallel computation.

    Source code in src/pydvl/value/loo/loo.py
    def compute_loo(\nu: Utility,\n*,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = True,\n) -> ValuationResult:\nr\"\"\"Computes leave one out value:\n    $$v(i) = u(D) - u(D \\setminus \\{i\\}) $$\n    Args:\n        u: Utility object with model, data, and scoring function\n        progress: If True, display a progress bar\n        n_jobs: Number of parallel jobs to use\n        config: Object configuring parallel computation, with cluster\n            address, number of cpus, etc.\n        progress: If True, display a progress bar\n    Returns:\n        Object with the data values.\n    !!! tip \"New in version 0.7.0\"\n        Renamed from `naive_loo` and added parallel computation.\n    \"\"\"\nif len(u.data) < 3:\nraise ValueError(\"Dataset must have at least 2 elements\")\nresult = ValuationResult.zeros(\nalgorithm=\"loo\",\nindices=u.data.indices,\ndata_names=u.data.data_names,\n)\nall_indices = set(u.data.indices)\ntotal_utility = u(u.data.indices)\ndef fun(idx: int) -> tuple[int, float]:\nreturn idx, total_utility - u(all_indices.difference({idx}))\nmax_workers = effective_n_jobs(n_jobs, config)\nn_submitted_jobs = 2 * max_workers  # number of jobs in the queue\n# NOTE: this could be done with a simple executor.map(), but we want to\n# display a progress bar\nwith init_executor(\nmax_workers=max_workers, config=config, cancel_futures=True\n) as executor:\npending: set[Future] = set()\nindex_it = iter(u.data.indices)\npbar = tqdm(disable=not progress, total=100, unit=\"%\")\nwhile True:\npbar.n = 100 * sum(result.counts) / len(u.data)\npbar.refresh()\ncompleted, pending = wait(pending, timeout=0.1, return_when=FIRST_COMPLETED)\nfor future in completed:\nidx, marginal = future.result()\nresult.update(idx, marginal)\n# Ensure that we always have n_submitted_jobs running\ntry:\nfor _ in range(n_submitted_jobs - len(pending)):\npending.add(executor.submit(fun, next(index_it)))\nexcept StopIteration:\nif len(pending) == 
0:\nreturn result\n
    "},{"location":"api/pydvl/value/loo/naive/","title":"Naive","text":""},{"location":"api/pydvl/value/loo/naive/#pydvl.value.loo.naive.naive_loo","title":"naive_loo(u, *, progress=True, **kwargs)","text":"

    Deprecated. Use compute_loo instead.

    Source code in src/pydvl/value/loo/naive.py
    @deprecated(\ntarget=compute_loo,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.8.0\",\nargs_extra=dict(n_jobs=1),\n)\ndef naive_loo(u: Utility, *, progress: bool = True, **kwargs) -> ValuationResult:\n\"\"\"Deprecated. Use [compute_loo][pydvl.value.loo.compute_loo] instead.\"\"\"\npass  # type: ignore\n
    "},{"location":"api/pydvl/value/oob/","title":"Oob","text":""},{"location":"api/pydvl/value/oob/oob/","title":"Oob","text":""},{"location":"api/pydvl/value/oob/oob/#pydvl.value.oob.oob--references","title":"References","text":"
    1. Kwon and Zou. Data-OOB: Out-of-bag Estimate as a Simple and Efficient Data Value. In: ICML 2023.\u00a0\u21a9

    "},{"location":"api/pydvl/value/oob/oob/#pydvl.value.oob.oob.compute_data_oob","title":"compute_data_oob(u, *, n_est=10, max_samples=0.8, loss=None, n_jobs=None, seed=None, progress=False)","text":"

    Computes Data out of bag values

    This implements the method described in (Kwon and Zou, 2023)1. It fits several base estimators provided through u.model through a bagging process. The point value corresponds to the average loss of estimators which were not fit on it.

    \\(w_{bj}\\in Z\\) is the number of times the j-th datum \\((x_j, y_j)\\) is selected in the b-th bootstrap dataset.

    \\[\\psi((x_i,y_i),\\Theta_B):=\\frac{\\sum_{b=1}^{B}\\mathbb{1}(w_{bi}=0)T(y_i, \\hat{f}_b(x_i))}{\\sum_{b=1}^{B} \\mathbb{1} (w_{bi}=0)}\\]

    With:

    \\[ T: Y \\times Y \\rightarrow \\mathbb{R} \\]

    T is a score function that represents the goodness of a weak learner \\(\\hat{f}_b\\) at the i-th datum \\((x_i, y_i)\\).

    n_est and max_samples must be tuned jointly to ensure that every sample is out-of-bag at least once, otherwise the result could include a NaN value for that datum.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    n_est

    Number of estimators used in the bagging procedure.

    TYPE: int DEFAULT: 10

    max_samples

    The fraction of samples to draw to train each base estimator.

    TYPE: float DEFAULT: 0.8

    loss

    A function taking as parameters the true data labels and the corresponding model predictions (it is called with the keyword arguments y_true and y_pred) and returning an array of point-wise errors.

    TYPE: Callable DEFAULT: None

    n_jobs

    The number of jobs to run in parallel used in the bagging procedure for both fit and predict.

    TYPE: int DEFAULT: None

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    progress

    If True, display a progress bar.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    Source code in src/pydvl/value/oob/oob.py
    def compute_data_oob(\nu: Utility,\n*,\nn_est: int = 10,\nmax_samples: float = 0.8,\nloss: Callable = None,\nn_jobs: int = None,\nseed: Optional[Seed] = None,\nprogress: bool = False,\n) -> ValuationResult:\nr\"\"\"Computes Data out of bag values\n    This implements the method described in\n    (Kwon and Zou, 2023)<sup><a href=\"kwon_data_2023\">1</a></sup>.\n    It fits several base estimators provided through u.model through a bagging\n    process. The point value corresponds to the average loss of estimators which\n    were not fit on it.\n    $w_{bj}\\in Z$ is the number of times the j-th datum $(x_j, y_j)$ is selected\n    in the b-th bootstrap dataset.\n    $$\\psi((x_i,y_i),\\Theta_B):=\\frac{\\sum_{b=1}^{B}\\mathbb{1}(w_{bi}=0)T(y_i,\n    \\hat{f}_b(x_i))}{\\sum_{b=1}^{B}\n    \\mathbb{1}\n    (w_{bi}=0)}$$\n    With:\n    $$\n    T: Y \\times Y\n    \\rightarrow \\mathbb{R}\n    $$\n    T is a score function that represents the goodness of a weak learner\n    $\\hat{f}_b$ at the i-th datum $(x_i, y_i)$.\n    `n_est` and `max_samples` must be tuned jointly to ensure that all samples\n    are at least 1 time out-of-bag, otherwise the result could include a NaN\n    value for that datum.\n    Args:\n        u: Utility object with model, data, and scoring function.\n        n_est: Number of estimator used in the bagging procedure.\n        max_samples: The fraction of samples to draw to train each base\n            estimator.\n        loss: A function taking as parameters model prediction and corresponding\n            data labels(preds, y) and returning an array of point-wise errors.\n        n_jobs: The number of jobs to run in parallel used in the bagging\n            procedure for both fit and predict.\n        seed: Either an instance of a numpy random number generator or a seed\n            for it.\n        progress: If True, display a progress bar.\n    Returns:\n        Object with the data values.\n    \"\"\"\nrng = 
np.random.default_rng(seed)\nrandom_state = np.random.RandomState(rng.bit_generator)\nresult: ValuationResult[np.int_, np.object_] = ValuationResult.empty(\nalgorithm=\"data_oob\", indices=u.data.indices, data_names=u.data.data_names\n)\nif is_classifier(u.model):\nbag = BaggingClassifier(\nu.model,\nn_estimators=n_est,\nmax_samples=max_samples,\nn_jobs=n_jobs,\nrandom_state=random_state,\n)\nif loss is None:\nloss = point_wise_accuracy\nelif is_regressor(u.model):\nbag = BaggingRegressor(\nu.model,\nn_estimators=n_est,\nmax_samples=max_samples,\nn_jobs=n_jobs,\nrandom_state=random_state,\n)\nif loss is None:\nloss = neg_l2_distance\nelse:\nraise Exception(\n\"Model has to be a classifier or a regressor in sklearn format.\"\n)\nbag.fit(u.data.x_train, u.data.y_train)\nfor est, samples in maybe_progress(\nzip(bag.estimators_, bag.estimators_samples_), progress, total=n_est\n):  # The bottleneck is the bag fitting not this part so TQDM is not very useful here\noob_idx = np.setxor1d(u.data.indices, np.unique(samples))\narray_loss = loss(\ny_true=u.data.y_train[oob_idx], y_pred=est.predict(u.data.x_train[oob_idx])\n)\nresult += ValuationResult(\nalgorithm=\"data_oob\",\nindices=oob_idx,\nvalues=array_loss,\ncounts=np.ones_like(array_loss, dtype=u.data.indices.dtype),\n)\nreturn result\n
    "},{"location":"api/pydvl/value/oob/oob/#pydvl.value.oob.oob.point_wise_accuracy","title":"point_wise_accuracy(y_true, y_pred)","text":"

    Point-wise 0-1 loss between two arrays

    PARAMETER DESCRIPTION y_true

    Array of true values (e.g. labels)

    TYPE: NDArray[T]

    y_pred

    Array of estimated values (e.g. model predictions)

    TYPE: NDArray[T]

    RETURNS DESCRIPTION NDArray[T]

    Array with point-wise 0-1 losses between labels and model predictions

    Source code in src/pydvl/value/oob/oob.py
    def point_wise_accuracy(y_true: NDArray[T], y_pred: NDArray[T]) -> NDArray[T]:\nr\"\"\"Point-wise 0-1 loss between two arrays\n    Args:\n        y_true: Array of true values (e.g. labels)\n        y_pred: Array of estimated values (e.g. model predictions)\n    Returns:\n        Array with point-wise 0-1 losses between labels and model predictions\n    \"\"\"\nreturn np.array(y_pred == y_true, dtype=y_pred.dtype)\n
    "},{"location":"api/pydvl/value/oob/oob/#pydvl.value.oob.oob.neg_l2_distance","title":"neg_l2_distance(y_true, y_pred)","text":"

    Point-wise negative \\(l_2\\) distance between two arrays

    PARAMETER DESCRIPTION y_true

    Array of true values (e.g. labels)

    TYPE: NDArray[T]

    y_pred

    Array of estimated values (e.g. model predictions)

    TYPE: NDArray[T]

    RETURNS DESCRIPTION NDArray[T]

    Array with point-wise negative \\(l_2\\) distances between labels and model predictions

    Source code in src/pydvl/value/oob/oob.py
    def neg_l2_distance(y_true: NDArray[T], y_pred: NDArray[T]) -> NDArray[T]:\nr\"\"\"Point-wise negative $l_2$ distance between two arrays\n    Args:\n        y_true: Array of true values (e.g. labels)\n        y_pred: Array of estimated values (e.g. model predictions)\n    Returns:\n        Array with point-wise negative $l_2$ distances between labels and model\n        predictions\n    \"\"\"\nreturn -np.square(np.array(y_pred - y_true), dtype=y_pred.dtype)\n
    "},{"location":"api/pydvl/value/shapley/","title":"Shapley","text":"

    This package holds all routines for the computation of Shapley Data value. Users will want to use compute_shapley_values or compute_semivalues as interfaces to most methods defined in the modules.

    Please refer to the guide on data valuation for an overview of all methods.

    "},{"location":"api/pydvl/value/shapley/common/","title":"Common","text":""},{"location":"api/pydvl/value/shapley/common/#pydvl.value.shapley.common.compute_shapley_values","title":"compute_shapley_values(u, *, done=MaxUpdates(100), mode=ShapleyMode.TruncatedMontecarlo, n_jobs=1, seed=None, **kwargs)","text":"

    Umbrella method to compute Shapley values with any of the available algorithms.

    See the guide on data valuation for an overview.

    The following algorithms are available. Note that the exact methods can only work with very small datasets and are thus intended only for testing. Some algorithms also accept additional arguments, please refer to the documentation of each particular method.

    • combinatorial_exact: uses the combinatorial implementation of data Shapley. Implemented in combinatorial_exact_shapley().
    • combinatorial_montecarlo: uses the approximate Monte Carlo implementation of combinatorial data Shapley. Implemented in combinatorial_montecarlo_shapley().
    • permutation_exact: uses the permutation-based implementation of data Shapley. Computation is not parallelized. Implemented in permutation_exact_shapley().
    • permutation_montecarlo: uses the approximate Monte Carlo implementation of permutation data Shapley. Accepts a TruncationPolicy to stop computing marginals. Implemented in permutation_montecarlo_shapley().
    • owen_sampling: Uses the Owen continuous extension of the utility function to the unit cube. Implemented in owen_sampling_shapley(). This method does not take a StoppingCriterion but instead requires a parameter max_q for the number of subdivisions of the unit interval to use for integration, and another parameter n_samples for the number of subsets to sample for each \\(q\\).
    • owen_halved: Same as 'owen_sampling' but uses correlated samples in the expectation. Implemented in owen_sampling_shapley(). This method requires an additional parameter max_q for the number of subdivisions of the interval [0,0.5] to use for integration, and another parameter n_samples for the number of subsets to sample for each \\(q\\).
    • group_testing: estimates differences of Shapley values and solves a constraint satisfaction problem. High sample complexity, not recommended. Implemented in group_testing_shapley(). This method does not take a StoppingCriterion but instead requires a parameter n_samples for the number of iterations to run.

    Additionally, one can use model-specific methods:

    • knn: Exact method for K-Nearest neighbour models. Implemented in knn_shapley().
    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    done

    Object used to determine when to stop the computation for Monte Carlo methods. The default is to stop after 100 iterations. See the available criteria in stopping. It is possible to combine several of them using boolean operators. Some methods ignore this argument, others require specific subtypes.

    TYPE: StoppingCriterion DEFAULT: MaxUpdates(100)

    n_jobs

    Number of parallel jobs (available only to some methods)

    TYPE: int DEFAULT: 1

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    mode

    Choose which Shapley algorithm to use. See ShapleyMode for a list of allowed values.

    TYPE: ShapleyMode DEFAULT: TruncatedMontecarlo

    RETURNS DESCRIPTION ValuationResult

    Object with the results.

    Source code in src/pydvl/value/shapley/common.py
    def compute_shapley_values(\nu: Utility,\n*,\ndone: StoppingCriterion = MaxUpdates(100),\nmode: ShapleyMode = ShapleyMode.TruncatedMontecarlo,\nn_jobs: int = 1,\nseed: Optional[Seed] = None,\n**kwargs,\n) -> ValuationResult:\n\"\"\"Umbrella method to compute Shapley values with any of the available\n    algorithms.\n    See [[data-valuation]] for an overview.\n    The following algorithms are available. Note that the exact methods can only\n    work with very small datasets and are thus intended only for testing. Some\n    algorithms also accept additional arguments, please refer to the\n    documentation of each particular method.\n    - `combinatorial_exact`: uses the combinatorial implementation of data\n      Shapley. Implemented in\n      [combinatorial_exact_shapley()][pydvl.value.shapley.naive.combinatorial_exact_shapley].\n    - `combinatorial_montecarlo`:  uses the approximate Monte Carlo\n      implementation of combinatorial data Shapley. Implemented in\n      [combinatorial_montecarlo_shapley()][pydvl.value.shapley.montecarlo.combinatorial_montecarlo_shapley].\n    - `permutation_exact`: uses the permutation-based implementation of data\n      Shapley. Computation is **not parallelized**. Implemented in\n      [permutation_exact_shapley()][pydvl.value.shapley.naive.permutation_exact_shapley].\n    - `permutation_montecarlo`: uses the approximate Monte Carlo\n      implementation of permutation data Shapley. Accepts a\n      [TruncationPolicy][pydvl.value.shapley.truncated.TruncationPolicy] to stop\n      computing marginals. Implemented in\n      [permutation_montecarlo_shapley()][pydvl.value.shapley.montecarlo.permutation_montecarlo_shapley].\n    - `owen_sampling`: Uses the Owen continuous extension of the utility\n      function to the unit cube. Implemented in\n      [owen_sampling_shapley()][pydvl.value.shapley.owen.owen_sampling_shapley]. 
This\n      method does not take a [StoppingCriterion][pydvl.value.stopping.StoppingCriterion]\n      but instead requires a parameter `q_max` for the number of subdivisions\n      of the unit interval to use for integration, and another parameter\n      `n_samples` for the number of subsets to sample for each $q$.\n    - `owen_halved`: Same as 'owen_sampling' but uses correlated samples in the\n      expectation. Implemented in\n      [owen_sampling_shapley()][pydvl.value.shapley.owen.owen_sampling_shapley].\n      This method  requires an additional parameter `q_max` for the number of\n      subdivisions of the interval [0,0.5] to use for integration, and another\n      parameter `n_samples` for the number of subsets to sample for each $q$.\n    - `group_testing`: estimates differences of Shapley values and solves a\n      constraint satisfaction problem. High sample complexity, not recommended.\n      Implemented in [group_testing_shapley()][pydvl.value.shapley.gt.group_testing_shapley]. This\n      method does not take a [StoppingCriterion][pydvl.value.stopping.StoppingCriterion]\n      but instead requires a parameter `n_samples` for the number of\n      iterations to run.\n    Additionally, one can use model-specific methods:\n    - `knn`: Exact method for K-Nearest neighbour models. Implemented in\n      [knn_shapley()][pydvl.value.shapley.knn.knn_shapley].\n    Args:\n        u: [Utility][pydvl.utils.utility.Utility] object with model, data, and\n            scoring function.\n        done: Object used to determine when to stop the computation for Monte\n            Carlo methods. The default is to stop after 100 iterations. See the\n            available criteria in [stopping][pydvl.value.stopping]. It is\n            possible to combine several of them using boolean operators. 
Some\n            methods ignore this argument, others require specific subtypes.\n        n_jobs: Number of parallel jobs (available only to some methods)\n        seed: Either an instance of a numpy random number generator or a seed\n            for it.\n        mode: Choose which shapley algorithm to use. See\n            [ShapleyMode][pydvl.value.shapley.ShapleyMode] for a list of allowed\n            value.\n    Returns:\n        Object with the results.\n    \"\"\"\nprogress: bool = kwargs.pop(\"progress\", False)\nif mode not in list(ShapleyMode):\nraise ValueError(f\"Invalid value encountered in {mode=}\")\nif mode in (\nShapleyMode.PermutationMontecarlo,\nShapleyMode.ApproShapley,\nShapleyMode.TruncatedMontecarlo,\n):\ntruncation = kwargs.pop(\"truncation\", NoTruncation())\nreturn permutation_montecarlo_shapley(  # type: ignore\nu=u, done=done, truncation=truncation, n_jobs=n_jobs, seed=seed, **kwargs\n)\nelif mode == ShapleyMode.CombinatorialMontecarlo:\nreturn combinatorial_montecarlo_shapley(\nu, done=done, n_jobs=n_jobs, seed=seed, progress=progress\n)\nelif mode == ShapleyMode.CombinatorialExact:\nreturn combinatorial_exact_shapley(u, n_jobs=n_jobs, progress=progress)\nelif mode == ShapleyMode.PermutationExact:\nreturn permutation_exact_shapley(u, progress=progress)\nelif mode == ShapleyMode.Owen or mode == ShapleyMode.OwenAntithetic:\nif kwargs.get(\"n_samples\") is None:\nraise ValueError(\"n_samples cannot be None for Owen methods\")\nif kwargs.get(\"max_q\") is None:\nraise ValueError(\"Owen Sampling requires max_q for the outer integral\")\nmethod = (\nOwenAlgorithm.Standard\nif mode == ShapleyMode.Owen\nelse OwenAlgorithm.Antithetic\n)\nreturn owen_sampling_shapley(\nu,\nn_samples=int(kwargs.get(\"n_samples\", -1)),\nmax_q=int(kwargs.get(\"max_q\", -1)),\nmethod=method,\nn_jobs=n_jobs,\nseed=seed,\n)\nelif mode == ShapleyMode.KNN:\nreturn knn_shapley(u, progress=progress)\nelif mode == ShapleyMode.GroupTesting:\nn_samples = 
kwargs.pop(\"n_samples\")\nif n_samples is None:\nraise ValueError(\"n_samples cannot be None for Group Testing\")\nepsilon = kwargs.pop(\"epsilon\")\nif epsilon is None:\nraise ValueError(\"Group Testing requires error bound epsilon\")\ndelta = kwargs.pop(\"delta\", 0.05)\nreturn group_testing_shapley(\nu,\nepsilon=float(epsilon),\ndelta=delta,\nn_samples=int(n_samples),\nn_jobs=n_jobs,\nprogress=progress,\nseed=seed,\n**kwargs,\n)\nelse:\nraise ValueError(f\"Invalid value encountered in {mode=}\")\n
    "},{"location":"api/pydvl/value/shapley/gt/","title":"Gt","text":"

    This module implements Group Testing for the approximation of Shapley values, as introduced in (Jia, R. et al., 2019)1. The sampling of index subsets is done in such a way that an approximation to the true Shapley values can be computed with guarantees.

    Warning

    This method is very inefficient. Potential improvements to the implementation notwithstanding, convergence seems to be very slow (in terms of evaluations of the utility required). We recommend other Monte Carlo methods instead.

    You can read more in the documentation.

    New in version 0.4.0

    "},{"location":"api/pydvl/value/shapley/gt/#pydvl.value.shapley.gt--references","title":"References","text":"
    1. Jia, R. et al., 2019. Towards Efficient Data Valuation Based on the Shapley Value. In: Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics, pp. 1167\u20131176. PMLR.\u00a0\u21a9

    "},{"location":"api/pydvl/value/shapley/gt/#pydvl.value.shapley.gt.num_samples_eps_delta","title":"num_samples_eps_delta(eps, delta, n, utility_range)","text":"

    Implements the formula in Theorem 3 of (Jia, R. et al., 2019)1 which gives a lower bound on the number of samples required to obtain an (\u03b5/\u221an,\u03b4/(N(N-1)))-approximation to all pair-wise differences of Shapley values, wrt. \\(\\ell_2\\) norm.

    PARAMETER DESCRIPTION eps

    \u03b5

    TYPE: float

    delta

    \u03b4

    TYPE: float

    n

    Number of data points

    TYPE: int

    utility_range

    Range of the Utility function

    TYPE: float

    Returns: Number of samples from \\(2^{[n]}\\) guaranteeing \u03b5/\u221an-correct Shapley pair-wise differences of values with probability 1-\u03b4/(N(N-1)).

    New in version 0.4.0

    Source code in src/pydvl/value/shapley/gt.py
    def num_samples_eps_delta(\neps: float, delta: float, n: int, utility_range: float\n) -> int:\nr\"\"\"Implements the formula in Theorem 3 of (Jia, R. et al., 2019)<sup><a href=\"#jia_efficient_2019\">1</a></sup>\n    which gives a lower bound on the number of samples required to obtain an\n    (\u03b5/\u221an,\u03b4/(N(N-1))-approximation to all pair-wise differences of Shapley\n    values, wrt. $\\ell_2$ norm.\n    Args:\n        eps: \u03b5\n        delta: \u03b4\n        n: Number of data points\n        utility_range: Range of the [Utility][pydvl.utils.utility.Utility] function\n    Returns:\n        Number of samples from $2^{[n]}$ guaranteeing \u03b5/\u221an-correct Shapley\n            pair-wise differences of values with probability 1-\u03b4/(N(N-1)).\n    !!! tip \"New in version 0.4.0\"\n    \"\"\"\nconstants = _constants(n=n, epsilon=eps, delta=delta, utility_range=utility_range)\nreturn int(constants.T)\n
    "},{"location":"api/pydvl/value/shapley/gt/#pydvl.value.shapley.gt.group_testing_shapley","title":"group_testing_shapley(u, n_samples, epsilon, delta, *, n_jobs=1, config=ParallelConfig(), progress=False, seed=None, **options)","text":"

    Implements group testing for approximation of Shapley values as described in (Jia, R. et al., 2019)1.

    Warning

    This method is very inefficient. It requires several orders of magnitude more evaluations of the utility than others in montecarlo. It also uses several intermediate objects like the results from the runners and the constraint matrices which can become rather large.

    By picking a specific distribution over subsets, the differences in Shapley values can be approximated with a Monte Carlo sum. These are then used to solve for the individual values in a feasibility problem.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    n_samples

    Number of tests to perform. Use num_samples_eps_delta to estimate this.

    TYPE: int

    epsilon

    From the (\u03b5,\u03b4) sample bound. Use the same value as for the estimation of n_samples.

    TYPE: float

    delta

    From the (\u03b5,\u03b4) sample bound. Use the same value as for the estimation of n_samples.

    TYPE: float

    n_jobs

    Number of parallel jobs to use. Each worker performs a chunk of all tests (i.e. utility evaluations).

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display progress bars for each job.

    TYPE: bool DEFAULT: False

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    options

    Additional options to pass to cvxpy.Problem.solve(). E.g. to change the solver (which defaults to cvxpy.SCS) pass solver=cvxpy.CVXOPT.

    DEFAULT: {}

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    New in version 0.4.0

    Changed in version 0.5.0

    Changed the solver to cvxpy instead of scipy's linprog. Added the ability to pass arbitrary options to it.

    Source code in src/pydvl/value/shapley/gt.py
    def group_testing_shapley(\nu: Utility,\nn_samples: int,\nepsilon: float,\ndelta: float,\n*,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Optional[Seed] = None,\n**options,\n) -> ValuationResult:\n\"\"\"Implements group testing for approximation of Shapley values as described\n    in (Jia, R. et al., 2019)<sup><a href=\"#jia_efficient_2019\">1</a></sup>.\n    !!! Warning\n        This method is very inefficient. It requires several orders of magnitude\n        more evaluations of the utility than others in\n        [montecarlo][pydvl.value.shapley.montecarlo]. It also uses several intermediate\n        objects like the results from the runners and the constraint matrices\n        which can become rather large.\n    By picking a specific distribution over subsets, the differences in Shapley\n    values can be approximated with a Monte Carlo sum. These are then used to\n    solve for the individual values in a feasibility problem.\n    Args:\n        u: Utility object with model, data, and scoring function\n        n_samples: Number of tests to perform. Use\n            [num_samples_eps_delta][pydvl.value.shapley.gt.num_samples_eps_delta]\n            to estimate this.\n        epsilon: From the (\u03b5,\u03b4) sample bound. Use the same as for the\n            estimation of `n_iterations`.\n        delta: From the (\u03b5,\u03b4) sample bound. Use the same as for the\n            estimation of `n_iterations`.\n        n_jobs: Number of parallel jobs to use. Each worker performs a chunk\n            of all tests (i.e. 
utility evaluations).\n        config: Object configuring parallel computation, with cluster\n            address, number of cpus, etc.\n        progress: Whether to display progress bars for each job.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n        options: Additional options to pass to\n            [cvxpy.Problem.solve()](https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options).\n            E.g. to change the solver (which defaults to `cvxpy.SCS`) pass\n            `solver=cvxpy.CVXOPT`.\n    Returns:\n        Object with the data values.\n    !!! tip \"New in version 0.4.0\"\n    !!! tip \"Changed in version 0.5.0\"\n        Changed the solver to cvxpy instead of scipy's linprog. Added the ability\n        to pass arbitrary options to it.\n    \"\"\"\nn = len(u.data.indices)\nconst = _constants(\nn=n,\nepsilon=epsilon,\ndelta=delta,\nutility_range=u.score_range.max() - u.score_range.min(),\n)\nT = n_samples\nif T < const.T:\nlog.warning(\nf\"n_samples of {T} are below the required {const.T} for the \"\nf\"\u03b5={epsilon:.02f} guarantee at \u03b4={1 - delta:.02f} probability\"\n)\nsamples_per_job = max(1, n_samples // effective_n_jobs(n_jobs, config))\ndef reducer(\nresults_it: Iterable[Tuple[NDArray, NDArray]]\n) -> Tuple[NDArray, NDArray]:\nreturn np.concatenate(list(x[0] for x in results_it)).astype(\nnp.float_\n), np.concatenate(list(x[1] for x in results_it)).astype(np.int_)\nseed_sequence = ensure_seed_sequence(seed)\nmap_reduce_seed_sequence, cvxpy_seed = tuple(seed_sequence.spawn(2))\nmap_reduce_job: MapReduceJob[Utility, Tuple[NDArray, NDArray]] = MapReduceJob(\nu,\nmap_func=_group_testing_shapley,\nreduce_func=reducer,\nmap_kwargs=dict(n_samples=samples_per_job, progress=progress),\nconfig=config,\nn_jobs=n_jobs,\n)\nuu, betas = map_reduce_job(seed=map_reduce_seed_sequence)\n# Matrix of estimated differences. See Eqs. 
(3) and (4) in the paper.\nC = np.zeros(shape=(n, n))\nfor i in range(n):\nfor j in range(i + 1, n):\nC[i, j] = np.dot(uu, betas[:, i] - betas[:, j])\nC *= const.Z / T\ntotal_utility = u(u.data.indices)\n###########################################################################\n# Solution of the constraint problem with cvxpy\nv = cp.Variable(n)\nconstraints = [cp.sum(v) == total_utility]\nfor i in range(n):\nfor j in range(i + 1, n):\nconstraints.append(v[i] - v[j] <= epsilon + C[i, j])\nconstraints.append(v[j] - v[i] <= epsilon - C[i, j])\nproblem = cp.Problem(cp.Minimize(0), constraints)\nsolver = options.pop(\"solver\", cp.SCS)\nproblem.solve(solver=solver, **options)\nif problem.status != \"optimal\":\nlog.warning(f\"cvxpy returned status {problem.status}\")\nvalues = (\nnp.nan * np.ones_like(u.data.indices)\nif not hasattr(v.value, \"__len__\")\nelse v.value\n)\nstatus = Status.Failed\nelse:\nvalues = v.value\nstatus = Status.Converged\nreturn ValuationResult(\nalgorithm=\"group_testing_shapley\",\nstatus=status,\nvalues=values,\ndata_names=u.data.data_names,\nsolver_status=problem.status,\n)\n
    "},{"location":"api/pydvl/value/shapley/knn/","title":"Knn","text":"

    This module contains Shapley computations for K-Nearest Neighbours.

    Todo

    Implement approximate KNN computation for sublinear complexity

    "},{"location":"api/pydvl/value/shapley/knn/#pydvl.value.shapley.knn--references","title":"References","text":"
    1. Jia, R. et al., 2019. Efficient Task-Specific Data Valuation for Nearest Neighbor Algorithms. In: Proceedings of the VLDB Endowment, Vol. 12, No. 11, pp. 1610\u20131623.\u00a0\u21a9

    "},{"location":"api/pydvl/value/shapley/knn/#pydvl.value.shapley.knn.knn_shapley","title":"knn_shapley(u, *, progress=True)","text":"

    Computes exact Shapley values for a KNN classifier.

    This implements the method described in (Jia, R. et al., 2019)1. It exploits the local structure of K-Nearest Neighbours to reduce the number of calls to the utility function to a constant number per index, thus reducing computation time to \\(O(n)\\).

    PARAMETER DESCRIPTION u

    Utility with a KNN model to extract parameters from. The object will not be modified nor used other than to call get_params()

    TYPE: Utility

    progress

    Whether to display a progress bar.

    TYPE: bool DEFAULT: True

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    RAISES DESCRIPTION TypeError

    If the model in the utility is not a sklearn.neighbors.KNeighborsClassifier.

    New in version 0.1.0

    Source code in src/pydvl/value/shapley/knn.py
    def knn_shapley(u: Utility, *, progress: bool = True) -> ValuationResult:\n\"\"\"Computes exact Shapley values for a KNN classifier.\n    This implements the method described in (Jia, R. et al., 2019)<sup><a href=\"#jia_efficient_2019a\">1</a></sup>.\n    It exploits the local structure of K-Nearest Neighbours to reduce the number\n    of calls to the utility function to a constant number per index, thus\n    reducing computation time to $O(n)$.\n    Args:\n        u: Utility with a KNN model to extract parameters from. The object\n            will not be modified nor used other than to call [get_params()](\n            <https://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html#sklearn.base.BaseEstimator.get_params>)\n        progress: Whether to display a progress bar.\n    Returns:\n        Object with the data values.\n    Raises:\n        TypeError: If the model in the utility is not a\n            [sklearn.neighbors.KNeighborsClassifier][].\n    !!! tip \"New in version 0.1.0\"\n    \"\"\"\nif not isinstance(u.model, KNeighborsClassifier):\nraise TypeError(\"KNN Shapley requires a K-Nearest Neighbours model\")\ndefaults: Dict[str, Union[int, str]] = {\n\"algorithm\": \"ball_tree\" if u.data.dim >= 20 else \"kd_tree\",\n\"metric\": \"minkowski\",\n\"p\": 2,\n}\ndefaults.update(u.model.get_params())\n# HACK: NearestNeighbors doesn't support this. 
There will be more...\ndel defaults[\"weights\"]\nn_neighbors: int = int(defaults[\"n_neighbors\"])\ndefaults[\"n_neighbors\"] = len(u.data)  # We want all training points sorted\nassert n_neighbors < len(u.data)\n# assert data.target_dim == 1\nnns = NearestNeighbors(**defaults).fit(u.data.x_train)\n# closest to farthest\n_, indices = nns.kneighbors(u.data.x_test)\nvalues: NDArray[np.float_] = np.zeros_like(u.data.indices, dtype=np.float_)\nn = len(u.data)\nyt = u.data.y_train\niterator = enumerate(zip(u.data.y_test, indices), start=1)\nfor j, (y, ii) in maybe_progress(iterator, progress):\nvalue_at_x = int(yt[ii[-1]] == y) / n\nvalues[ii[-1]] += (value_at_x - values[ii[-1]]) / j\nfor i in range(n - 2, n_neighbors, -1):  # farthest to closest\nvalue_at_x = (\nvalues[ii[i + 1]] + (int(yt[ii[i]] == y) - int(yt[ii[i + 1]] == y)) / i\n)\nvalues[ii[i]] += (value_at_x - values[ii[i]]) / j\nfor i in range(n_neighbors, -1, -1):  # farthest to closest\nvalue_at_x = (\nvalues[ii[i + 1]]\n+ (int(yt[ii[i]] == y) - int(yt[ii[i + 1]] == y)) / n_neighbors\n)\nvalues[ii[i]] += (value_at_x - values[ii[i]]) / j\nreturn ValuationResult(\nalgorithm=\"knn_shapley\",\nstatus=Status.Converged,\nvalues=values,\ndata_names=u.data.data_names,\n)\n
    "},{"location":"api/pydvl/value/shapley/montecarlo/","title":"Montecarlo","text":"

    Monte Carlo approximations to Shapley Data values.

    Warning

    You probably want to use the common interface provided by compute_shapley_values() instead of directly using the functions in this module.

    Because exact computation of Shapley values requires \\(\\mathcal{O}(2^n)\\) re-trainings of the model, several Monte Carlo approximations are available. The first two sample from the powerset of the training data directly: combinatorial_montecarlo_shapley() and owen_sampling_shapley(). The latter uses a reformulation in terms of a continuous extension of the utility.

    Alternatively, employing another reformulation of the Shapley value as a sum over permutations, one has the implementation in permutation_montecarlo_shapley(), or, using an early stopping strategy to reduce computation, truncated_montecarlo_shapley().

    Also see

    It is also possible to use group_testing_shapley() to reduce the number of evaluations of the utility. The method is however typically outperformed by others in this module.

    Also see

    Additionally, you can consider grouping your data points using GroupedDataset and computing the values of the groups instead. This is not to be confused with \"group testing\" as implemented in group_testing_shapley(): any of the algorithms mentioned above, including Group Testing, can work to valuate groups of samples as units.

    "},{"location":"api/pydvl/value/shapley/montecarlo/#pydvl.value.shapley.montecarlo--references","title":"References","text":"
    1. Ghorbani, A., Zou, J., 2019. Data Shapley: Equitable Valuation of Data for Machine Learning. In: Proceedings of the 36th International Conference on Machine Learning, PMLR, pp. 2242\u20132251.\u00a0\u21a9

    "},{"location":"api/pydvl/value/shapley/montecarlo/#pydvl.value.shapley.montecarlo.permutation_montecarlo_shapley","title":"permutation_montecarlo_shapley(u, done, *, truncation=NoTruncation(), n_jobs=1, config=ParallelConfig(), progress=False, seed=None)","text":"

    Computes an approximate Shapley value by sampling independent permutations of the index set, approximating the sum:

    \\[ v_u(x_i) = \\frac{1}{n!} \\sum_{\\sigma \\in \\Pi(n)} \\tilde{w}( | \\sigma_{:i} | )[u(\\sigma_{:i} \\cup \\{i\\}) \u2212 u(\\sigma_{:i})], \\]

    where \\(\\sigma_{:i}\\) denotes the set of indices in permutation \\(\\sigma\\) before the position where \\(i\\) appears (see Data valuation for details).

    This implements the method described in (Ghorbani and Zou, 2019)1 with a double stopping criterion.

    Todo: Think of how to add the Gelman-Rubin diagnostic or some other more principled stopping criterion.

    Instead of naively implementing the expectation, we sequentially add points to coalitions from a permutation and incrementally compute marginal utilities. We stop computing marginals for a given permutation based on a TruncationPolicy. (Ghorbani and Zou, 2019)1 mention two policies: one that stops after a certain fraction of marginals are computed, implemented in FixedTruncation, and one that stops if the last computed utility (\"score\") is close to the total utility using the standard deviation of the utility as a measure of proximity, implemented in BootstrapTruncation.

    We keep sampling permutations and updating all shapley values until the StoppingCriterion returns True.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function.

    TYPE: Utility

    done

    function checking whether computation must stop.

    TYPE: StoppingCriterion

    truncation

    An optional callable which decides whether to interrupt processing a permutation and set all subsequent marginals to zero. Typically used to stop computation when the marginal is small.

    TYPE: TruncationPolicy DEFAULT: NoTruncation()

    n_jobs

    number of jobs across which to distribute the computation.

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display a progress bar.

    TYPE: bool DEFAULT: False

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Seed DEFAULT: None

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    Source code in src/pydvl/value/shapley/montecarlo.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.8.0\",\nargs_mapping=dict(\ncoordinator_update_period=None, worker_update_period=None, progress=None\n),\n)\ndef permutation_montecarlo_shapley(\nu: Utility,\ndone: StoppingCriterion,\n*,\ntruncation: TruncationPolicy = NoTruncation(),\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Seed = None,\n) -> ValuationResult:\nr\"\"\"Computes an approximate Shapley value by sampling independent\n    permutations of the index set, approximating the sum:\n    $$\n    v_u(x_i) = \\frac{1}{n!} \\sum_{\\sigma \\in \\Pi(n)}\n    \\tilde{w}( | \\sigma_{:i} | )[u(\\sigma_{:i} \\cup \\{i\\}) \u2212 u(\\sigma_{:i})],\n    $$\n    where $\\sigma_{:i}$ denotes the set of indices in permutation sigma before\n    the position where $i$ appears (see [[data-valuation]] for details).\n    This implements the method described in (Ghorbani and Zou, 2019)<sup><a href=\"#ghorbani_data_2019\">1</a></sup>\n    with a double stopping criterion.\n    .. 
todo::\n       Think of how to add Robin-Gelman or some other more principled stopping\n       criterion.\n    Instead of naively implementing the expectation, we sequentially add points\n    to coalitions from a permutation and incrementally compute marginal utilities.\n    We stop computing marginals for a given permutation based on a\n    [TruncationPolicy][pydvl.value.shapley.truncated.TruncationPolicy].\n    (Ghorbani and Zou, 2019)<sup><a href=\"#ghorbani_data_2019\">1</a></sup>\n    mention two policies: one that stops after a certain\n    fraction of marginals are computed, implemented in\n    [FixedTruncation][pydvl.value.shapley.truncated.FixedTruncation],\n    and one that stops if the last computed utility (\"score\") is close to the\n    total utility using the standard deviation of the utility as a measure of\n    proximity, implemented in\n    [BootstrapTruncation][pydvl.value.shapley.truncated.BootstrapTruncation].\n    We keep sampling permutations and updating all shapley values\n    until the [StoppingCriterion][pydvl.value.stopping.StoppingCriterion] returns\n    `True`.\n    Args:\n        u: Utility object with model, data, and scoring function.\n        done: function checking whether computation must stop.\n        truncation: An optional callable which decides whether to interrupt\n            processing a permutation and set all subsequent marginals to\n            zero. 
Typically used to stop computation when the marginal is small.\n        n_jobs: number of jobs across which to distribute the computation.\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        progress: Whether to display a progress bar.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n    Returns:\n        Object with the data values.\n    \"\"\"\nalgorithm = \"permutation_montecarlo_shapley\"\nparallel_backend = init_parallel_backend(config)\nu = parallel_backend.put(u)\nmax_workers = effective_n_jobs(n_jobs, config)\nn_submitted_jobs = 2 * max_workers  # number of jobs in the executor's queue\nseed_sequence = ensure_seed_sequence(seed)\nresult = ValuationResult.zeros(\nalgorithm=algorithm, indices=u.data.indices, data_names=u.data.data_names\n)\npbar = tqdm(disable=not progress, total=100, unit=\"%\")\nwith init_executor(\nmax_workers=max_workers, config=config, cancel_futures=CancellationPolicy.ALL\n) as executor:\npending: set[Future] = set()\nwhile True:\npbar.n = 100 * done.completion()\npbar.refresh()\ncompleted, pending = wait(\npending, timeout=config.wait_timeout, return_when=FIRST_COMPLETED\n)\nfor future in completed:\nresult += future.result()\n# we could check outside the loop, but that means more\n# submissions if the stopping criterion is unstable\nif done(result):\nreturn result\n# Ensure that we always have n_submitted_jobs in the queue or running\nn_remaining_slots = n_submitted_jobs - len(pending)\nseeds = seed_sequence.spawn(n_remaining_slots)\nfor i in range(n_remaining_slots):\nfuture = executor.submit(\n_permutation_montecarlo_one_step,\nu,\ntruncation,\nalgorithm,\nseed=seeds[i],\n)\npending.add(future)\n
    "},{"location":"api/pydvl/value/shapley/montecarlo/#pydvl.value.shapley.montecarlo.combinatorial_montecarlo_shapley","title":"combinatorial_montecarlo_shapley(u, done, *, n_jobs=1, config=ParallelConfig(), progress=False, seed=None)","text":"

    Computes an approximate Shapley value using the combinatorial definition:

    \\[v_u(i) = \\frac{1}{n} \\sum_{S \\subseteq N \\setminus \\{i\\}} \\binom{n-1}{ | S | }^{-1} [u(S \\cup \\{i\\}) \u2212 u(S)]\\]

    This consists of randomly sampling subsets of the power set of the training indices in u.data, and computing their marginal utilities. See Data valuation for details.

    Note that because sampling is done with replacement, the approximation is poor even for \\(2^{m}\\) subsets with \\(m>n\\), even though there are \\(2^{n-1}\\) subsets for each \\(i\\). Prefer permutation_montecarlo_shapley().

    Parallelization is done by splitting the set of indices across processes and computing the sum over subsets \\(S \\subseteq N \\setminus \\{i\\}\\) separately.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    done

    Stopping criterion for the computation.

    TYPE: StoppingCriterion

    n_jobs

    number of parallel jobs across which to distribute the computation. Each worker receives a chunk of indices

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display progress bars for each job.

    TYPE: bool DEFAULT: False

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    Source code in src/pydvl/value/shapley/montecarlo.py
    def combinatorial_montecarlo_shapley(\nu: Utility,\ndone: StoppingCriterion,\n*,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Optional[Seed] = None,\n) -> ValuationResult:\nr\"\"\"Computes an approximate Shapley value using the combinatorial\n    definition:\n    $$v_u(i) = \\frac{1}{n} \\sum_{S \\subseteq N \\setminus \\{i\\}}\n    \\binom{n-1}{ | S | }^{-1} [u(S \\cup \\{i\\}) \u2212 u(S)]$$\n    This consists of randomly sampling subsets of the power set of the training\n    indices in [u.data][pydvl.utils.utility.Utility], and computing their\n    marginal utilities. See [Data valuation][computing-data-values] for details.\n    Note that because sampling is done with replacement, the approximation is\n    poor even for $2^{m}$ subsets with $m>n$, even though there are $2^{n-1}$\n    subsets for each $i$. Prefer\n    [permutation_montecarlo_shapley()][pydvl.value.shapley.montecarlo.permutation_montecarlo_shapley].\n    Parallelization is done by splitting the set of indices across processes and\n    computing the sum over subsets $S \\subseteq N \\setminus \\{i\\}$ separately.\n    Args:\n        u: Utility object with model, data, and scoring function\n        done: Stopping criterion for the computation.\n        n_jobs: number of parallel jobs across which to distribute the\n            computation. 
Each worker receives a chunk of\n            [indices][pydvl.utils.dataset.Dataset.indices]\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        progress: Whether to display progress bars for each job.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n    Returns:\n        Object with the data values.\n    \"\"\"\nmap_reduce_job: MapReduceJob[NDArray, ValuationResult] = MapReduceJob(\nu.data.indices,\nmap_func=_combinatorial_montecarlo_shapley,\nreduce_func=lambda results: reduce(operator.add, results),\nmap_kwargs=dict(u=u, done=done, progress=progress),\nn_jobs=n_jobs,\nconfig=config,\n)\nreturn map_reduce_job(seed=seed)\n
    "},{"location":"api/pydvl/value/shapley/naive/","title":"Naive","text":""},{"location":"api/pydvl/value/shapley/naive/#pydvl.value.shapley.naive.permutation_exact_shapley","title":"permutation_exact_shapley(u, *, progress=True)","text":"

    Computes the exact Shapley value using the formulation with permutations:

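    \\[v_u(x_i) = \\frac{1}{n!} \\sum_{\\sigma \\in \\Pi(n)} [u(\\sigma_{:i} \\cup \\{i\\}) \u2212 u(\\sigma_{:i})],\\]

    where \\(\\sigma_{:i}\\) denotes the set of indices in permutation \\(\\sigma\\) before the position where \\(i\\) appears. See Data valuation for details.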

    When the length of the training set is > 10 this prints a warning since the computation becomes too expensive. Used mostly for internal testing and simple use cases. Please refer to the Monte Carlo approximations for practical applications.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    progress

    Whether to display progress bars for each job.

    TYPE: bool DEFAULT: True

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    Source code in src/pydvl/value/shapley/naive.py
    def permutation_exact_shapley(u: Utility, *, progress: bool = True) -> ValuationResult:\nr\"\"\"Computes the exact Shapley value using the formulation with permutations:\n    $$v_u(x_i) = \\frac{1}{n!} \\sum_{\\sigma \\in \\Pi(n)} [u(\\sigma_{i-1} \\cup {i}) \u2212 u(\\sigma_{i})].$$\n    See [Data valuation][computing-data-values] for details.\n    When the length of the training set is > 10 this prints a warning since the\n    computation becomes too expensive. Used mostly for internal testing and\n    simple use cases. Please refer to the [Monte Carlo\n    approximations][pydvl.value.shapley.montecarlo] for practical applications.\n    Args:\n        u: Utility object with model, data, and scoring function\n        progress: Whether to display progress bars for each job.\n    Returns:\n        Object with the data values.\n    \"\"\"\nn = len(u.data)\n# Note that the cache in utility saves most of the refitting because we\n# use frozenset for the input.\nif n > 10:\nwarnings.warn(\nf\"Large dataset! Computation requires {n}! calls to utility()\",\nRuntimeWarning,\n)\nvalues = np.zeros(n)\nfor p in maybe_progress(\npermutations(u.data.indices),\nprogress,\ndesc=\"Permutation\",\ntotal=math.factorial(n),\n):\nfor i, idx in enumerate(p):\nvalues[idx] += u(p[: i + 1]) - u(p[:i])\nvalues /= math.factorial(n)\nreturn ValuationResult(\nalgorithm=\"permutation_exact_shapley\",\nstatus=Status.Converged,\nvalues=values,\ndata_names=u.data.data_names,\n)\n
    "},{"location":"api/pydvl/value/shapley/naive/#pydvl.value.shapley.naive.combinatorial_exact_shapley","title":"combinatorial_exact_shapley(u, *, n_jobs=1, config=ParallelConfig(), progress=False)","text":"

    Computes the exact Shapley value using the combinatorial definition.

    \\[v_u(i) = \\frac{1}{n} \\sum_{S \\subseteq N \\setminus \\{i\\}} \\binom{n-1}{ | S | }^{-1} [u(S \\cup \\{i\\}) \u2212 u(S)].\\]

    See Data valuation for details.

    Note

    If the length of the training set is > n_jobs*20 this prints a warning because the computation is very expensive. Used mostly for internal testing and simple use cases. Please refer to the Monte Carlo approximations for practical applications.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    n_jobs

    Number of parallel jobs to use

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display progress bars for each job.

    TYPE: bool DEFAULT: False

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    Source code in src/pydvl/value/shapley/naive.py
    def combinatorial_exact_shapley(\nu: Utility,\n*,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\n) -> ValuationResult:\nr\"\"\"Computes the exact Shapley value using the combinatorial definition.\n    $$v_u(i) = \\frac{1}{n} \\sum_{S \\subseteq N \\setminus \\{i\\}} \\binom{n-1}{ | S | }^{-1} [u(S \\cup \\{i\\}) \u2212 u(S)].$$\n    See [Data valuation][computing-data-values] for details.\n    !!! Note\n        If the length of the training set is > n_jobs*20 this prints a warning\n        because the computation is very expensive. Used mostly for internal testing\n        and simple use cases. Please refer to the\n        [Monte Carlo][pydvl.value.shapley.montecarlo] approximations for practical\n        applications.\n    Args:\n        u: Utility object with model, data, and scoring function\n        n_jobs: Number of parallel jobs to use\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        progress: Whether to display progress bars for each job.\n    Returns:\n        Object with the data values.\n    \"\"\"\n# Arbitrary choice, will depend on time required, caching, etc.\nif len(u.data) // n_jobs > 20:\nwarnings.warn(\nf\"Large dataset! Computation requires 2^{len(u.data)} calls to model.fit()\"\n)\ndef reduce_fun(results: List[NDArray]) -> NDArray:\nreturn np.array(results).sum(axis=0)  # type: ignore\nmap_reduce_job: MapReduceJob[NDArray, NDArray] = MapReduceJob(\nu.data.indices,\nmap_func=_combinatorial_exact_shapley,\nmap_kwargs=dict(u=u, progress=progress),\nreduce_func=reduce_fun,\nn_jobs=n_jobs,\nconfig=config,\n)\nvalues = map_reduce_job()\nreturn ValuationResult(\nalgorithm=\"combinatorial_exact_shapley\",\nstatus=Status.Converged,\nvalues=values,\ndata_names=u.data.data_names,\n)\n
    "},{"location":"api/pydvl/value/shapley/owen/","title":"Owen","text":""},{"location":"api/pydvl/value/shapley/owen/#pydvl.value.shapley.owen--references","title":"References","text":"
    1. Okhrati, R., Lipani, A., 2021. A Multilinear Sampling Algorithm to Estimate Shapley Values. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 7992\u20137999. IEEE.\u00a0\u21a9

    "},{"location":"api/pydvl/value/shapley/owen/#pydvl.value.shapley.owen.owen_sampling_shapley","title":"owen_sampling_shapley(u, n_samples, max_q, *, method=OwenAlgorithm.Standard, n_jobs=1, config=ParallelConfig(), progress=False, seed=None)","text":"

    Owen sampling of Shapley values as described in (Okhrati and Lipani, 2021)1.

    This function computes a Monte Carlo approximation to

    \\[v_u(i) = \\int_0^1 \\mathbb{E}_{S \\sim P_q(D_{\\backslash \\{i\\}})} [u(S \\cup \\{i\\}) - u(S)] \\, dq\\]

    using one of two methods. The first one, selected with the argument method = OwenAlgorithm.Standard, approximates the integral with:

    \\[\\hat{v}_u(i) = \\frac{1}{Q M} \\sum_{j=0}^Q \\sum_{m=1}^M [u(S^{(q_j)}_m \\cup \\{i\\}) - u(S^{(q_j)}_m)],\\]

    where \\(q_j = \\frac{j}{Q} \\in [0,1]\\) and the sets \\(S^{(q_j)}\\) are such that a sample \\(x \\in S^{(q_j)}\\) if a draw from a \\(Ber(q_j)\\) distribution is 1.

    The second method, selected with the argument method = OwenAlgorithm.Antithetic, uses correlated samples in the inner sum to reduce the variance:

    \\[\\hat{v}_u(i) = \\frac{1}{2 Q M} \\sum_{j=0}^Q \\sum_{m=1}^M [u(S^{(q_j)}_m \\cup \\{i\\}) - u(S^{(q_j)}_m) + u((S^{(q_j)}_m)^c \\cup \\{i\\}) - u((S^{( q_j)}_m)^c)],\\]

    where now \\(q_j = \\frac{j}{2Q} \\in [0,\\frac{1}{2}]\\), and \\(S^c\\) is the complement of \\(S\\).

    Note

    The outer integration could be done instead with a quadrature rule.

    PARAMETER DESCRIPTION u

    Utility object holding data, model and scoring function.

    TYPE: Utility

    n_samples

    Number of sets to sample for each value of q

    TYPE: int

    max_q

    Number of subdivisions for q \u2208 [0,1] (the element sampling probability) used to approximate the outer integral.

    TYPE: int

    method

    Selects the algorithm to use, see the description. Either OwenAlgorithm.Standard for \\(q \\in [0,1]\\) or OwenAlgorithm.Antithetic for \\(q \\in [0,0.5]\\) and correlated samples

    TYPE: OwenAlgorithm DEFAULT: Standard

    n_jobs

    Number of parallel jobs to use. Each worker receives a chunk of the total of max_q values for q.

    TYPE: int DEFAULT: 1

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    progress

    Whether to display progress bars for each job.

    TYPE: bool DEFAULT: False

    seed

    Either an instance of a numpy random number generator or a seed for it.

    TYPE: Optional[Seed] DEFAULT: None

    RETURNS DESCRIPTION ValuationResult

    Object with the data values.

    New in version 0.3.0

    Changed in version 0.5.0

    Support for parallel computation and enable antithetic sampling.

    Source code in src/pydvl/value/shapley/owen.py
    def owen_sampling_shapley(\nu: Utility,\nn_samples: int,\nmax_q: int,\n*,\nmethod: OwenAlgorithm = OwenAlgorithm.Standard,\nn_jobs: int = 1,\nconfig: ParallelConfig = ParallelConfig(),\nprogress: bool = False,\nseed: Optional[Seed] = None\n) -> ValuationResult:\nr\"\"\"Owen sampling of Shapley values as described in\n    (Okhrati and Lipani, 2021)<sup><a href=\"#okhrati_multilinear_2021\">1</a></sup>.\n    This function computes a Monte Carlo approximation to\n    $$v_u(i) = \\int_0^1 \\mathbb{E}_{S \\sim P_q(D_{\\backslash \\{i\\}})}\n    [u(S \\cup \\{i\\}) - u(S)]$$\n    using one of two methods. The first one, selected with the argument ``mode =\n    OwenAlgorithm.Standard``, approximates the integral with:\n    $$\\hat{v}_u(i) = \\frac{1}{Q M} \\sum_{j=0}^Q \\sum_{m=1}^M [u(S^{(q_j)}_m\n    \\cup \\{i\\}) - u(S^{(q_j)}_m)],$$\n    where $q_j = \\frac{j}{Q} \\in [0,1]$ and the sets $S^{(q_j)}$ are such that a\n    sample $x \\in S^{(q_j)}$ if a draw from a $Ber(q_j)$ distribution is 1.\n    The second method, selected with the argument ``mode =\n    OwenAlgorithm.Antithetic``, uses correlated samples in the inner sum to\n    reduce the variance:\n    $$\\hat{v}_u(i) = \\frac{1}{2 Q M} \\sum_{j=0}^Q \\sum_{m=1}^M [u(S^{(q_j)}_m\n    \\cup \\{i\\}) - u(S^{(q_j)}_m) + u((S^{(q_j)}_m)^c \\cup \\{i\\}) - u((S^{(\n    q_j)}_m)^c)],$$\n    where now $q_j = \\frac{j}{2Q} \\in [0,\\frac{1}{2}]$, and $S^c$ is the\n    complement of $S$.\n    !!! Note\n        The outer integration could be done instead with a quadrature rule.\n    Args:\n        u: [Utility][pydvl.utils.utility.Utility] object holding data, model\n            and scoring function.\n        n_samples: Numer of sets to sample for each value of q\n        max_q: Number of subdivisions for q \u2208 [0,1] (the element sampling\n            probability) used to approximate the outer integral.\n        method: Selects the algorithm to use, see the description. 
Either\n            [OwenAlgorithm.Full][pydvl.value.shapley.owen.OwenAlgorithm] for\n            $q \\in [0,1]$ or\n            [OwenAlgorithm.Halved][pydvl.value.shapley.owen.OwenAlgorithm] for\n            $q \\in [0,0.5]$ and correlated samples\n        n_jobs: Number of parallel jobs to use. Each worker receives a chunk\n            of the total of `max_q` values for q.\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        progress: Whether to display progress bars for each job.\n        seed: Either an instance of a numpy random number generator or a seed for it.\n    Returns:\n        Object with the data values.\n    !!! tip \"New in version 0.3.0\"\n    !!! tip \"Changed in version 0.5.0\"\n        Support for parallel computation and enable antithetic sampling.\n    \"\"\"\nmap_reduce_job: MapReduceJob[NDArray, ValuationResult] = MapReduceJob(\nu.data.indices,\nmap_func=_owen_sampling_shapley,\nreduce_func=lambda results: reduce(operator.add, results),\nmap_kwargs=dict(\nu=u,\nmethod=OwenAlgorithm(method),\nn_samples=n_samples,\nmax_q=max_q,\nprogress=progress,\n),\nn_jobs=n_jobs,\nconfig=config,\n)\nreturn map_reduce_job(seed=seed)\n
    "},{"location":"api/pydvl/value/shapley/truncated/","title":"Truncated","text":""},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated--references","title":"References","text":"
    1. Ghorbani, A., Zou, J., 2019. Data Shapley: Equitable Valuation of Data for Machine Learning. In: Proceedings of the 36th International Conference on Machine Learning, PMLR, pp. 2242\u20132251.\u00a0\u21a9

    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.TruncationPolicy","title":"TruncationPolicy()","text":"

    Bases: ABC

    A policy for deciding whether to stop computing marginals in a permutation.

    Statistics are kept on the number of calls and truncations as n_calls and n_truncations respectively.

    ATTRIBUTE DESCRIPTION n_calls

    Number of calls to the policy.

    TYPE: int

    n_truncations

    Number of truncations made by the policy.

    TYPE: int

    Todo

    Because the policy objects are copied to the workers, the statistics are not accessible from the coordinating process. We need to add methods for this.

    Source code in src/pydvl/value/shapley/truncated.py
    def __init__(self):\nself.n_calls: int = 0\nself.n_truncations: int = 0\n
    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.TruncationPolicy.reset","title":"reset() abstractmethod","text":"

    Reset the policy to a state ready for a new permutation.

    Source code in src/pydvl/value/shapley/truncated.py
    @abc.abstractmethod\ndef reset(self):\n\"\"\"Reset the policy to a state ready for a new permutation.\"\"\"\n...\n
    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.TruncationPolicy.__call__","title":"__call__(idx, score)","text":"

    Check whether the computation should be interrupted.

    PARAMETER DESCRIPTION idx

    Position in the permutation currently being computed.

    TYPE: int

    score

    Last utility computed.

    TYPE: float

    RETURNS DESCRIPTION bool

    True if the computation should be interrupted.

    Source code in src/pydvl/value/shapley/truncated.py
    def __call__(self, idx: int, score: float) -> bool:\n\"\"\"Check whether the computation should be interrupted.\n    Args:\n        idx: Position in the permutation currently being computed.\n        score: Last utility computed.\n    Returns:\n        `True` if the computation should be interrupted.\n    \"\"\"\nret = self._check(idx, score)\nself.n_calls += 1\nself.n_truncations += 1 if ret else 0\nreturn ret\n
    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.NoTruncation","title":"NoTruncation","text":"

    Bases: TruncationPolicy

    A policy which never interrupts the computation.

    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.FixedTruncation","title":"FixedTruncation(u, fraction)","text":"

    Bases: TruncationPolicy

    Break a permutation after computing a fixed number of marginals.

    The experiments in Appendix B of (Ghorbani and Zou, 2019)1 show that when the training set size is large enough, one can simply truncate the iteration over permutations after a fixed number of steps. This happens because beyond a certain number of samples in a training set, the model becomes insensitive to new ones. Alas, this strongly depends on the data distribution and the model and there is no automatic way of estimating this number.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    fraction

    Fraction of marginals in a permutation to compute before stopping (e.g. 0.5 to compute half of the marginals).

    TYPE: float

    Source code in src/pydvl/value/shapley/truncated.py
    def __init__(self, u: Utility, fraction: float):\nsuper().__init__()\nif fraction <= 0 or fraction > 1:\nraise ValueError(\"fraction must be in (0, 1]\")\nself.max_marginals = len(u.data) * fraction\nself.count = 0\n
    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.RelativeTruncation","title":"RelativeTruncation(u, rtol)","text":"

    Bases: TruncationPolicy

    Break a permutation if the marginal utility is too low.

    This is called \"performance tolerance\" in (Ghorbani and Zou, 2019)1.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    rtol

    Relative tolerance. The permutation is broken if the last computed utility is less than total_utility * rtol.

    TYPE: float

    Source code in src/pydvl/value/shapley/truncated.py
    def __init__(self, u: Utility, rtol: float):\nsuper().__init__()\nself.rtol = rtol\nlogger.info(\"Computing total utility for permutation truncation.\")\nself.total_utility = u(u.data.indices)\n
    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.BootstrapTruncation","title":"BootstrapTruncation(u, n_samples, sigmas=1)","text":"

    Bases: TruncationPolicy

    Break a permutation if the last computed utility is close to the total utility, measured as a multiple of the standard deviation of the utilities.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    n_samples

    Number of bootstrap samples to use to compute the variance of the utilities.

    TYPE: int

    sigmas

    Number of standard deviations to use as a threshold.

    TYPE: float DEFAULT: 1

    Source code in src/pydvl/value/shapley/truncated.py
    def __init__(self, u: Utility, n_samples: int, sigmas: float = 1):\nsuper().__init__()\nself.n_samples = n_samples\nlogger.info(\"Computing total utility for permutation truncation.\")\nself.total_utility = u(u.data.indices)\nself.count: int = 0\nself.variance: float = 0\nself.mean: float = 0\nself.sigmas: float = sigmas\n
    "},{"location":"api/pydvl/value/shapley/truncated/#pydvl.value.shapley.truncated.truncated_montecarlo_shapley","title":"truncated_montecarlo_shapley(u, *, done, truncation, config=ParallelConfig(), n_jobs=1, coordinator_update_period=10, worker_update_period=5)","text":"

    Warning

    This method is deprecated and only a wrapper for permutation_montecarlo_shapley.

    Todo

    Think of how to add Gelman-Rubin or some other more principled stopping criterion.

    PARAMETER DESCRIPTION u

    Utility object with model, data, and scoring function

    TYPE: Utility

    done

    Check on the results which decides when to stop sampling permutations.

    TYPE: StoppingCriterion

    truncation

    callable that decides whether to stop computing marginals for a given permutation.

    TYPE: TruncationPolicy

    config

    Object configuring parallel computation, with cluster address, number of cpus, etc.

    TYPE: ParallelConfig DEFAULT: ParallelConfig()

    n_jobs

    Number of permutation monte carlo jobs to run concurrently.

    TYPE: int DEFAULT: 1

    Returns: Object with the data values.

    Source code in src/pydvl/value/shapley/truncated.py
    @deprecated(\ntarget=True,\ndeprecated_in=\"0.7.0\",\nremove_in=\"0.8.0\",\nargs_mapping=dict(coordinator_update_period=None, worker_update_period=None),\n)\ndef truncated_montecarlo_shapley(\nu: Utility,\n*,\ndone: StoppingCriterion,\ntruncation: TruncationPolicy,\nconfig: ParallelConfig = ParallelConfig(),\nn_jobs: int = 1,\ncoordinator_update_period: int = 10,\nworker_update_period: int = 5,\n) -> ValuationResult:\n\"\"\"\n    !!! Warning\n        This method is deprecated and only a wrapper for\n        [permutation_montecarlo_shapley][pydvl.value.shapley.montecarlo.permutation_montecarlo_shapley].\n    !!! Todo\n        Think of how to add Robin-Gelman or some other more principled stopping\n        criterion.\n    Args:\n        u: Utility object with model, data, and scoring function\n        done: Check on the results which decides when to stop sampling\n            permutations.\n        truncation: callable that decides whether to stop computing marginals\n            for a given permutation.\n        config: Object configuring parallel computation, with cluster address,\n            number of cpus, etc.\n        n_jobs: Number of permutation monte carlo jobs to run concurrently.\n    Returns:\n        Object with the data values.\n    \"\"\"\nfrom pydvl.value.shapley.montecarlo import permutation_montecarlo_shapley\nreturn cast(\nValuationResult,\npermutation_montecarlo_shapley(\nu, done=done, truncation=truncation, config=config, n_jobs=n_jobs\n),\n)\n
    "},{"location":"api/pydvl/value/shapley/types/","title":"Types","text":""},{"location":"api/pydvl/value/shapley/types/#pydvl.value.shapley.types.ShapleyMode","title":"ShapleyMode","text":"

    Bases: str, Enum

    Supported algorithms for the computation of Shapley values.

    Todo

    Make algorithms register themselves here.

    "},{"location":"examples/data_oob/","title":"Data OOB","text":"

    This notebook introduces the Data-OOB method, an implementation based on a publication from Kwon and Zou \"Data-OOB: Out-of-bag Estimate as a Simple and Efficient Data Value\" ICML 2023 , using pyDVL.

    The main objective of this paper is to overcome the computational bottleneck of Shapley-based data valuation methods, which require fitting a significant number of models to accurately estimate marginal contributions. The algorithm computes data values from out-of-bag estimates using a bagging model.

    The value can be interpreted as a partition of the OOB estimate, which was originally introduced to estimate the prediction error. This OOB estimate is given as:

    \\[ \\sum_{i=1}^n\\frac{\\sum_{b=1}^{B}\\mathbb{1}(w_{bi}=0)T(y_i, \\hat{f}_b(x_i))}{\\sum_{b=1}^{B} \\mathbb{1} (w_{bi}=0)} \\]
    from pydvl.utils import Dataset, Scorer, Seed, Utility, ensure_seed_sequence\nfrom pydvl.value import ValuationResult, compute_data_oob\nRANDOM_SEED = 42\n

    We will work with the adult classification dataset from the UCI repository. The objective is to predict whether a person earns more than 50k a year based on a set of features such as age, education, occupation, etc.

    With a helper function we download the data and obtain the following pandas dataframe, where the categorical features have been removed:

    age fnlwgt education-num capital-gain capital-loss hours-per-week income 0 39 77516 13 2174 0 40 <=50K 1 50 83311 13 0 0 13 <=50K 2 38 215646 9 0 0 40 <=50K 3 53 234721 7 0 0 40 <=50K 4 28 338409 13 0 0 40 <=50K
    data = Dataset.from_arrays(\nX=data_adult.drop(columns=[\"income\"]).values,\ny=data_adult.loc[:, \"income\"].cat.codes.values,\nrandom_state=RANDOM_SEED,\n)\nmodel = KNeighborsClassifier(n_neighbors=5)\nutility = Utility(model, data, Scorer(\"accuracy\", default=0.0))\n
    n_estimators = [100, 500]\noob_values = [\ncompute_data_oob(utility, n_est=n_est, max_samples=0.95, seed=RANDOM_SEED)\nfor n_est in n_estimators\n]\n

    The two results are stored in an array of ValuationResult objects. Here's their distribution. The left-hand side depicts value as it increases with rank and a 99% t-confidence interval. The right-hand side shows the histogram of values.

    Observe how adding estimators reduces the variance of the values, but doesn't change their distribution much.

    "},{"location":"examples/data_oob/#bagging-for-data-valuation","title":"Bagging for data valuation","text":""},{"location":"examples/data_oob/#setup","title":"Setup","text":"

    We begin by importing the main libraries and setting some defaults.

    If you are reading this in the documentation, some boilerplate (including most plotting code) has been omitted for convenience."},{"location":"examples/data_oob/#computing-the-oob-values","title":"Computing the OOB values","text":"

    The main idea of Data-OOB is to take an existing classifier or regression model and compute a per-sample out-of-bag performance estimate via bagging.

    For this example, we use a simple KNN classifier with \\(k=5\\) neighbours on the data and compute the data-oob values with two choices for the number of estimators in the bagging. For that we construct a Utility object using the Scorer class to specify the metric to use for the evaluation. Note how we pass a random seed to Dataset.from_arrays in order to ensure that we always get the same split when running this notebook multiple times. This will be particularly important when running the standard point removal experiments later.

    We then use the compute_data_oob function to compute the data-oob values.

    "},{"location":"examples/data_oob/#point-removal-experiments","title":"Point removal experiments","text":"

    The standard procedure for the evaluation of data valuation schemes is the point removal experiment. The objective is to measure the evolution of performance when the best/worst points are removed from the training set. This can be done with the function compute_removal_score, which takes precomputed values and computes the performance of the model as points are removed.

    In order to test the true performance of Data-OOB, we repeat the whole task of computing the values and the point removal experiment multiple times, including the splitting of the dataset into training and valuation sets. It is important to remember to pass the random state consistently for full reproducibility.

    "},{"location":"examples/influence_imagenet/","title":"For CNNs","text":"If you are reading this in the documentation, some boilerplate has been omitted for convenience.
    %load_ext autoreload\n
    %autoreload\n%matplotlib inline\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport os\nimport pandas as pd\nimport torch\nfrom torch import nn\nfrom notebook_support import (\nplot_sample_images,\nplot_lowest_highest_influence_images,\nplot_losses,\ncorrupt_imagenet,\nload_preprocess_imagenet,\nplot_corrupted_influences_distribution,\ncompute_mean_corrupted_influences,\nTrainingManager,\nMODEL_PATH,\nnew_resnet_model,\n)\ndefault_figsize = (7, 7)\nplt.rcParams[\"figure.figsize\"] = default_figsize\nplt.rcParams[\"font.size\"] = 12\nplt.rcParams[\"xtick.labelsize\"] = 12\nplt.rcParams[\"ytick.labelsize\"] = 10\nhessian_reg = 1e4 if os.environ.get(\"CI\") else 1e-3\nrandom_state = 42\nnp.random.seed(random_state)\nDEVICE = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n
    from pydvl.influence.general import compute_influences\nfrom pydvl.reporting.plots import plot_influence_distribution_by_label\nfrom sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score\n
    label_names = {90: \"tables\", 100: \"boats\"}\ntrain_ds, val_ds, test_ds = load_preprocess_imagenet(\ntrain_size=0.8,\ntest_size=0.1,\nkeep_labels=label_names,\ndownsampling_ratio=1,\n)\nprint(\"Normalised image dtype:\", train_ds[\"normalized_images\"][0].dtype)\nprint(\"Label type:\", type(train_ds[\"labels\"][0]))\nprint(\"Image type:\", type(train_ds[\"images\"][0]))\ntrain_ds.info()\n
    \nNormalised image dtype: torch.float32\nLabel type: <class 'str'>\nImage type: <class 'PIL.JpegImagePlugin.JpegImageFile'>\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 707 entries, 0 to 706\nData columns (total 3 columns):\n #   Column             Non-Null Count  Dtype \n---  ------             --------------  ----- \n 0   normalized_images  707 non-null    object\n 1   labels             707 non-null    object\n 2   images             707 non-null    object\ndtypes: object(3)\nmemory usage: 16.7+ KB\n\n

    Let's take a closer look at a few image samples

    plot_sample_images(train_ds, n_images_per_class=3)\n

    Let's now further pre-process the data and prepare for model training. The helper function process_io converts the normalized images into tensors and the labels to the indices 0 and 1 to train the classifier.

    from typing import Tuple\ndef process_io(df: pd.DataFrame, labels: dict) -&gt; Tuple[torch.Tensor, torch.Tensor]:\nx = df[\"normalized_images\"]\ny = df[\"labels\"]\nds_label_to_model_label = {\nds_label: idx for idx, ds_label in enumerate(labels.values())\n}\nx_nn = torch.stack(x.tolist()).to(DEVICE)\ny_nn = torch.tensor([ds_label_to_model_label[yi] for yi in y], device=DEVICE)\nreturn x_nn, y_nn\ntrain_x, train_y = process_io(train_ds, label_names)\nval_x, val_y = process_io(val_ds, label_names)\ntest_x, test_y = process_io(test_ds, label_names)\n
    model_ft = new_resnet_model(output_size=len(label_names))\nmgr = TrainingManager(\n\"model_ft\",\nmodel_ft,\nnn.CrossEntropyLoss(),\ntrain_x,\ntrain_y,\nval_x,\nval_y,\nMODEL_PATH,\n)\n# Set use_cache=False to retrain the model\ntrain_loss, val_loss = mgr.train(n_epochs=50, use_cache=True)\n
    \nCached model found, loading...\n\n
    plot_losses(train_loss, val_loss)\n

    The confusion matrix and \\(F_1\\) score look good, especially considering the low resolution of the images and their complexity (they contain different objects)

    pred_y_test = np.argmax(model_ft(test_x).detach(), axis=1)\nmodel_score = f1_score(test_y, pred_y_test, average=\"weighted\")\ncm = confusion_matrix(test_y, pred_y_test)\ndisp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_names.values())\nprint(\"f1_score of model:\", model_score)\ndisp.plot();\n
    \nf1_score of model: 0.8468272032336833\n\n
    influences = compute_influences(\nmodel=mgr.model,\nloss=mgr.loss,\nx=train_x,\ny=train_y,\nx_test=test_x,\ny_test=test_y,\nhessian_regularization=hessian_reg,\ninversion_method=\"cg\",\ninfluence_type=\"up\",\nprogress=True,\n)\n
    test_image_idx = 42\nmodel_label_to_ds_label = {\nidx: ds_label for idx, ds_label in enumerate(label_names.values())\n}\npredicted_label = model_label_to_ds_label[\nnp.argmax(model_ft(test_x[test_image_idx].unsqueeze(0)).detach(), axis=1).item()\n]\ntrue_label = test_ds[\"labels\"][test_image_idx]\nplt.rcParams[\"figure.figsize\"] = (3, 3)\nplt.imshow(test_ds[\"images\"][test_image_idx])\nplt.axis(\"off\")\nplt.title(f\"Predicted: {predicted_label} - True: {true_label}\")\nplt.show()\n

    Now we plot the histogram of the influence that all training images have on the image selected above, separated by their label.

    plt.rcParams[\"figure.figsize\"] = default_figsize\nplot_influence_distribution_by_label(\ninfluences[test_image_idx],\ntrain_ds[\"labels\"].values,\ntitle_extra=f\"over index {test_image_idx}\",\n)\n

    Rather unsurprisingly, the training samples that have the same label as the test image have, on average, a higher influence on the classifier's output for it. Let's then take them and visualize those with the highest and lowest influence:

    images_with_same_label = train_ds[\"labels\"] == test_ds[\"labels\"][test_image_idx]\ninfluence_values_with_same_label = influences[test_image_idx][images_with_same_label]\nimages_same_label = train_ds[\"images\"][images_with_same_label].values\nplot_lowest_highest_influence_images(\ninfluence_values_with_same_label, subset_images=images_same_label, num_to_plot=3\n)\n

    Looking at the images, it is difficult to explain why those on the right are more influential than those on the left. At first sight, the choice seems to be random (or at the very least noisy). Let's dig in a bit more by looking at average influences:

    avg_influences = np.mean(influences, axis=0)\n

    Once again, let's plot the histogram of influence values by label.

    plot_influence_distribution_by_label(\navg_influences, train_ds[\"labels\"].values, \"over all test samples\"\n)\n

    Next, for each class (you can select a different class by changing the value of the label variable) we can have a look at the top and bottom images by average influence, i.e. we can show the images that have the highest and lowest average influence over all test images.

    label = \"tables\"\nimg_with_selected_label = train_ds[\"labels\"] == label\nif_selected_label = avg_influences[img_with_selected_label]\nimges_same_label = train_ds[\"images\"][img_with_selected_label].values\nplot_lowest_highest_influence_images(if_selected_label, imges_same_label, num_to_plot=3)\n

    Once again, it is not easy to explain why the images on the left have a lower influence than the ones on the right. One could argue that in order to predict that there is a dining table in the image it is beneficial to clearly see both the chairs and the table itself, a feature missing in some samples on the left. Also, color seems to be a discriminant: houses with a blue painting could get confused with the water around a boat. Of course, this is debatable and different people could come up with other explanations a posteriori.

    corrupted_model = new_resnet_model(output_size=len(label_names))\ncorrupted_dataset, corrupted_indices = corrupt_imagenet(\ndataset=train_ds,\nfraction_to_corrupt=0.1,\navg_influences=avg_influences,\n)\ncorrupted_train_x, corrupted_train_y = process_io(corrupted_dataset, label_names)\nmgr = TrainingManager(\n\"corrupted_model\",\ncorrupted_model,\nnn.CrossEntropyLoss(),\ncorrupted_train_x,\ncorrupted_train_y,\nval_x,\nval_y,\nMODEL_PATH,\n)\ntraining_loss, validation_loss = mgr.train(n_epochs=50, use_cache=True)\n
    \nCached model found, loading...\n\n
    plot_losses(training_loss, validation_loss)\n
    pred_y_test = np.argmax(corrupted_model(test_x).detach(), axis=1)\nmodel_score = f1_score(test_y, pred_y_test, average=\"weighted\")\nprint(\"F1 score of model with corrupted data:\", model_score)\n
    \nF1 score of model with corrupted data: 0.8164795918367347\n\n

    Interestingly, despite being trained on a corrupted dataset, the model has a fairly high \\(F_1\\) score. Let's now calculate the influence of the corrupted training data points over the test data points.

    influences = compute_influences(\nmodel=mgr.model,\nloss=mgr.loss,\nx=corrupted_train_x,\ny=corrupted_train_y,\nx_test=test_x,\ny_test=test_y,\nhessian_regularization=hessian_reg,\ninversion_method=\"cg\",\ninfluence_type=\"up\",\nprogress=True,\n)\n
    \nSplit Gradient:   0%|          | 0/98 [00:00<?, ?it/s]\n
    \nConjugate gradient:   0%|          | 0/98 [00:00<?, ?it/s]\n
    \nSplit Gradient:   0%|          | 0/707 [00:00<?, ?it/s]\n

    As before, since we are interested in the average influence on the test dataset, we take the average of influences across rows, and then plot the highest and lowest influences for a chosen label

    avg_corrupted_influences = np.mean(influences, axis=0)\n
    label = \"boats\"\nimg_with_selected_label = corrupted_dataset[\"labels\"] == label\nif_selected_label = avg_corrupted_influences[img_with_selected_label]\nimges_same_label = corrupted_dataset[\"images\"][img_with_selected_label].values\nplot_lowest_highest_influence_images(if_selected_label, imges_same_label, num_to_plot=3)\n

    As expected, the samples with lowest (negative) influence for the label \"boats\" are those that have been corrupted: all the images on the left are tables! We can compare the average influence of corrupted data with non-corrupted ones

    plot_corrupted_influences_distribution(\ncorrupted_dataset, corrupted_indices, avg_corrupted_influences\n)\n
    compute_mean_corrupted_influences(\ncorrupted_dataset, corrupted_indices, avg_corrupted_influences\n)\n
    label avg_non_corrupted_infl avg_corrupted_infl score_diff 0 boats 0.945390 -0.890972 1.836362 1 tables -1.092637 -2.757206 1.664569

    And indeed corrupted data have a more negative influence on average than clean ones!

    Despite this being a useful property, influence functions are known to be unreliable for tasks of data valuation, especially in deep learning where the fundamental assumption of the theory (convexity) is grossly violated. A lot of factors (e.g. the size of the network, the training process or the Hessian regularization term) can interfere with the computation, to the point that often the results that we obtain cannot be trusted. This has been extensively studied in the recent paper:

    Basu, S., P. Pope, and S. Feizi. Influence Functions in Deep Learning Are Fragile. International Conference on Learning Representations (ICLR). 2021.

    Nevertheless, influence functions offer a relatively quick and mathematically rigorous way to evaluate (at first order) the importance of a training point for a model's prediction.

    "},{"location":"examples/influence_imagenet/#influence-functions-for-neural-networks","title":"Influence functions for neural networks","text":"

    This notebook explores the use of influence functions for convolutional neural networks. In the first part we will investigate the usefulness, or lack thereof, of influence functions for the interpretation of a classifier's outputs.

    For our study we choose a pre-trained ResNet18, fine-tuned on the tiny-imagenet dataset. This dataset was created for a Stanford course on Deep Learning for Computer Vision, and is a subset of the famous ImageNet with 200 classes instead of 1000, and images down-sampled to a lower resolution of 64x64 pixels.

    After tuning the last layers of the network, we will use pyDVL to find the most and the least influential training images for the test set. This can sometimes be used to explain inference errors, or to direct efforts during data collection, although we will face inconclusive results with our model and data. This illustrates well-known issues of influence functions for neural networks.

    However, in the final part of the notebook we will see that influence functions are an effective tool for finding anomalous or corrupted data points.

    We conclude with an appendix with some basic theoretical concepts used.

    "},{"location":"examples/influence_imagenet/#imports-and-setup","title":"Imports and setup","text":""},{"location":"examples/influence_imagenet/#loading-and-preprocessing-the-dataset","title":"Loading and preprocessing the dataset","text":"

    We pick two classes arbitrarily to work with: 90 and 100, corresponding respectively to dining tables, and boats in Venice (you can of course select any other two classes, or more of them, although that would imply longer training times and some modifications in the notebook below). The dataset is loaded with load_preprocess_imagenet(), which returns three pandas DataFrames with training, validation and test sets respectively. Each dataframe has three columns: normalized images, labels and the original images. Note that you can load a subset of the data by decreasing downsampling_ratio.

    "},{"location":"examples/influence_imagenet/#model-definition-and-training","title":"Model definition and training","text":"

    We use a ResNet18 from torchvision with final layers modified for binary classification.

    Training for influence computation is facilitated by pydvl.influence.model_wrappers.torch_wrappers.TorchModel, a convenience wrapper around torch models which is part of pyDVL. We wrap this with a simple class TrainingManager which transparently handles persistence after training. The latter is not part of the main pyDVL package but just a way to reduce clutter in this notebook.

    We train the model for 50 epochs and save the results. Then we plot the train and validation loss curves.

    "},{"location":"examples/influence_imagenet/#influence-computation","title":"Influence computation","text":"

    Let's now calculate influences! The main method is pydvl.influence.general.compute_influences, which takes a trained nn.Module, the training loss, some input dataset with labels (which typically is the training data, or a subset of it) and some test data.

    Another important parameter is the Hessian regularization term, which should be chosen as small as possible for the computation to converge (further details on why this is important can be found in the Appendix).

    Since ResNet18 is quite big, we pick conjugate gradient (cg) as the method for inversion of the Hessian. A naive computation would require a lot of memory. Finally, the influence type will be up. The other option, perturbation, is beyond the scope of this notebook, but more info can be found in the notebook using the Wine dataset or in the documentation for pyDVL.

    The output of compute_influences is a matrix of size test_set_length x training_set_length. Each row represents a test data point, and each column a training data point, so that entry \\((i,j)\\) represents the influence of training point \\(j\\) on test point \\(i\\).

    "},{"location":"examples/influence_imagenet/#analysing-influences","title":"Analysing influences","text":"

    With the computed influences we can study single images or all of them together:

    "},{"location":"examples/influence_imagenet/#influence-on-a-single-test-image","title":"Influence on a single test image","text":"

    Let's take any image in the test set:

    "},{"location":"examples/influence_imagenet/#analysing-the-average-influence-on-test-samples","title":"Analysing the average influence on test samples","text":"

    By averaging across the rows of the influence matrix, we obtain the average influence of each training sample on the whole test set:

    "},{"location":"examples/influence_imagenet/#detecting-corrupted-data","title":"Detecting corrupted data","text":"

    After facing the shortcomings of influence functions for explaining decisions, we move to an application with clear-cut results. Influences can be successfully used to detect corrupted or mislabeled samples, making them an effective tool to \"debug\" training data.

    We begin by training a new model (with the same architecture as before) on a dataset with some corrupted labels. The method get_corrupted_imagenet will take the training dataset and corrupt a certain fraction of the labels by flipping them. We use the same number of epochs and optimizer as before.

    "},{"location":"examples/influence_imagenet/#theory-of-influence-functions-for-neural-networks","title":"Theory of influence functions for neural networks","text":"

    In this appendix we will briefly go through the basic ideas of influence functions adapted for neural networks as introduced in Koh, Pang Wei, and Percy Liang. \"Understanding Black-box Predictions via Influence Functions\" International conference on machine learning. PMLR, 2017.

    Note however that this paper departs from the standard and established theory and notation for influence functions. For a rigorous introduction to the topic we recommend classical texts like Hampel, Frank R., Elvezio M. Ronchetti, Peter J. Rousseeuw, and Werner A. Stahel. Robust Statistics: The Approach Based on Influence Functions. 1st edition. Wiley Series in Probability and Statistics. New York: Wiley-Interscience, 2005. https://doi.org/10.1002/9781118186435.

    "},{"location":"examples/influence_imagenet/#upweighting-points","title":"Upweighting points","text":"

    Let's start by considering some input space \\(\\mathcal{X}\\) to a model (e.g. images) and an output space \\(\\mathcal{Y}\\) (e.g. labels). Let's take \\(z_i = (x_i, y_i)\\) to be the \\(i\\)-th training point, and \\(\\theta\\) to be the (potentially highly) multi-dimensional parameters of the neural network (i.e. \\(\\theta\\) is a big array with very many parameters). We will indicate with \\(L(z, \\theta)\\) the loss of the model for point \\(z\\) and parameters \\(\\theta\\). When training the model we minimize the loss over all points, i.e. the optimal parameters are calculated through gradient descent on the following formula: $$ \\hat{\\theta} = \\arg \\min_\\theta \\frac{1}{n}\\sum_{i=1}^n L(z_i, \\theta) $$ where \\(n\\) is the total number of training data points.

    For notational convenience, let's define $$ \\hat{\\theta}_{-z} = \\arg \\min_\\theta \\frac{1}{n}\\sum_{z_i \\ne z} L(z_i, \\theta) \\ , $$ i.e. \\(\\hat{\\theta}_{-z}\\) are the model parameters that minimize the total loss when \\(z\\) is not in the training dataset.

    In order to check the impact of each training point on the model, we would need to calculate \\(\\hat{\\theta}_{-z}\\) for each \\(z\\) in the training dataset, thus re-training the model at least ~\\(n\\) times (more if model training is noisy). This is computationally very expensive, especially for big neural networks. To circumvent this problem, we can just calculate a first order approximation of \\(\\hat{\\theta}\\). This can be done through single backpropagation and without re-training the full model.

    Let's define $$ \\hat{\\theta}_{\\epsilon, z} = \\arg \\min_\\theta \\frac{1}{n}\\sum_{i=1}^n L(z_i, \\theta) + \\epsilon L(z, \\theta) \\ , $$ which is the optimal \\(\\hat{\\theta}\\) if we were to up-weigh \\(z\\) by an amount \\(\\epsilon\\).

    From a classical result (a simple derivation is available in Appendix A of Koh and Liang's paper), we know that: $$ \\frac{d \\ \\hat{\\theta}_{\\epsilon, z}}{d \\epsilon} \\Big|_{\\epsilon=0} = -H_{\\hat{\\theta}}^{-1} \\nabla_\\theta L(z, \\hat{\\theta}) $$ where \\(H_{\\hat{\\theta}} = \\frac{1}{n} \\sum_{i=1}^n \\nabla_\\theta^2 L(z_i, \\hat{\\theta})\\) is the Hessian of \\(L\\). Importantly, notice that this expression is only valid when \\(\\hat{\\theta}\\) is a minimum of \\(L\\), or otherwise \\(H_{\\hat{\\theta}}\\) cannot be inverted!

    "},{"location":"examples/influence_imagenet/#approximating-the-influence-of-a-point","title":"Approximating the influence of a point","text":"

    We will define the influence of training point \\(z\\) on test point \\(z_{\\text{test}}\\) as \\(\\mathcal{I}(z, z_{\\text{test}}) = L(z_{\\text{test}}, \\hat{\\theta}_{-z}) - L(z_{\\text{test}}, \\hat{\\theta})\\) (notice that it is higher for points \\(z\\) which positively impact the model score, since if they are excluded, the loss is higher). In practice, however, we will always use the infinitesimal approximation \\(\\mathcal{I}_{up}(z, z_{\\text{test}})\\), defined as $$ \\mathcal{I}_{up}(z, z_{\\text{test}}) = - \\frac{d L(z_{\\text{test}}, \\hat{\\theta}_{\\epsilon, z})}{d \\epsilon} \\Big|_{\\epsilon=0} $$

    Using the chain rule and the results calculated above, we thus have:

    \\[ \\mathcal{I}_{up}(z, z_{\\text{test}}) = - \\nabla_\\theta L(z_{\\text{test}}, \\hat{\\theta})^\\top \\ \\frac{d \\hat{\\theta}_{\\epsilon, z}}{d \\epsilon} \\Big|_{\\epsilon=0} = \\nabla_\\theta L(z_{\\text{test}}, \\hat{\\theta})^\\top \\ H_{\\hat{\\theta}}^{-1} \\ \\nabla_\\theta L(z, \\hat{\\theta}) \\]

    In order to calculate this expression we need the gradient and the Hessian of the loss wrt. the model parameters \\(\\hat{\\theta}\\). This can be easily done through a single backpropagation pass.

    "},{"location":"examples/influence_imagenet/#regularizing-the-hessian","title":"Regularizing the Hessian","text":"

    One very important assumption that we make when approximating influence is that \\(\\hat{\\theta}\\) is at least a local minimum of the loss. However, we clearly cannot guarantee this except for convex models, and despite good apparent convergence, \\(\\hat{\\theta}\\) might be located in a region with flat curvature or close to a saddle point. In particular, the Hessian might have vanishing eigenvalues making its direct inversion impossible.

    To circumvent this problem, instead of inverting the true Hessian \\(H_{\\hat{\\theta}}\\), one can invert a small perturbation thereof: \\(H_{\\hat{\\theta}} + \\lambda \\mathbb{I}\\), with \\(\\mathbb{I}\\) being the identity matrix. This standard trick ensures that the eigenvalues of \\(H_{\\hat{\\theta}}\\) are bounded away from zero and therefore the matrix is invertible. In order for this regularization not to corrupt the outcome too much, the parameter \\(\\lambda\\) should be as small as possible while still allowing a reliable inversion of \\(H_{\\hat{\\theta}} + \\lambda \\mathbb{I}\\).

    "},{"location":"examples/influence_synthetic/","title":"For mislabeled data","text":"
    %load_ext autoreload\n
    %autoreload\n%matplotlib inline\nimport os\nimport random\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\nimport matplotlib.pyplot as plt\nfrom pydvl.influence import compute_influences, TorchTwiceDifferentiable\nfrom support.shapley import (\nsynthetic_classification_dataset,\ndecision_boundary_fixed_variance_2d,\n)\nfrom support.common import (\nplot_gaussian_blobs,\nplot_losses,\nplot_influences,\n)\nfrom support.torch import (\nfit_torch_model,\nTorchLogisticRegression,\n)\nfrom sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\nfrom torch.optim import AdamW, lr_scheduler\nfrom torch.utils.data import DataLoader\n
    \n/Users/fabio/miniconda3/envs/pydvl_env/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n\n
    plt.rcParams[\"figure.figsize\"] = (16, 8)\nplt.rcParams[\"font.size\"] = 12\nplt.rcParams[\"xtick.labelsize\"] = 12\nplt.rcParams[\"ytick.labelsize\"] = 10\n
    random_state = 24\nis_CI = os.environ.get(\"CI\")\n
    num_samples = 10000\nnum_features = 2\nsigma = 0.2\nmeans = np.asarray([[0.0, 0.0], [1.0, 1.0]])\n
    random.seed(random_state)\nnp.random.seed(random_state)\n

    The following code snippet generates the aforementioned dataset.

    train_data, val_data, test_data = synthetic_classification_dataset(\nmeans, sigma, num_samples, train_size=0.7, test_size=0.2\n)\n# In CI we only use a subset of the training set\nif is_CI:\ntrain_data = (train_data[0][:10], train_data[1][:10])\n

    Given the simplicity of the dataset, we can calculate exactly the optimal decision boundary (that which maximizes our accuracy). The following code maps a continuous line of z values to a 2-dimensional vector in feature space. (More details are in the appendix to this notebook.)

    decision_boundary_fn = decision_boundary_fixed_variance_2d(means[0], means[1])\ndecision_boundary = decision_boundary_fn(np.linspace(-1.5, 1.5, 100))\n
    plot_gaussian_blobs(\ntrain_data,\ntest_data,\nxlabel=\"$x_0$\",\nylabel=\"$x_1$\",\nlegend_title=\"$y - labels$\",\nline=decision_boundary,\ns=10,\nsuptitle=\"Plot of train-test data\",\n)\n

    Note that there are samples which go across the optimal decision boundary and will be wrongly labelled. The optimal decision boundary can not discriminate these as the mislabelling is a consequence of the presence of random noise.

    model = TorchLogisticRegression(num_features)\ndevice = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\nmodel.to(device)\nnum_epochs = 50\nlr = 0.05\nweight_decay = 0.05\nbatch_size = 256\ntrain_data_loader = DataLoader(\nlist(zip(train_data[0], train_data[1].astype(float))),\nbatch_size=batch_size,\nshuffle=True,\n)\nval_data_loader = DataLoader(\nlist(zip(val_data[0], val_data[1].astype(float))),\nbatch_size=batch_size,\nshuffle=True,\n)\noptimizer = AdamW(params=model.parameters(), lr=lr, weight_decay=weight_decay)\nscheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)\nlosses = fit_torch_model(\nmodel=model,\ntraining_data=train_data_loader,\nval_data=val_data_loader,\nloss=F.binary_cross_entropy,\noptimizer=optimizer,\nscheduler=scheduler,\nnum_epochs=num_epochs,\n)\n
    \nModel fitting:   0%|          | 0/50 [00:00<?, ?it/s]Model fitting: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 50/50 [00:02<00:00, 19.41it/s]\n\n

    And let's check that the model is not overfitting

    plot_losses(losses)\n

    A look at the confusion matrix also shows good results

    model.eval()\npred_probabilities = model(test_data[0]).detach()\npred_y_test = [1 if prob &gt; 0.5 else 0 for prob in pred_probabilities]\ncm = confusion_matrix(test_data[1], pred_y_test)\ndisp = ConfusionMatrixDisplay(confusion_matrix=cm)\ndisp.plot();\n

    It is important that the model converges to a point near the optimum, since the influence values assume that we are at a minimum (or close) in the loss landscape. The function

    \\[I(x_1, y_1, x_2, y_2) \\colon \\mathbb{R}^d \\times \\mathbb{R}^d \\to \\mathbb{R}\\]

    measures the influence of the data point \\(x_1\\) onto \\(x_2\\) conditioned on the training targets \\(y_1\\) and \\(y_2\\) through some model parameters \\(\\theta\\). If the loss function L is differentiable, we can take \\(I\\) to be

    $$ I(x_1, x_2) = \\nabla_\\theta\\; L(x_1, y_1) ^\\mathsf{T} \\; H_\\theta^{-1} \\; \\nabla_\\theta \\; L(x_2, y_2) $$ See \"Understanding Black-box Predictions via Influence Functions\" for a detailed derivation of this formula

    Let's take a subset of the training data points, which we will calculate the influence values of.

    x = train_data[0][:100]\ny = train_data[1][:100]\n

    In pyDVL, the influence of the training points on the test points can be calculated with the following

    train_data_loader = DataLoader(list(zip(x, y.astype(float))), batch_size=batch_size)\ntest_data_loader = DataLoader(\nlist(zip(test_data[0], test_data[1].astype(float))), batch_size=batch_size\n)\ninfluence_values = compute_influences(\ndifferentiable_model=TorchTwiceDifferentiable(model, F.binary_cross_entropy),\ntraining_data=train_data_loader,\ntest_data=test_data_loader,\ninfluence_type=\"up\",\ninversion_method=\"direct\",  # use 'cg' for big models\n)\n

    The above explicitly constructs the Hessian. This can often be computationally expensive and conjugate gradient approximate calculation should be used for bigger models.

    With the influence type 'up', training influences have shape [NxM] where N is the number of test samples and M is the number of training samples. They therefore associate to each training sample its influence on each test sample. Influence type 'perturbation', instead, returns an array of shape [NxMxF], where F is the number of features in input, i.e. the length of x.

    In our case, in order to have a value of the total average influence of a training point we can just average across test samples (i.e. over the first axis of the influence matrix).

    mean_train_influences = np.mean(influence_values.numpy(), axis=0)\n

    Let's plot the results (adjust colorbar_limits for better color gradient)

    plot_influences(\nx,\nmean_train_influences,\nline=decision_boundary,\nxlabel=\"$x_0$\",\nylabel=\"$x_1$\",\nsuptitle=\"Influences of input points\",\nlegend_title=\"influence values\",\n# colorbar_limits=(-0.3,),\n);\n

    We can see that, as we approach the separation line, the influences tend to move away from zero, i.e. the points become more decisive for model training, some in a positive way, some negative.

    As a further test, let's introduce some labelling errors into \\(y\\) and see how the distribution of the influences changes. Let's flip the first 10 labels and calculate influences

    y_corrupted = np.copy(y)\ny_corrupted[:10] = [1 - yi for yi in y[:10]]\ntrain_corrupted_data_loader = DataLoader(\nlist(zip(x, y_corrupted.astype(float))), batch_size=batch_size\n)\ninfluence_values = compute_influences(\ndifferentiable_model=TorchTwiceDifferentiable(model, F.binary_cross_entropy),\ntraining_data=train_corrupted_data_loader,\ntest_data=test_data_loader,\ninfluence_type=\"up\",\ninversion_method=\"direct\",\n)\nmean_train_influences = np.mean(influence_values.numpy(), axis=0)\n
    print(\"Average mislabelled data influence:\", np.mean(mean_train_influences[:10]))\nprint(\"Average correct data influence:\", np.mean(mean_train_influences[10:]))\n
    \nAverage mislabelled data influence: -0.8225848370029777\nAverage correct data influence: 0.011277048916970962\n\n
    plot_influences(\nx,\nmean_train_influences,\ncorrupted_indices=np.array(range(10)),\nline=decision_boundary,\nxlabel=\"$x_0$\",\nylabel=\"$x_1$\",\nsuptitle=\"Influences of input points with corrupted data\",\nlegend_title=\"influence values\",\n# colorbar_limits=(-0.3,),\n);\n

    Red circles indicate the points which have been corrupted. We can see that the mislabelled data have a more negative average influence on the model, especially those that are farther away from the decision boundary.

    The \"direct\" method that we have used above involves the inversion of the Hessian matrix of the model. If a model has \\(n\\) training points and \\(\\theta \\in \\mathbb{R}^p\\) parameters, this requires \\(O(n \\ p^2 + p^3)\\) operations, which for larger models, like neural networks, becomes quickly unfeasible. Conjugate gradient avoids the explicit computation of the Hessian via a technique called implicit Hessian-vector products (HVPs), which typically takes \\(O(n \\ p)\\) operations.

    In the next cell we will use conjugate gradient to compute the influence factors. Since logistic regression is a very simple model, \"cg\" actually slows computation with respect to the direct method, which in this case is a much better choice. Nevertheless, we are able to verify that the influences calculated with \"cg\" are the same (to a minor error) as those calculated directly.

    influence_values = compute_influences(\ndifferentiable_model=TorchTwiceDifferentiable(model, F.binary_cross_entropy),\ntraining_data=train_corrupted_data_loader,\ntest_data=test_data_loader,\ninfluence_type=\"up\",\ninversion_method=\"cg\",\nprogress=True,\n)\nmean_train_influences = np.mean(influence_values.numpy(), axis=0)\nprint(\"Average mislabelled data influence:\", np.mean(mean_train_influences[:10]))\nprint(\"Average correct data influence:\", np.mean(mean_train_influences[10:]))\n
    \nBatch Test Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 8/8 [00:00<00:00, 17.89it/s]\nBatch Train Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 308.47it/s]\nConjugate gradient: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [00:16<00:00, 118.24it/s]\nBatch Split Input Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 44.89it/s]\n
    \nAverage mislabelled data influence: -0.82248804123547\nAverage correct data influence: 0.01127580743952819\n\n
    \n\n\n

    Averages are very similar to the ones calculated through the direct method. The same is true for the plot.

    plot_influences(\nx,\nmean_train_influences,\ncorrupted_indices=np.array(range(10)),\nline=decision_boundary,\nxlabel=\"$x_0$\",\nylabel=\"$x_1$\",\nsuptitle=\"Influences of input points with corrupted data\",\nlegend_title=\"influence values\",\n# colorbar_limits=(-0.1, 0.1),\n);\n
    "},{"location":"examples/influence_synthetic/#influence-functions-for-data-mislabeling","title":"Influence functions for data mislabeling","text":"

    In this notebook, we will take a closer look at the theory of influence functions with the help of a synthetic dataset. Data mislabeling occurs whenever some examples from a usually big dataset are wrongly-labeled. In real-life this happens fairly often, e.g. as a consequence of human error, or noise in the data.

    Let's consider a classification problem with the following notation:

    \\[ \\begin{align*} x_i &\\in \\mathbb{R}^d \\\\ y_i &\\in \\{0, 1\\} \\\\ \\forall i &\\in [ N ] \\end{align*} \\]

    In other words, we have a dataset containing \\(N\\) samples, each with label 1 or 0. As a typical example you can think of \\(y\\) indicating whether a patient has a disease based on some feature representation \\(x\\).

    Let's now introduce a toy model that will help us delve into the theory and practical utility of influence functions. We will assume that \\(y\\) is a Bernoulli binary random variable while the input \\(x\\) follows a d-dimensional Gaussian distribution which depends on the label \\(y\\). More precisely:

    \\[ y_i \\sim \\text{Ber}\\left (0.5 \\right) \\\\ x_i \\sim \\mathcal{N}\\left ((1 - y_i) \\mu_1 + y_i \\mu_2, \\sigma^2 I \\right), \\]

    with fixed means and diagonal covariance. Implementing the sampling scheme in python is straightforward and can be achieved by first sampling \\(y\\) and afterward \\(x\\).

    "},{"location":"examples/influence_synthetic/#imports","title":"Imports","text":""},{"location":"examples/influence_synthetic/#constants","title":"Constants","text":""},{"location":"examples/influence_synthetic/#dataset","title":"Dataset","text":""},{"location":"examples/influence_synthetic/#plotting-the-dataset","title":"Plotting the dataset","text":"

    Let's plot the dataset with the respective labels and the optimal decision line

    "},{"location":"examples/influence_synthetic/#training-the-model","title":"Training the model","text":"

    We will now train a logistic regression model on the training data. This can be done with the following

    "},{"location":"examples/influence_synthetic/#calculating-influences","title":"Calculating influences","text":""},{"location":"examples/influence_synthetic/#inversion-through-conjugate-gradient","title":"Inversion through conjugate gradient","text":""},{"location":"examples/influence_synthetic/#appendix-calculating-the-decision-boundary","title":"Appendix: Calculating the decision boundary","text":"

    For obtaining the optimal discriminator one has to solve the equation

    \\[p(x|y=0)=p(x|y=1)\\]

    and determine the solution set \\(X\\). Let's take the following probabilities

    \\[ \\begin{align*} p(x|y=0)&=\\mathcal{N}\\left (\\mu_1, \\sigma^2 I \\right) \\\\ p(x|y=1)&=\\mathcal{N}\\left (\\mu_2, \\sigma^2 I \\right) \\end{align*} \\]

    For a single fixed diagonal variance parameterized by \\(\\sigma\\), the optimal discriminator lies at points which are equidistant from the means of the two distributions, i.e.

    \\[ \\begin{align*} \\| x - \\mu_1 \\|^2 &= \\| x - \\mu_2 \\|^2 \\\\ \\| \\mu_1 \\|^2 -2 x^\\mathsf{T} \\mu_1 &= \\| \\mu_2 \\|^2 -2 x^\\mathsf{T} \\mu_2 \\\\ \\implies 0 &= 2 (\\mu_2 - \\mu_1)^\\mathsf{T} x + \\| \\mu_1 \\|^2 - \\| \\mu_2 \\|^2 \\\\ 0 &= \\mu_1^\\mathsf{T}x - \\mu_2^\\mathsf{T}x - \\frac{1}{2} \\mu_1^\\mathsf{T} \\mu_1 + \\frac{1}{2} \\mu_2^\\mathsf{T} \\mu_2 \\end{align*} \\]

    This is just the implicit description of the line. Solving for the explicit form can be achieved by enforcing a functional form \\(f(z) = x = a z + b\\) with \\(z \\in \\mathbb{R}\\) onto \\(x\\). After the term is inserted in the previous equation

    \\[ 0 = (\\mu_2 - \\mu_1)^\\mathsf{T} (az + b) + \\frac{1}{2} \\left( \\| \\mu_1 \\|^2 - \\| \\mu_2 \\|^2 \\right) \\]

    We can write \\(a\\) explicitly since, by symmetry, it is expected to be orthogonal to \\(\\mu_2 - \\mu_1\\). Then, solving for \\(b\\), the solution can be found to be

    \\[ f(z) = \\underbrace{\\begin{bmatrix} 0 & 1 \\\\ -1 & 0 \\end{bmatrix} (\\mu_2 - \\mu_1)}_a z + \\underbrace{\\frac{\\mu_1 + \\mu_2}{2}}_b \\]"},{"location":"examples/influence_wine/","title":"For outlier detection","text":"

    Let's start by loading the imports, the dataset and splitting it into train, validation and test sets. We will use a large test set to have a less noisy estimate of the average influence.

    %load_ext autoreload\n
    %autoreload\n%matplotlib inline\nimport os\nimport random\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\nfrom support.common import plot_losses\nfrom support.torch import TorchMLP, fit_torch_model\nfrom pydvl.influence import compute_influences, TorchTwiceDifferentiable\nfrom support.shapley import load_wine_dataset\nfrom sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score\nfrom torch.optim import Adam, lr_scheduler\nfrom torch.utils.data import DataLoader, TensorDataset\n
    plt.rcParams[\"figure.figsize\"] = (16, 8)\nplt.rcParams[\"font.size\"] = 12\nplt.rcParams[\"xtick.labelsize\"] = 12\nplt.rcParams[\"ytick.labelsize\"] = 10\n
    random_state = 24\nis_CI = os.environ.get(\"CI\")\n
    random.seed(random_state)\nnp.random.seed(random_state)\n
    training_data, val_data, test_data, feature_names = load_wine_dataset(\ntrain_size=0.3, test_size=0.6\n)\n# In CI we only use a subset of the training set\nif is_CI:\ntrain_data = (training_data[0][:10], training_data[1][:10])\n

    We will corrupt some of the training points by flipping their labels

    num_corrupted_idxs = 10\ntraining_data[1][:num_corrupted_idxs] = torch.tensor(\n[(val + 1) % 3 for val in training_data[1][:num_corrupted_idxs]]\n)\n

    and let's wrap it in a pytorch data loader

    training_data_loader = DataLoader(\nTensorDataset(*training_data), batch_size=32, shuffle=False\n)\nval_data_loader = DataLoader(TensorDataset(*val_data), batch_size=32, shuffle=False)\ntest_data_loader = DataLoader(TensorDataset(*test_data), batch_size=32, shuffle=False)\n
    feature_dimension = 13\nnum_classes = 3\nnetwork_size = [16, 16]\nlayers_size = [feature_dimension, *network_size, num_classes]\nnum_epochs = 300\nlr = 0.005\nweight_decay = 0.01\ndevice = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\nnn_model = TorchMLP(layers_size)\nnn_model.to(device)\noptimizer = Adam(params=nn_model.parameters(), lr=lr, weight_decay=weight_decay)\nscheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)\nlosses = fit_torch_model(\nmodel=nn_model,\ntraining_data=training_data_loader,\nval_data=val_data_loader,\nloss=F.cross_entropy,\noptimizer=optimizer,\nscheduler=scheduler,\nnum_epochs=num_epochs,\n)\n
    \nModel fitting: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 300/300 [00:00<00:00, 307.77it/s]\n\n

    Let's check that the training has found a stable minimum by plotting the training and validation loss

    plot_losses(losses)\n

    Since it is a classification problem, let's also take a look at the confusion matrix on the test set

    nn_model.eval()\npred_y_test = np.argmax(nn_model(test_data[0]).detach(), axis=1)\ncm = confusion_matrix(test_data[1], pred_y_test)\ndisp = ConfusionMatrixDisplay(confusion_matrix=cm)\ndisp.plot();\n

    And let's compute the f1 score of the model

    f1_score(test_data[1], pred_y_test, average=\"weighted\")\n
    \n0.9906846833902615\n

    Let's now move to calculating influences of each point on the total score.

    train_influences = compute_influences(\nTorchTwiceDifferentiable(nn_model, F.cross_entropy),\ntraining_data=training_data_loader,\ntest_data=test_data_loader,\ninfluence_type=\"up\",\ninversion_method=\"direct\",\nhessian_regularization=0.1,\nprogress=True,\n)\n
    \nBatch Test Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4/4 [00:00<00:00, 67.10it/s]\nMVP: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 547/547 [00:00<00:00, 742.01it/s] \nBatch Split Input Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2/2 [00:00<00:00, 85.02it/s]\n\n

    The returned matrix, train_influences, has a number of columns equal to the number of points in the training set, and a number of rows equal to the number of points in the test set. At each element \\(a_{i,j}\\) it stores the influence that training point \\(j\\) has on the classification of test point \\(i\\).

    If we take the average across every column of the influences matrix, we obtain an estimate of the overall influence of a training point on the total accuracy of the network.

    mean_train_influences = np.mean(train_influences.numpy(), axis=0)\n

    The following histogram shows that there are big differences in score within the training set (notice the log-scale on the y axis).

    _, ax = plt.subplots()\nax.hist(mean_train_influences[num_corrupted_idxs:], label=\"normal\")\nax.hist(mean_train_influences[:num_corrupted_idxs], label=\"corrupted\", bins=5)\nax.set_title(\"Influece scores distribution\")\nax.set_xlabel(\"influece score\")\nax.set_ylabel(\"number of points\")\nax.legend()\nplt.show()\n

    We can see that the corrupted points tend to have a negative effect on the model, as expected

    print(\n\"Average influence of corrupted points: \",\nnp.mean(mean_train_influences[:num_corrupted_idxs]),\n)\nprint(\n\"Average influence of other points: \",\nnp.mean(mean_train_influences[num_corrupted_idxs:]),\n)\n
    \nAverage influence of corrupted points:  -0.05317057\nAverage influence of other points:  0.034408495\n\n

    We have seen how to calculate the influence of single training points on each test point using influence_type 'up'. Using influence_type 'perturbation' we can also calculate the influence of the input features of each point. In the next cell we will calculate the average influence of each feature on training and test points, and ultimately assess which are the most relevant to model performance.

    feature_influences = compute_influences(\nTorchTwiceDifferentiable(nn_model, F.cross_entropy),\ntraining_data=training_data_loader,\ntest_data=test_data_loader,\ninfluence_type=\"perturbation\",\ninversion_method=\"direct\",\nhessian_regularization=1,\nprogress=True,\n)\n
    \nBatch Test Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4/4 [00:00<00:00, 61.20it/s]\nMVP: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 547/547 [00:00<00:00, 1265.72it/s]\nBatch Influence Perturbation: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2/2 [00:03<00:00,  1.66s/it]\n\n
    mean_feature_influences = np.mean(feature_influences.numpy(), axis=(0, 1))\n_, ax = plt.subplots()\nax.bar(feature_names, mean_feature_influences)\nax.set_xlabel(\"training features\")\nax.set_ylabel(\"influence values\")\nax.set_title(\"Average feature influence\")\nplt.xticks(rotation=60)\nplt.show()\n

    The calculation of the Hessian matrix (necessary to calculate the influences) can be quite numerically challenging, but there are some techniques to speed up its calculation. PyDVL allows to use the full method (\"direct\") or the conjugate gradient method (\"cg\"). The first one should be used only for very small networks (like our current example), while for bigger ones \"cg\" is advisable.

    cg_train_influences = compute_influences(\nTorchTwiceDifferentiable(nn_model, F.cross_entropy),\ntraining_data=training_data_loader,\ntest_data=test_data_loader,\ninfluence_type=\"up\",\ninversion_method=\"cg\",\nhessian_regularization=0.1,\nprogress=True,\n)\nmean_cg_train_influences = np.mean(cg_train_influences.numpy(), axis=0)\n
    \nBatch Test Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4/4 [00:00<00:00, 81.02it/s]\nBatch Train Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2/2 [00:00<00:00, 535.33it/s]\nConjugate gradient: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 107/107 [00:04<00:00, 22.66it/s]\nBatch Split Input Gradients: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2/2 [00:00<00:00, 98.91it/s]\n\n

    Let's compare the results obtained through conjugate gradient with those from the direct method

    print(\nf\"Percentage error of cg over direct method:{np.mean(np.abs(mean_cg_train_influences - mean_train_influences)/np.abs(mean_train_influences))*100} %\"\n)\n
    \nPercentage error of cg over direct method:1.5124550145628746e-05 %\n\n

    This was a quick introduction to the pyDVL interface for influence functions. Despite their speed and simplicity, influence functions are known to be a very noisy estimator of data quality, as pointed out in the paper \"Influence functions in deep learning are fragile\". The size of the network, the weight decay, the inversion method used for calculating influences, the size of the test set: they all add up to the total amount of noise. Experiments may therefore give quantitatively and qualitatively different results if not averaged across several realisations. Shapley values, on the contrary, have been shown to be more robust, but this comes at the cost of high computational requirements. PyDVL employs several parallelization and caching techniques to optimize such calculations.

    "},{"location":"examples/influence_wine/#influence-functions-for-outlier-detection","title":"Influence functions for outlier detection","text":"

    This notebook shows how to calculate influences on a NN model using pyDVL for an arbitrary dataset, and how this can be used to find anomalous or corrupted data points.

    It uses the wine dataset from sklearn: given a set of 13 different input parameters regarding a particular bottle, each related to some physical property (e.g. concentration of magnesium, malic acidity, alcoholic percentage, etc.), the model will need to predict to which of 3 classes the wine belongs. For more details, please refer to the sklearn documentation.

    "},{"location":"examples/influence_wine/#imports","title":"Imports","text":""},{"location":"examples/influence_wine/#constants","title":"Constants","text":""},{"location":"examples/influence_wine/#dataset","title":"Dataset","text":""},{"location":"examples/influence_wine/#fit-a-neural-network-to-the-data","title":"Fit a neural network to the data","text":"

    We will train a 2-layer neural network. PyDVL has some convenience wrappers to initialize a pytorch NN. If you already have a model loaded and trained, you can skip this section.

    "},{"location":"examples/influence_wine/#calculating-influences-for-small-neural-networks","title":"Calculating influences for small neural networks","text":"

    The following cell calculates the influences of each training data point on the neural network. Neural networks typically have a very bumpy parameter space, which, during training, is explored until the configuration that minimises the loss is found. There is an important assumption in influence functions that the model lies at an (at least local) minimum of such loss, and if this is not fulfilled many issues can arise. In order to avoid this scenario, a regularisation term should be used whenever dealing with big and noisy models.

    "},{"location":"examples/influence_wine/#influence-of-training-features","title":"Influence of training features","text":""},{"location":"examples/influence_wine/#speeding-up-influences-for-big-models","title":"Speeding up influences for big models","text":""},{"location":"examples/least_core_basic/","title":"Least Core","text":"

    We will be using the following functions and classes from pyDVL.

    %autoreload\nfrom pydvl.utils import (\nDataset,\nUtility,\n)\nfrom pydvl.value import compute_least_core_values, LeastCoreMode, ValuationResult\nfrom pydvl.reporting.plots import shaded_mean_std\nfrom pydvl.reporting.scores import compute_removal_score\n
    X, y = make_classification(\nn_samples=200,\nn_features=50,\nn_informative=25,\nn_classes=3,\nrandom_state=random_state,\n)\n
    full_dataset = Dataset.from_arrays(\nX, y, stratify_by_target=True, random_state=random_state\n)\nsmall_dataset = Dataset.from_arrays(\nX,\ny,\nstratify_by_target=True,\ntrain_size=10,\nrandom_state=random_state,\n)\n
    model = LogisticRegression(max_iter=500, solver=\"liblinear\")\n
    model.fit(full_dataset.x_train, full_dataset.y_train)\nprint(\nf\"Training accuracy: {100 * model.score(full_dataset.x_train, full_dataset.y_train):0.2f}%\"\n)\nprint(\nf\"Testing accuracy: {100 * model.score(full_dataset.x_test, full_dataset.y_test):0.2f}%\"\n)\n
    \nTraining accuracy: 86.25%\nTesting accuracy: 70.00%\n\n
    model.fit(small_dataset.x_train, small_dataset.y_train)\nprint(\nf\"Training accuracy: {100 * model.score(small_dataset.x_train, small_dataset.y_train):0.2f}%\"\n)\nprint(\nf\"Testing accuracy: {100 * model.score(small_dataset.x_test, small_dataset.y_test):0.2f}%\"\n)\n
    \nTraining accuracy: 100.00%\nTesting accuracy: 47.89%\n\n
    utility = Utility(model=model, data=small_dataset)\n
    exact_values = compute_least_core_values(\nu=utility,\nmode=LeastCoreMode.Exact,\nprogress=True,\n)\n
    \n  0%|          | 0/1023 [00:00<?, ?it/s]\n
    exact_values_df = exact_values.to_dataframe(column=\"exact_value\").T\nexact_values_df = exact_values_df[sorted(exact_values_df.columns)]\n
    budget_array = np.linspace(200, 2 ** len(small_dataset), num=10, dtype=int)\nall_estimated_values_df = []\nall_errors = {budget: [] for budget in budget_array}\nfor budget in tqdm(budget_array):\ndfs = []\nerrors = []\ncolumn_name = f\"estimated_value_{budget}\"\nfor i in range(20):\nvalues = compute_least_core_values(\nu=utility,\nmode=LeastCoreMode.MonteCarlo,\nn_iterations=budget,\nn_jobs=n_jobs,\n)\ndf = (\nvalues.to_dataframe(column=column_name)\n.drop(columns=[f\"{column_name}_stderr\"])\n.T\n)\ndf = df[sorted(df.columns)]\nerror = mean_squared_error(\nexact_values_df.loc[\"exact_value\"].values, df.values.ravel()\n)\nall_errors[budget].append(error)\ndf[\"budget\"] = budget\ndfs.append(df)\nestimated_values_df = pd.concat(dfs)\nall_estimated_values_df.append(estimated_values_df)\nvalues_df = pd.concat(all_estimated_values_df)\nerrors_df = pd.DataFrame(all_errors)\n
    \n  0%|          | 0/10 [00:00<?, ?it/s]\n

    We can see that the approximation error decreases, on average, as we increase the budget.

    Still, the error may not always decrease when we increase the number of iterations, because we sample the subsets with replacement in the Monte Carlo method, i.e. there may be repeated subsets.

    utility = Utility(model=model, data=full_dataset)\n
    method_names = [\"Random\", \"Least Core\"]\nremoval_percentages = np.arange(0, 0.41, 0.05)\n
    all_scores = []\nfor i in trange(5):\nfor method_name in method_names:\nif method_name == \"Random\":\nvalues = ValuationResult.from_random(size=len(utility.data))\nelse:\nvalues = compute_least_core_values(\nu=utility,\nmode=LeastCoreMode.MonteCarlo,\nn_iterations=25000,\nn_jobs=n_jobs,\n)\nscores = compute_removal_score(\nu=utility,\nvalues=values,\npercentages=removal_percentages,\nremove_best=True,\n)\nscores[\"method_name\"] = method_name\nall_scores.append(scores)\nscores_df = pd.DataFrame(all_scores)\n
    \n  0%|          | 0/5 [00:00<?, ?it/s]\n

    We can clearly see that removing the most valuable data points, as given by the Least Core method, leads to, on average, a decrease in the model's performance and that the method outperforms random removal of data points.

    all_scores = []\nfor i in trange(5):\nfor method_name in method_names:\nif method_name == \"Random\":\nvalues = ValuationResult.from_random(size=len(utility.data))\nelse:\nvalues = compute_least_core_values(\nu=utility,\nmode=LeastCoreMode.MonteCarlo,\nn_iterations=25000,\nn_jobs=n_jobs,\n)\nscores = compute_removal_score(\nu=utility,\nvalues=values,\npercentages=removal_percentages,\n)\nscores[\"method_name\"] = method_name\nall_scores.append(scores)\nscores_df = pd.DataFrame(all_scores)\n
    \n  0%|          | 0/5 [00:00<?, ?it/s]\n

    We can clearly see that removing the least valuable data points, as given by the Least Core method, leads to, on average, an increase in the model's performance and that the method outperforms the random removal of data points.

    "},{"location":"examples/least_core_basic/#least-core-for-data-valuation","title":"Least Core for Data Valuation","text":"

    This notebook introduces Least Core methods for the computation of data values using pyDVL.

    Shapley values define a fair way of distributing the worth of the whole training set when every data point is part of it. But they do not consider the question of stability of subsets: Could some data points obtain a higher payoff if they formed smaller subsets? It is argued that this might be relevant if data providers are paid based on data value, since Shapley values can incentivise them not to contribute their data to the \"grand coalition\", but instead to try to form smaller ones. Whether this is of actual practical relevance is debatable, but in any case, the least core is an alternative tool available for any task of Data Valuation.

    The Core is another approach to compute data values originating in cooperative game theory that attempts to answer those questions. It is the set of feasible payoffs that cannot be improved upon by a coalition of the participants.

    Its use for Data Valuation was first described in the paper If You Like Shapley Then You\u2019ll Love the Core by Tom Yan and Ariel D. Procaccia.

    The Least Core value \\(v\\) of the \\(i\\)-th sample in dataset \\(D\\) wrt. utility \\(u\\) is computed by solving the following Linear Program:

    \\[ \\begin{array}{lll} \\text{minimize} & \\displaystyle{e} & \\\\ \\text{subject to} & \\displaystyle\\sum_{x_i\\in D} v_u(x_i) = u(D) & \\\\ & \\displaystyle\\sum_{x_i\\in S} v_u(x_i) + e \\geq u(S) &, \\forall S \\subset D, S \\neq \\emptyset \\\\ \\end{array} \\]

    To illustrate this method we will use a synthetic dataset. We will first use a subset of 10 data points to compute the exact values and use them to assess the Monte Carlo approximation. Afterwards, we will conduct the data removal experiments as described by Ghorbani and Zou in their paper Data Shapley: Equitable Valuation of Data for Machine Learning: We compute the data valuation given different computation budgets and incrementally remove a percentage of the best, respectively worst, data points and observe how that affects the utility.

    "},{"location":"examples/least_core_basic/#setup","title":"Setup","text":"

    We begin by importing the main libraries and setting some defaults.

    If you are reading this in the documentation, some boilerplate (including most plotting code) has been omitted for convenience."},{"location":"examples/least_core_basic/#dataset","title":"Dataset","text":"

    We generate a synthetic dataset using the make_classification function from scikit-learn.

    We sample 200 data points from a 50-dimensional Gaussian distribution with 25 informative features and 25 non-informative features (generated as random linear combinations of the informative features).

    The 200 samples are uniformly distributed across 3 classes with a small percentage of noise added to the labels to make the task a bit more difficult.

    "},{"location":"examples/least_core_basic/#estimating-least-core-values","title":"Estimating Least Core Values","text":"

    In this first section we will use a smaller subset of the dataset containing 10 samples in order to be able to compute exact values in a reasonable amount of time. Afterwards, we will use the Monte Carlo method with a limited budget (maximum number of subsets) to approximate these values.

    "},{"location":"examples/least_core_basic/#data-removal","title":"Data Removal","text":"

    We now move on to the data removal experiments using the full dataset.

    In these experiments, we first rank the data points from most valuable to least valuable using the values estimated by the Monte Carlo Least Core method. Then, we gradually remove from 5 to 40 percent, by increments of 5 percentage points, of the most valuable/least valuable ones, train the model on this subset and compute its accuracy.

    "},{"location":"examples/least_core_basic/#remove-best","title":"Remove Best","text":"

    We start by removing the best data points and seeing how the model's accuracy evolves.

    "},{"location":"examples/least_core_basic/#remove-worst","title":"Remove Worst","text":"

    We then proceed to removing the worst data points and seeing how the model's accuracy evolves.

    "},{"location":"examples/shapley_basic_spotify/","title":"Shapley values","text":"

    This notebook introduces Shapley methods for the computation of data value using pyDVL.

    In order to illustrate the practical advantages, we will predict the popularity of songs in the dataset Top Hits Spotify from 2000-2019, and highlight how data valuation can help investigate and boost the performance of the models. In doing so, we will describe the basic usage patterns of pyDVL.

    Recall that data value is a function of three things:

    1. The dataset.
    2. The model.
    3. The performance metric or scoring function.

    Below we will describe how to instantiate each one of these objects and how to use them for data valuation. Please also see the documentation on data valuation.

    We will be using the following functions from pyDVL. The main entry point is the function compute_shapley_values(), which provides a facade to all Shapley methods. In order to use it we need the classes Dataset, Utility and Scorer.

    %autoreload\nfrom pydvl.reporting.plots import plot_shapley\nfrom pydvl.utils.dataset import GroupedDataset\nfrom support.shapley import load_spotify_dataset\nfrom pydvl.value import *\n
    training_data, val_data, test_data = load_spotify_dataset(\nval_size=0.3, test_size=0.3, target_column=\"popularity\", random_state=random_state\n)\n
    training_data[0].head()\n
    artist song duration_ms explicit year danceability energy key loudness mode speechiness acousticness instrumentalness liveness valence tempo genre 1561 Fetty Wap 679 (feat. Remy Boyz) 196693 True 2015 0.618 0.717 7 -5.738 1 0.3180 0.00256 0.000000 0.6250 0.603 190.050 8 1410 Meghan Trainor All About That Bass 187920 True 2015 0.807 0.887 9 -3.726 1 0.0503 0.05730 0.000003 0.1240 0.961 134.052 14 1772 Katy Perry Chained To The Rhythm 237733 False 2017 0.562 0.800 0 -5.404 1 0.1120 0.08140 0.000000 0.1990 0.471 95.029 14 1670 Sigala Sweet Lovin' - Radio Edit 202149 False 2015 0.683 0.910 10 -1.231 1 0.0515 0.05530 0.000005 0.3360 0.674 124.977 15 1780 Liam Payne Strip That Down 204502 False 2017 0.869 0.485 6 -5.595 1 0.0545 0.24600 0.000000 0.0765 0.527 106.028 14

    The dataset has many high-level features, some quite intuitive ('duration_ms' or 'tempo'), while others are a bit more cryptic ('valence'?). For information on each feature, please consult the dataset's website.

    In our analysis, we will use all the columns, except for 'artist' and 'song', to predict the 'popularity' of each song. We will nonetheless keep the information on song and artist in a separate object for future reference.

    song_name = training_data[0][\"song\"]\nartist = training_data[0][\"artist\"]\ntraining_data[0] = training_data[0].drop([\"song\", \"artist\"], axis=1)\ntest_data[0] = test_data[0].drop([\"song\", \"artist\"], axis=1)\nval_data[0] = val_data[0].drop([\"song\", \"artist\"], axis=1)\n

    Input and label data are then used to instantiate a Dataset object:

    dataset = Dataset(*training_data, *val_data)\n

    The calculation of exact Shapley values is computationally very expensive (exponentially so!) because it requires training the model on every possible subset of the training set. For this reason, PyDVL implements techniques to speed up the calculation, such as Monte Carlo approximations, surrogate models or caching of intermediate results and grouping of data to calculate group Shapley values instead of single data points.

    In our case, we will group songs by artist and calculate the Shapley value for the artists. Given the pandas Series for 'artist', to group the dataset by it, one does the following:

    grouped_dataset = GroupedDataset.from_dataset(dataset=dataset, data_groups=artist)\n
    utility = Utility(\nmodel=GradientBoostingRegressor(n_estimators=3),\ndata=grouped_dataset,\nscorer=Scorer(\"neg_mean_absolute_error\", default=0.0),\n)\nvalues = compute_shapley_values(\nutility,\nmode=ShapleyMode.TruncatedMontecarlo,\n# Stop if the standard error is below 1% of the range of the values (which is ~2),\n# or if the number of updates exceeds 1000\ndone=AbsoluteStandardError(threshold=0.2, fraction=0.9) | MaxUpdates(1000),\ntruncation=RelativeTruncation(utility, rtol=0.01),\nn_jobs=-1,\n)\nvalues.sort(key=\"value\")\ndf = values.to_dataframe(column=\"data_value\", use_names=True)\n

    The function compute_shapley_values() serves as a common access point to all Shapley methods. For most of them, we must choose a StoppingCriterion with the argument done=. In this case we choose to stop when the ratio of standard error to value is below 0.2 for at least 90% of the training points, or if the number of updates of any index exceeds 1000. The mode argument specifies the Shapley method to use. In this case, we use the Truncated Monte Carlo approximation, which is the fastest of the Monte Carlo methods, owing both to using the permutation definition of Shapley values and the ability to truncate the iteration over a given permutation. We configure this to happen when the contribution of the remaining elements is below 1% of the total utility with the parameter truncation= and the policy RelativeTruncation.

    Let's take a look at the returned dataframe:

    df.head()\n
    data_value data_value_stderr Kendrick Lamar -1.279149 0.091670 BLACKPINK -1.277363 0.177476 Adele -1.241698 0.183732 5 Seconds of Summer -1.228002 0.103377 Flume -1.197065 0.102345

    The first thing to notice is that we sorted the results in ascending order of Shapley value. The index holds the labels for each data group: in this case, artist names. The column data_value is just that: the Shapley Data value, and data_value_stderr is its estimated standard error because we are using a Monte Carlo approximation.

    Let us plot the results. In the next cell we will take the 30 artists with the lowest score and plot their values with 95% Normal confidence intervals. Keep in mind that Monte Carlo Shapley is typically very noisy, and it can take many steps to arrive at a clean estimate.

    We can immediately see that many artists (groups of samples) have very low, even negative value, which means that they tend to decrease the total score of the model when present in the training set! What happens if we remove them?

    In the next cell we create a new training set excluding the artists with the lowest scores:

    low_dvl_artists = df.iloc[:30].index.to_list()\nartist_filter = ~artist.isin(low_dvl_artists)\nX_train_good_dvl = training_data[0][artist_filter]\ny_train_good_dvl = training_data[1][artist_filter]\n

    Now we will use this \"cleaned\" dataset to retrain the same model and compare its mean absolute error to the one trained on the full dataset. Notice that the score now is calculated using the test set, while in the calculation of the Shapley values we were using the validation set.

    model_good_data = GradientBoostingRegressor(n_estimators=3).fit(\nX_train_good_dvl, y_train_good_dvl\n)\nerror_good_data = mean_absolute_error(\nmodel_good_data.predict(test_data[0]), test_data[1]\n)\nmodel_all_data = GradientBoostingRegressor(n_estimators=3).fit(\ntraining_data[0], training_data[1]\n)\nerror_all_data = mean_absolute_error(model_all_data.predict(test_data[0]), test_data[1])\nprint(f\"Improvement: {100*(error_all_data - error_good_data)/error_all_data:02f}%\")\n
    \nImprovement: 13.940685%\n\n

    The score has improved by almost 14%! This is quite an important result, as it shows a consistent process to improve the performance of a model by excluding data points from its training set.

    One must however proceed with caution instead of simply throwing away data. For one, `mean_absolute_error` is an estimate of generalization error on unseen data, so the improvement we see on the test set might not be as large upon deployment. It would be advisable to cross-validate this whole process to obtain more conservative estimates. It is also advisable to manually inspect the artists with low value and to try to understand the reason why the model behaves like it does. Finally, remember that **the value depends on the model chosen**! Artists that are detrimental to the Gradient Boosting Regressor might be informative for a different model (although it is likely that the worst ones share some characteristic making them \"bad\" for other regressors).

    Let us take all the songs by Rihanna, set their score to 0 and re-calculate the Shapley values.

    y_train_anomalous = training_data[1].copy(deep=True)\ny_train_anomalous[artist == \"Rihanna\"] = 0\nanomalous_dataset = Dataset(\nx_train=training_data[0],\ny_train=y_train_anomalous,\nx_test=val_data[0],\ny_test=val_data[1],\n)\ngrouped_anomalous_dataset = GroupedDataset.from_dataset(anomalous_dataset, artist)\nanomalous_utility = Utility(\nmodel=GradientBoostingRegressor(n_estimators=3),\ndata=grouped_anomalous_dataset,\nscorer=Scorer(\"neg_mean_absolute_error\", default=0.0),\n)\nvalues = compute_shapley_values(\nanomalous_utility,\nmode=ShapleyMode.TruncatedMontecarlo,\ndone=AbsoluteStandardError(threshold=0.2, fraction=0.9) | MaxUpdates(1000),\nn_jobs=-1,\n)\nvalues.sort(key=\"value\")\ndf = values.to_dataframe(column=\"data_value\", use_names=True)\n

    Let us now consider the low-value artists (at least for predictive purposes, no claims are made about their artistic value!) and plot the results

    And Rihanna (our anomalous data group) has moved from top contributor to having negative impact on the performance of the model, as expected!

    What is going on? A popularity of 0 for Rihanna's songs is inconsistent with listening patterns for other artists. In artificially setting this, we degrade the predictive power of the model.

    By dropping low-value groups or samples, one can often increase model performance, but by inspecting them, it is possible to identify bogus data sources or acquisition methods.

    "},{"location":"examples/shapley_basic_spotify/#shapley-for-data-valuation","title":"Shapley for data valuation","text":""},{"location":"examples/shapley_basic_spotify/#setup","title":"Setup","text":"

    We begin by importing the main libraries and setting some defaults.

    If you are reading this in the documentation, some boilerplate (including most plotting code) has been omitted for convenience."},{"location":"examples/shapley_basic_spotify/#loading-and-grouping-the-dataset","title":"Loading and grouping the dataset","text":"

    pyDVL provides a support function for this notebook, load_spotify_dataset(), which downloads data on songs published after 2014, and splits 30% of data for testing, and 30% of the remaining data for validation. The return value is a triple of training, validation and test data as lists of the form [X_input, Y_label].

    "},{"location":"examples/shapley_basic_spotify/#creating-the-utility-and-computing-values","title":"Creating the utility and computing values","text":"

    Now we can calculate the contribution of each group to the model performance.

    As a model, we use scikit-learn's GradientBoostingRegressor, but pyDVL can work with any model from sklearn, xgboost or lightgbm. More precisely, any model that implements the protocol pydvl.utils.types.SupervisedModel, which is just the standard sklearn interface of fit(),predict() and score() can be used to construct the utility.

    The third and final component is the scoring function. It can be anything like accuracy or \\(R^2\\), and is set with a string from the standard sklearn scoring methods. Please refer to that documentation for information on how to define your own scoring function.

    We group dataset, model and scoring function into an instance of Utility.

    "},{"location":"examples/shapley_basic_spotify/#evaluation-on-anomalous-data","title":"Evaluation on anomalous data","text":"

    One interesting test is to corrupt some data and to monitor how their value changes. To do this, we will take one of the artists with the highest value and set the popularity of all their songs to 0.

    "},{"location":"examples/shapley_knn_flowers/","title":"KNN Shapley","text":"

    This notebook shows how to calculate Shapley values for the K-Nearest Neighbours algorithm. By making use of the local structure of KNN, it is possible to compute an exact value in almost linear time, as opposed to exponential complexity of exact, model-agnostic Shapley.

    The main idea is to exploit the fact that adding or removing points beyond the k-ball doesn't influence the score. Because the algorithm then essentially only needs to do a search it runs in \\(\\mathcal{O}(N \\log N)\\) time.

    By further using approximate nearest neighbours, it is possible to achieve \\((\\epsilon,\\delta)\\)-approximations in sublinear time. However, this is not implemented in pyDVL yet.

    We refer to the original paper that pyDVL implements for details: Jia, Ruoxi, David Dao, Boxin Wang, Frances Ann Hubis, Nezihe Merve Gurel, Bo Li, Ce Zhang, Costas Spanos, and Dawn Song. Efficient Task-Specific Data Valuation for Nearest Neighbor Algorithms. Proceedings of the VLDB Endowment 12, no. 11 (1 July 2019): 1610\u201323.

    The main entry point is the function compute_shapley_values(), which provides a facade to all Shapley methods. In order to use it we need the classes Dataset, Utility and Scorer, all of which can be imported from pydvl.value:

    from pydvl.value import *\n
    sklearn_dataset = sk.datasets.load_iris()\ndata = Dataset.from_sklearn(sklearn_dataset)\nknn = sk.neighbors.KNeighborsClassifier(n_neighbors=5)\nutility = Utility(knn, data)\n
    shapley_values = compute_shapley_values(utility, mode=ShapleyMode.KNN, progress=True)\nshapley_values.sort(key=\"value\")\nvalues = shapley_values.values\n
    \n0it [00:00, ?it/s]\n

    If we now look at the distribution of Shapley values for each class, we see that each has samples with both high and low scores. This is expected, because an accurate model uses information of all classes.

    corrupted_data = deepcopy(data)\nn_corrupted = 10\ncorrupted_data.y_train[:n_corrupted] = (corrupted_data.y_train[:n_corrupted] + 1) % 3\nknn = sk.neighbors.KNeighborsClassifier(n_neighbors=5)\ncontaminated_values = compute_shapley_values(\nUtility(knn, corrupted_data), mode=ShapleyMode.KNN\n)\n

    Taking the average corrupted value and comparing it to non-corrupted ones, we notice that on average anomalous points have a much lower score, i.e. they tend to be much less valuable to the model.

    To do this, first we make sure that we access the results by data index with a call to ValuationResult.sort(), then we split the values into two groups: corrupted and non-corrupted. Note how we access property values of the ValuationResult object. This is a numpy array of values, sorted however the object was sorted. Finally, we compute the quantiles of the two groups and compare them. We see that the corrupted mean is in the lowest percentile of the value distribution, while the correct mean is in the 70th percentile.

    contaminated_values.sort(\nkey=\"index\"\n)  # This is redundant, but illustrates sorting, which is in-place\ncorrupted_shapley_values = contaminated_values.values[:n_corrupted]\ncorrect_shapley_values = contaminated_values.values[n_corrupted:]\nmean_corrupted = np.mean(corrupted_shapley_values)\nmean_correct = np.mean(correct_shapley_values)\npercentile_corrupted = np.round(100 * np.mean(values &lt; mean_corrupted), 0)\npercentile_correct = np.round(100 * np.mean(values &lt; mean_correct), 0)\nprint(\nf\"The corrupted mean is at percentile {percentile_corrupted:.0f} of the value distribution.\"\n)\nprint(\nf\"The correct mean is percentile {percentile_correct:.0f} of the value distribution.\"\n)\n
    \nThe corrupted mean is at percentile 1 of the value distribution.\nThe correct mean is percentile 71 of the value distribution.\n\n

    This is confirmed if we plot the distribution of Shapley values and circle corrupt points in red. They all tend to have low Shapley scores, regardless of their position in space and assigned label:

    "},{"location":"examples/shapley_knn_flowers/#knn-shapley","title":"KNN Shapley","text":""},{"location":"examples/shapley_knn_flowers/#setup","title":"Setup","text":"

    We begin by importing the main libraries and setting some defaults.

    If you are reading this in the documentation, some boilerplate (including most plotting code) has been omitted for convenience."},{"location":"examples/shapley_knn_flowers/#building-a-dataset-and-a-utility","title":"Building a Dataset and a Utility","text":"

    We use the sklearn iris dataset and wrap it into a pydvl.utils.dataset.Dataset calling the factory pydvl.utils.dataset.Dataset.from_sklearn(). This automatically creates a train/test split for us which will be used to compute the utility.

    We then create a model and instantiate a Utility using data and model. The model needs to implement the protocol pydvl.utils.types.SupervisedModel, which is just the standard sklearn interface of fit(),predict() and score(). In constructing the Utility one can also choose a scoring function, but we pick the default which is just the model's knn.score().

    "},{"location":"examples/shapley_knn_flowers/#computing-values","title":"Computing values","text":"

    Calculating the Shapley values is straightforward. We just call compute_shapley_values() with the utility object we created above. The function returns a ValuationResult. This object contains the values themselves, data indices and labels.

    "},{"location":"examples/shapley_knn_flowers/#inspecting-the-results","title":"Inspecting the results","text":"

    Let us first look at the labels' distribution as a function of petal and sepal length:

    "},{"location":"examples/shapley_knn_flowers/#corrupting-labels","title":"Corrupting labels","text":"

    To test how informative values are, we can corrupt some training labels and see how their Shapley values change with respect to the non-corrupted points.

    "},{"location":"examples/shapley_utility_learning/","title":"Data utility learning","text":"

    This notebook introduces Data Utility Learning, a method of approximating Data Shapley values by learning to estimate the utility function.

    The idea is to employ a model to learn the performance of the learning algorithm of interest on unseen data combinations (i.e. subsets of the dataset). The method was originally described in Wang, Tianhao, Yu Yang, and Ruoxi Jia. Improving Cooperative Game Theory-Based Data Valuation via Data Utility Learning. arXiv, 2022.

    Warning: Work on Data Utility Learning is preliminary. It remains to be seen when or whether it can be put effectively into application. For this further testing and benchmarking are required.

    Recall the definition of Shapley value \\(v_u(i)\\) for data point \\(i\\):

    \\[\\begin{equation} v_u(i) = \\frac{1}{n} \\sum_{S \\subseteq N \\setminus \\{i\\}} \\binom{n-1}{|S|}^{-1} [u(S \\cup \\{i\\}) \u2212 u(S)] , \\tag{1} \\label{eq:shapley-def} \\end{equation}\\]

    where \\(N\\) is the set of all indices in the training set and \\(u\\) is the utility.

    In Data Utility Learning, to avoid the exponential cost of computing this sum, one learns a surrogate model for \\(u\\). We start by sampling so-called utility samples to form a training set \\(S_\\mathrm{train}\\) for our utility model. Each utility sample is a tuple consisting of a subset of indices \\(S_j\\) in the dataset and its utility \\(u(S_j)\\):

    \\[\\mathcal{S}_\\mathrm{train} = \\{(S_j, u(S_j)): j = 1 , ..., m_\\mathrm{train}\\}\\]

    where \\(m_\\mathrm{train}\\) denotes the training budget for the learned utility function.

    The subsets are then transformed into boolean vectors \\(\\phi\\) in which a \\(1\\) at index \\(k\\) means that the \\(k\\)-th sample of the dataset is present in the subset:

    \\[S_j \\mapsto \\phi_j \\in \\{ 0, 1 \\}^{N}\\]

    We fit a regression model \\(\\tilde{u}\\), called data utility model, on the transformed utility samples \\(\\phi (\\mathcal{S}_\\mathrm{train}) := \\{(\\phi(S_j), u(S_j)): j = 1 , ..., m_\\mathrm{train}\\}\\) and use it to predict instead of computing the utility for any \\(S_j \\notin \\mathcal{S}_\\mathrm{train}\\). We abuse notation and identify \\(\\tilde{u}\\) with the composition \\(\\tilde{u} \\circ \\phi : N \\rightarrow \\mathbb{R}\\).

    The main assumption is that it is much faster to fit and use \\(\\tilde{u}\\) than it is to compute \\(u\\) and that for most \\(i\\), \\(v_{\\tilde{u}}(i) \\approx v_u(i)\\) in some sense.

    As is the case with all other Shapley methods, the main entry point is the function compute_shapley_values(), which provides a facade to all algorithms in this family. We use it with the usual classes Dataset and Utility. In addition, we must import the core class for learning a utility, DataUtilityLearning.

    %autoreload\nfrom pydvl.utils import DataUtilityLearning, top_k_value_accuracy\nfrom pydvl.reporting.plots import shaded_mean_std\nfrom pydvl.value import *\n
    dataset = Dataset.from_sklearn(\nload_iris(), train_size=0.1, random_state=random_state, stratify_by_target=True\n)\n

    We verify that, as in the paper, if we fit a Support-Vector Classifier to the training data, we obtain an accuracy of around 92%:

    model = LinearSVC()\nmodel.fit(dataset.x_train, dataset.y_train)\nprint(f\"Mean accuracy: {100 * model.score(dataset.x_test, dataset.y_test):0.2f}%\")\n
    \nMean accuracy: 92.59%\n\n
    computation_times = {}\n
    utility = Utility(model=model, data=dataset)\n
    start_time = time.monotonic()\nresult = compute_shapley_values(\nu=utility,\nmode=ShapleyMode.CombinatorialExact,\nn_jobs=-1,\nprogress=False,  # Does not display correctly in a notebook\n)\ncomputation_time = time.monotonic() - start_time\ncomputation_times[\"exact\"] = computation_time\ndf = result.to_dataframe(column=\"exact\").drop(columns=[\"exact_stderr\"])\n

    We now estimate the Data Shapley values using the DataUtilityLearning wrapper. This class wraps a Utility and delegates calls to it, up until a given budget. Every call yields a utility sample which is saved under the hood for training of the given utility model. Once the budget is exhausted, DataUtilityLearning fits the model to the utility samples and all subsequent calls use the learned model to predict the wrapped utility instead of delegating to it.

    For the utility model we follow the paper and use a fully connected neural network. To train it we use a total of training_budget utility samples. We repeat this multiple times for each training budget.

    Note how we use a Monte Carlo approximation instead of `combinatorial_exact` as before. This is because the exact computation samples subsets in a particular order, from the lowest size to the largest. Because the training budget for the model to learn the utility is around 1/4 of the total number of subsets, this would mean that we would never see utility samples for the larger sizes and the model would be biased (try it!).
    mlp_kwargs = dict(\nhidden_layer_sizes=(20, 10),\nactivation=\"relu\",\nsolver=\"adam\",\nlearning_rate_init=0.001,\nbatch_size=32,\nmax_iter=800,\n)\nprint(\nf\"Doing {n_runs} runs for each of {len(training_budget_values)} different training budgets.\"\n)\npbar = tqdm(\nproduct(range(n_runs), training_budget_values),\ntotal=n_runs * len(training_budget_values),\n)\nfor idx, budget in pbar:\npbar.set_postfix_str(f\"Run {idx} for training budget: {budget}\")\ndul_utility = DataUtilityLearning(\nu=utility, training_budget=budget, model=MLPRegressor(**mlp_kwargs)\n)\nstart_time = time.monotonic()\n# DUL will kick in after training_budget calls to utility\nresult = compute_shapley_values(\nu=dul_utility,\nmode=ShapleyMode.PermutationMontecarlo,\ndone=MaxUpdates(300),\nn_jobs=-1,\n)\ncomputation_time = time.monotonic() - start_time\nif budget in computation_times:\ncomputation_times[budget].append(computation_time)\nelse:\ncomputation_times[budget] = [computation_time]\ndul_df = result.to_dataframe(column=f\"{budget}_{idx}\").drop(\ncolumns=[f\"{budget}_{idx}_stderr\"]\n)\ndf = pd.concat([df, dul_df], axis=1)\ncomputation_times_df = pd.DataFrame(computation_times)\n
    \nDoing 10 runs for each of 10 different training budgets.\n\n
    \n  0%|          | 0/100 [00:00<?, ?it/s]\n

    Next we compute the \\(l_2\\) error for the different training budgets across all runs and plot mean and standard deviation. We obtain results analogous to Figure 1 of the paper, verifying that the method indeed works for estimating the Data Shapley values (at least in this context).

    In the plot we also display the mean and standard deviation of the computation time taken for each training budget.

    errors = np.zeros((len(training_budget_values), n_runs), dtype=float)\naccuracies = np.zeros((len(training_budget_values), n_runs), dtype=float)\ntop_k = 3\nfor i, budget in enumerate(training_budget_values):\nfor j in range(n_runs):\ny_true = df[\"exact\"].values\ny_estimated = df[f\"{budget}_{j}\"].values\nerrors[i, j] = np.linalg.norm(y_true - y_estimated, ord=2)\naccuracies[i, j] = top_k_value_accuracy(y_true, y_estimated, k=top_k)\nerror_from_mean = np.linalg.norm(df[\"exact\"].values - df[\"exact\"].values.mean(), ord=2)\n

    Let us next look at how well the ranking of values resulting from using the surrogate \\(\\tilde{u}\\) matches the ranking by the exact values. For this we fix \\(k=3\\) and consider the \\(k\\) samples with the highest value according to \\(\\tilde{u}\\) and \\(u\\):

    Finally, for each sample, we look at the distance of the estimates to the exact value across runs. Boxes are centered at the 50th percentile with whiskers at the 25th and 75th. We plot relative distances, as a percentage. We observe a general tendency to underestimate the value:

    highest_value_index = df.index[df[\"exact\"].argmax()]\ny_train_corrupted = dataset.y_train.copy()\ny_train_corrupted[highest_value_index] = (\ny_train_corrupted[highest_value_index] + 1\n) % 3\ncorrupted_dataset = Dataset(\nx_train=dataset.x_train,\ny_train=y_train_corrupted,\nx_test=dataset.x_test,\ny_test=dataset.y_test,\n)\n

    We retrain the model on the new dataset and verify that the accuracy decreases:

    model = LinearSVC()\nmodel.fit(dataset.x_train, y_train_corrupted)\nprint(f\"Mean accuracy: {100 * model.score(dataset.x_test, dataset.y_test):0.2f}%\")\n
    \nMean accuracy: 82.96%\n\n

    Finally, we recompute the values of all samples using the exact method and the best training budget previously obtained and then plot the resulting scores.

    best_training_budget = training_budget_values[errors.mean(axis=1).argmin()]\nutility = Utility(\nmodel=LinearSVC(),\ndata=corrupted_dataset,\n)\nresult = compute_shapley_values(\nu=utility,\nmode=ShapleyMode.CombinatorialExact,\nn_jobs=-1,\nprogress=False,\n)\ndf_corrupted = result.to_dataframe(column=\"exact\").drop(columns=[\"exact_stderr\"])\ndul_utility = DataUtilityLearning(\nu=utility, training_budget=best_training_budget, model=MLPRegressor(**mlp_kwargs)\n)\nresult = compute_shapley_values(\nu=dul_utility,\nmode=ShapleyMode.PermutationMontecarlo,\ndone=MaxUpdates(300),\nn_jobs=-1,\n)\ndul_df = result.to_dataframe(column=\"estimated\").drop(columns=[\"estimated_stderr\"])\ndf_corrupted = pd.concat([df_corrupted, dul_df], axis=1)\n

    We can see in the figure that both methods assign the lowest value to the sample with the corrupted label.

    As mentioned above, despite the previous results, this work is preliminary and the usefulness of Data Utility Learning remains to be tested in practice."},{"location":"examples/shapley_utility_learning/#data-utility-learning","title":"Data Utility Learning","text":""},{"location":"examples/shapley_utility_learning/#setup","title":"Setup","text":"

    We begin by importing the main libraries and setting some defaults.

    If you are reading this in the documentation, some boilerplate (including most plotting code) has been omitted for convenience."},{"location":"examples/shapley_utility_learning/#dataset","title":"Dataset","text":"

    Following the paper, we take 15 samples (10%) from the Iris dataset and compute their Data Shapley values by using all the remaining samples as test set for computing the utility, which in this case is accuracy.

    "},{"location":"examples/shapley_utility_learning/#data-shapley","title":"Data Shapley","text":"

    We start by defining the utility using the model and computing the exact Data Shapley values by definition \\(\\ref{eq:shapley-def}\\).

    "},{"location":"examples/shapley_utility_learning/#evaluation-on-anomalous-data","title":"Evaluation on anomalous data","text":"

    One interesting way to assess the Data Utility Learning approach is to corrupt some data and monitor how the value changes. To do this, we will take the sample with the highest score and change its label.

    "},{"location":"getting-started/first-steps/","title":"Getting started","text":"

    Warning

    Make sure you have read Installing pyDVL before using the library. In particular read about how caching and parallelization work, since they might require additional setup.

    "},{"location":"getting-started/first-steps/#main-concepts","title":"Main concepts","text":"

    pyDVL aims to be a repository of production-ready, reference implementations of algorithms for data valuation and influence functions. Even though we only briefly introduce key concepts in the documentation, the following sections should be enough to get you started.

    • Basics of data valuation for key objects and usage patterns for Shapley value computation and related methods.
    • Computing Influence Values for instructions on how to compute influence functions.
    "},{"location":"getting-started/first-steps/#running-the-examples","title":"Running the examples","text":"

    If you are somewhat familiar with the concepts of data valuation, you can start by browsing our worked-out examples illustrating pyDVL's capabilities either:

    • In the examples under Basics of data valuation and Computing Influence Values.
    • Using binder notebooks, deployed from each example's page.
    • Locally, by starting a jupyter server at the root of the project. You will have to install jupyter first manually since it's not a dependency of the library.
    "},{"location":"getting-started/first-steps/#advanced-usage","title":"Advanced usage","text":"

    Besides the do's and don'ts of data valuation itself, which are the subject of the examples and the documentation of each method, there are two main things to keep in mind when using pyDVL.

    "},{"location":"getting-started/first-steps/#caching","title":"Caching","text":"

    pyDVL uses memcached to cache the computation of the utility function and speed up some computations (see the installation guide).

    Caching of the utility function is disabled by default. When it is enabled it takes into account the data indices passed as argument and the utility function wrapped into the Utility object. This means that care must be taken when reusing the same utility function with different data, see the documentation for the caching module for more information.

    In general, caching won't play a major role in the computation of Shapley values because the probability of sampling the same subset twice, and hence needing the same utility function computation, is very low. However, it can be very useful when comparing methods that use the same utility function, or when running multiple experiments with the same data.

    When is the cache really necessary?

    Crucially, semi-value computations with the PermutationSampler require caching to be enabled, or they will take twice as long as the direct implementation in compute_shapley_values.

    "},{"location":"getting-started/first-steps/#parallelization","title":"Parallelization","text":"

    pyDVL supports joblib for local parallelization (within one machine) and ray for distributed parallelization (across multiple machines).

    The former works out of the box but for the latter you will need to provide a running cluster (or run ray in local mode).

    As of v0.7.0 pyDVL does not allow requesting resources per task sent to the cluster, so you will need to make sure that each worker has enough resources to handle the tasks it receives. A data valuation task using game-theoretic methods will typically make a copy of the whole model and dataset to each worker, even if the re-training only happens on a subset of the data. This means that you should make sure that each worker has enough memory to handle the whole dataset.

    "},{"location":"getting-started/installation/","title":"Installing pyDVL","text":"

    To install the latest release use:

    pip install pyDVL\n

    To use all features of influence functions use instead:

    pip install pyDVL[influence]\n

    This includes a dependency on PyTorch (Version 2.0 and above) and thus is left out by default.

    In case that you have a supported version of CUDA installed (v11.2 to 11.8 as of this writing), you can enable eigenvalue computations for low-rank approximations with CuPy on the GPU by using:

    pip install pyDVL[cupy]\n

    If you use a different version of CUDA, please install CuPy manually.

    In order to check the installation you can use:

    python -c \"import pydvl; print(pydvl.__version__)\"\n

    You can also install the latest development version from TestPyPI:

    pip install pyDVL --index-url https://test.pypi.org/simple/\n
    "},{"location":"getting-started/installation/#dependencies","title":"Dependencies","text":"

    pyDVL requires Python >= 3.8, Memcached for caching and Ray for parallelization in a cluster (locally it uses joblib). Additionally, the Influence functions module requires PyTorch (see Installing pyDVL).

    ray is used to distribute workloads across nodes in a cluster (it can be used locally as well, but for this we recommend joblib instead). Please follow the instructions in their documentation to set up the cluster. Once you have a running cluster, you can use it by passing the address of the head node to parallel methods via ParallelConfig.

    "},{"location":"getting-started/installation/#setting-up-the-cache","title":"Setting up the cache","text":"

    memcached is an in-memory key-value store accessible over the network. pyDVL uses it to cache the computation of the utility function and speed up some computations (in particular, semi-value computations with the PermutationSampler but other methods may benefit as well).

    You can either install it as a package or run it inside a docker container (the simplest). For installation instructions, refer to the Getting started section in memcached's wiki. Then you can run it with:

    memcached -u user\n

    To run memcached inside a container in daemon mode instead, do:

    docker container run -d --rm -p 11211:11211 memcached:latest\n

    Using the cache

    Continue reading about the cache in the First Steps and the documentation for the caching module.

    "},{"location":"influence/","title":"The influence function","text":""},{"location":"influence/#the-influence-function","title":"The influence function","text":"

    Warning

    The code in the package pydvl.influence is experimental. Package structure and basic API are bound to change before v1.0.0

    The influence function (IF) is a method to quantify the effect (influence) that each training point has on the parameters of a model, and by extension on any function thereof. In particular, it allows to estimate how much each training sample affects the error on a test point, making the IF useful for understanding and debugging models.

    Alas, the influence function relies on some assumptions that can make its application difficult. Yet another drawback is that it requires the computation of the inverse of the Hessian of the model wrt. its parameters, which is intractable for large models like deep neural networks. Much of the recent research tackles this issue using approximations, like a Neumann series (Agarwal et al., 2017)1, with the most successful solution using a low-rank approximation that iteratively finds increasing eigenspaces of the Hessian (Schioppa et al., 2021)2.

    pyDVL implements several methods for the efficient computation of the IF for machine learning. In the examples we document some of the difficulties that can arise when using the IF.

    "},{"location":"influence/#construction","title":"Construction","text":"

    First introduced in the context of robust statistics in (Hampel, 1974)3, the IF was popularized in the context of machine learning in (Koh and Liang, 2017)4.

    Following their formulation, consider an input space \\(\\mathcal{X}\\) (e.g. images) and an output space \\(\\mathcal{Y}\\) (e.g. labels). Let's take \\(z_i = (x_i, y_i)\\), for \\(i \\in \\{1,...,n\\}\\) to be the \\(i\\)-th training point, and \\(\\theta\\) to be the (potentially highly) multi-dimensional parameters of a model (e.g. \\(\\theta\\) is a big array with all of a neural network's parameters, including biases and/or dropout rates). We will denote with \\(L(z, \\theta)\\) the loss of the model for point \\(z\\) when the parameters are \\(\\theta.\\)

    To train a model, we typically minimize the loss over all \\(z_i\\), i.e. the optimal parameters are

    \\[\\hat{\\theta} = \\arg \\min_\\theta \\sum_{i=1}^n L(z_i, \\theta).\\]

    In practice, lack of convexity means that one doesn't really obtain the minimizer of the loss, and the training is stopped when the validation loss stops decreasing.

    For notational convenience, let's define

    \\[\\hat{\\theta}_{-z} = \\arg \\min_\\theta \\sum_{z_i \\ne z} L(z_i, \\theta), \\]

    i.e. \\(\\hat{\\theta}_{-z}\\) are the model parameters that minimize the total loss when \\(z\\) is not in the training dataset.

    In order to compute the impact of each training point on the model, we would need to calculate \\(\\hat{\\theta}_{-z}\\) for each \\(z\\) in the training dataset, thus re-training the model at least ~\\(n\\) times (more if model training is stochastic). This is computationally very expensive, especially for big neural networks. To circumvent this problem, we can just calculate a first order approximation of \\(\\hat{\\theta}\\). This can be done through single backpropagation and without re-training the full model.

    pyDVL supports two ways of computing the empirical influence function, namely up-weighting of samples and perturbation influences. The choice is done by the parameter influence_type in the main entry point compute_influences.

    "},{"location":"influence/#approximating-the-influence-of-a-point","title":"Approximating the influence of a point","text":"

    Let's define

    \\[\\hat{\\theta}_{\\epsilon, z} = \\arg \\min_\\theta \\frac{1}{n}\\sum_{i=1}^n L(z_i, \\theta) + \\epsilon L(z, \\theta), \\]

    which is the optimal \\(\\hat{\\theta}\\) when we up-weight \\(z\\) by an amount \\(\\epsilon \\gt 0\\).

    From a classical result (a simple derivation is available in Appendix A of (Koh and Liang, 2017)4), we know that:

    \\[\\frac{d \\ \\hat{\\theta}_{\\epsilon, z}}{d \\epsilon} \\Big|_{\\epsilon=0} = -H_{\\hat{\\theta}}^{-1} \\nabla_\\theta L(z, \\hat{\\theta}), \\]

    where \\(H_{\\hat{\\theta}} = \\frac{1}{n} \\sum_{i=1}^n \\nabla_\\theta^2 L(z_i, \\hat{\\theta})\\) is the Hessian of \\(L\\). These quantities are also known as influence factors.

    Importantly, notice that this expression is only valid when \\(\\hat{\\theta}\\) is a minimum of \\(L\\), or otherwise \\(H_{\\hat{\\theta}}\\) cannot be inverted! At the same time, in machine learning full convergence is rarely achieved, so direct Hessian inversion is not possible. Approximations need to be developed that circumvent the problem of inverting the Hessian of the model in all those (frequent) cases where it is not positive definite.

    The influence of training point \\(z\\) on test point \\(z_{\\text{test}}\\) is defined as:

    \\[\\mathcal{I}(z, z_{\\text{test}}) = L(z_{\\text{test}}, \\hat{\\theta}_{-z}) - L(z_{\\text{test}}, \\hat{\\theta}). \\]

    Notice that \\(\\mathcal{I}\\) is higher for points \\(z\\) which positively impact the model score, since the loss is higher when they are excluded from training. In practice, one needs to rely on the following infinitesimal approximation:

    \\[\\mathcal{I}_{up}(z, z_{\\text{test}}) = - \\frac{d L(z_{\\text{test}}, \\hat{\\theta}_{\\epsilon, z})}{d \\epsilon} \\Big|_{\\epsilon=0} \\]

    Using the chain rule and the results calculated above, we get:

    \\[\\mathcal{I}_{up}(z, z_{\\text{test}}) = - \\nabla_\\theta L(z_{\\text{test}}, \\hat{\\theta})^\\top \\ \\frac{d \\hat{\\theta}_{\\epsilon, z}}{d \\epsilon} \\Big|_{\\epsilon=0} = \\nabla_\\theta L(z_{\\text{test}}, \\hat{\\theta})^\\top \\ H_{\\hat{\\theta}}^{-1} \\ \\nabla_\\theta L(z, \\hat{\\theta}) \\]

    All the resulting factors are gradients of the loss wrt. the model parameters \\(\\hat{\\theta}\\). This can be easily computed through one or more backpropagation passes.

    "},{"location":"influence/#perturbation-definition-of-the-influence-score","title":"Perturbation definition of the influence score","text":"

    How would the loss of the model change if, instead of up-weighting an individual point \\(z\\), we were to up-weight only a single feature of that point? Given \\(z = (x, y)\\), we can define \\(z_{\\delta} = (x+\\delta, y)\\), where \\(\\delta\\) is a vector of zeros except for a 1 in the position of the feature we want to up-weight. In order to approximate the effect of modifying a single feature of a single point on the model score we can define

    \\[\\hat{\\theta}_{\\epsilon, z_{\\delta} ,-z} = \\arg \\min_\\theta \\frac{1}{n}\\sum_{i=1}^n L(z_{i}, \\theta) + \\epsilon L(z_{\\delta}, \\theta) - \\epsilon L(z, \\theta), \\]

    Similarly to what was done above, we up-weight point \\(z_{\\delta}\\), but then we also remove the up-weighting for all the features that are not modified by \\(\\delta\\). From the calculations in the previous section, it is then easy to see that

    \\[\\frac{d \\ \\hat{\\theta}_{\\epsilon, z_{\\delta} ,-z}}{d \\epsilon} \\Big|_{\\epsilon=0} = -H_{\\hat{\\theta}}^{-1} \\nabla_\\theta \\Big( L(z_{\\delta}, \\hat{\\theta}) - L(z, \\hat{\\theta}) \\Big) \\]

    and if the feature space is continuous and as \\(\\delta \\to 0\\) we can write

    \\[\\frac{d \\ \\hat{\\theta}_{\\epsilon, z_{\\delta} ,-z}}{d \\epsilon} \\Big|_{\\epsilon=0} = -H_{\\hat{\\theta}}^{-1} \\ \\nabla_x \\nabla_\\theta L(z, \\hat{\\theta}) \\delta + o(\\delta) \\]

    The influence of each feature of \\(z\\) on the loss of the model can therefore be estimated through the following quantity:

    \\[\\mathcal{I}_{pert}(z, z_{\\text{test}}) = - \\lim_{\\delta \\to 0} \\ \\frac{1}{\\delta} \\frac{d L(z_{\\text{test}}, \\hat{\\theta}_{\\epsilon, \\ z_{\\delta}, \\ -z})}{d \\epsilon} \\Big|_{\\epsilon=0} \\]

    which, using the chain rule and the results calculated above, is equal to

    \\[\\mathcal{I}_{pert}(z, z_{\\text{test}}) = - \\nabla_\\theta L(z_{\\text{test}}, \\hat{\\theta})^\\top \\ \\frac{d \\hat{\\theta}_{\\epsilon, z_{\\delta} ,-z}}{d \\epsilon} \\Big|_{\\epsilon=0} = \\nabla_\\theta L(z_{\\text{test}}, \\hat{\\theta})^\\top \\ H_{\\hat{\\theta}}^{-1} \\ \\nabla_x \\nabla_\\theta L(z, \\hat{\\theta}) \\]

    The perturbation definition of the influence score is not straightforward to understand, but it has a simple interpretation: it tells how much the loss of the model changes when a certain feature of point z is up-weighted. A positive perturbation influence score indicates that the feature might have a positive effect on the accuracy of the model.

    It is worth noting that the perturbation influence score is a very rough estimate of the impact of a point on the model's loss and it is subject to large approximation errors. It can nonetheless be used to build training-set attacks, as done in (Koh and Liang, 2017)4.

    "},{"location":"influence/#computation","title":"Computation","text":"

    The main entry point of the library for influence calculation is compute_influences. Given a pre-trained pytorch model with a loss, first an instance of TorchTwiceDifferentiable needs to be created:

    from pydvl.influence import TorchTwiceDifferentiable\nwrapped_model = TorchTwiceDifferentiable(model, loss, device)\n

    The device specifies where influence calculation will be run.

    Given training and test data loaders, the influence of each training point on each test point can be calculated via:

    from pydvl.influence import compute_influences\nfrom torch.utils.data import DataLoader\ntraining_data_loader = DataLoader(...)\ntest_data_loader = DataLoader(...)\ncompute_influences(\nwrapped_model,\ntraining_data_loader,\ntest_data_loader,\n)\n

    The result is a tensor with one row per test point and one column per training point. Thus, each entry \\((i, j)\\) represents the influence of training point \\(j\\) on test point \\(i\\). A large positive influence indicates that training point \\(j\\) tends to improve the performance of the model on test point \\(i\\), and vice versa, a large negative influence indicates that training point \\(j\\) tends to worsen the performance of the model on test point \\(i\\).

    "},{"location":"influence/#perturbation-influences","title":"Perturbation influences","text":"

    The method of empirical influence computation can be selected in compute_influences with the parameter influence_type:

    from pydvl.influence import compute_influences\ncompute_influences(\nwrapped_model,\ntraining_data_loader,\ntest_data_loader,\ninfluence_type=\"perturbation\",\n)\n

    The result is a tensor with at least three dimensions. The first two dimensions are the same as in the influence_type=up case, i.e. one row per test point and one column per training point. The remaining dimensions are the same as the number of input features in the data. Therefore, each entry in the tensor represents the influence of each feature of each training point on each test point.

    "},{"location":"influence/#approximate-matrix-inversion","title":"Approximate matrix inversion","text":"

    In almost every practical application it is not possible to construct, let alone invert, the complete Hessian in memory. pyDVL offers several approximate algorithms to invert it by setting the parameter inversion_method of compute_influences.

    from pydvl.influence import compute_influences\ncompute_influences(\nwrapped_model,\ntraining_data_loader,\ntest_data_loader,\ninversion_method=\"cg\"\n)\n

    Each inversion method has its own set of parameters that can be tuned to improve the final result. These parameters can be passed directly to compute_influences as keyword arguments. For example, the following code sets the maximum number of iterations for conjugate gradient to \\(100\\) and the minimum relative error to \\(0.01\\):

    from pydvl.influence import compute_influences\ncompute_influences(\nwrapped_model,\ntraining_data_loader,\ntest_data_loader,\ninversion_method=\"cg\",\nhessian_regularization=1e-4,\nmaxiter=100,\nrtol=0.01\n)\n
    "},{"location":"influence/#hessian-regularization","title":"Hessian regularization","text":"

    Additionally, and as discussed in the introduction, in machine learning training rarely converges to a global minimum of the loss. Despite good apparent convergence, \\(\\hat{\\theta}\\) might be located in a region with flat curvature or close to a saddle point. In particular, the Hessian might have vanishing eigenvalues making its direct inversion impossible. Certain methods, such as the Arnoldi method, are robust against these problems, but most are not.

    To circumvent this problem, many approximate methods can be implemented. The simplest adds a small hessian perturbation term, i.e. \\(H_{\\hat{\\theta}} + \\lambda \\mathbb{I}\\), with \\(\\mathbb{I}\\) being the identity matrix. This standard trick ensures that the eigenvalues of \\(H_{\\hat{\\theta}}\\) are bounded away from zero and therefore the matrix is invertible. In order for this regularization not to corrupt the outcome too much, the parameter \\(\\lambda\\) should be as small as possible while still allowing a reliable inversion of \\(H_{\\hat{\\theta}} + \\lambda \\mathbb{I}\\).

    from pydvl.influence import compute_influences\ncompute_influences(\nwrapped_model,\ntraining_data_loader,\ntest_data_loader,\ninversion_method=\"cg\",\nhessian_regularization=1e-4\n)\n
    "},{"location":"influence/#influence-factors","title":"Influence factors","text":"

    The compute_influences method offers a fast way to obtain the influence scores given a model and a dataset. Nevertheless, it is often more convenient to inspect and save some of the intermediate results of influence calculation for later use.

    The influence factors (refer to the previous section for a definition) are typically the most computationally demanding part of influence calculation. They can be obtained via the compute_influence_factors function, saved, and later used for influence calculation on different subsets of the training dataset.

    from pydvl.influence import compute_influence_factors\ninfluence_factors = compute_influence_factors(\nwrapped_model,\ntraining_data_loader,\ntest_data_loader,\ninversion_method=\"cg\"\n)\n

    The result is an object of type InverseHvpResult, which holds the calculated influence factors (influence_factors.x) and a dictionary with the info on the inversion process (influence_factors.info).

    "},{"location":"influence/#methods-for-inverse-hvp-calculation","title":"Methods for inverse HVP calculation","text":"

    In order to calculate influence values, pydvl implements several methods for the calculation of the inverse Hessian vector product (iHVP). More precisely, given a model, training data and a tensor \\(b\\), the function solve_hvp will find \\(x\\) such that \\(H x = b\\), where \\(H\\) is the Hessian of the model.

    Many different inversion methods can be selected via the parameter inversion_method of compute_influences.

    The following subsections will offer more detailed explanations for each method.

    "},{"location":"influence/#direct-inversion","title":"Direct inversion","text":"

    With inversion_method = \"direct\" pyDVL will calculate the inverse Hessian using the direct matrix inversion. This means that the Hessian will first be explicitly created and then inverted. This method is the most accurate, but also the most computationally demanding. It is therefore not recommended for large datasets or models with many parameters.

    import torch\nfrom pydvl.influence.inversion import solve_hvp\nb = torch.Tensor(...)\nsolve_hvp(\n\"direct\",\nwrapped_model,\ntraining_data_loader,\nb,\n)\n

    The result is an object of type InverseHvpResult, which holds two objects: influence_factors.x and influence_factors.info. The first one is the inverse Hessian vector product, while the second one is a dictionary with the info on the inversion process. For this method, the info consists of the Hessian matrix itself.

    "},{"location":"influence/#conjugate-gradient","title":"Conjugate Gradient","text":"

    This classical procedure for solving linear systems of equations is an iterative method that does not require the explicit inversion of the Hessian. Instead, it only requires the calculation of Hessian-vector products, making it a good choice for large datasets or models with many parameters. It is nevertheless much slower to converge than the direct inversion method and not as accurate. More info on the theory of conjugate gradient can be found on Wikipedia.

    In pyDVL, you can select conjugate gradient with inversion_method = \"cg\", like this:

    from pydvl.influence.inversion import solve_hvp\nsolve_hvp(\n\"cg\",\nwrapped_model,\ntraining_data_loader,\nb,\nx0=None,\nrtol=1e-7,\natol=1e-7,\nmaxiter=None,\n)\n

    The additional optional parameters x0, rtol, atol, and maxiter are passed to the solve_batch_cg function, and are respectively the initial guess for the solution, the relative tolerance, the absolute tolerance, and the maximum number of iterations.

    The resulting InverseHvpResult holds the solution of the iHVP, influence_factors.x, and some info on the inversion process influence_factors.info. More specifically, for each batch this will contain the number of iterations, a boolean indicating if the inversion converged, and the residual of the inversion.

    "},{"location":"influence/#linear-time-stochastic-second-order-approximation-lissa","title":"Linear time Stochastic Second-Order Approximation (LiSSA)","text":"

    The LiSSA method is a stochastic approximation of the inverse Hessian vector product. Compared to conjugate gradient it is faster but less accurate and typically suffers from instability.

    In order to find the solution of the iHVP, LiSSA iteratively approximates the inverse of the Hessian matrix with the following update:

    \\[H^{-1}_{j+1} b = b + (I - d) \\ H^{-1}_j b - \\frac{H \\ H^{-1}_j b}{s},\\]

    where \\(d\\) and \\(s\\) are a dampening and a scaling factor, which are essential for the convergence of the method and they need to be chosen carefully, and I is the identity matrix. More info on the theory of LiSSA can be found in the original paper (Agarwal et al., 2017)1.

    In pyDVL, you can select LiSSA with inversion_method = \"lissa\", like this:

    from pydvl.influence.inversion import solve_hvp\nsolve_hvp(\n\"lissa\",\nwrapped_model,\ntraining_data_loader,\nb,\nmaxiter=1000,\ndampen=0.0,\nscale=10.0,\nh0=None,\nrtol=1e-4,\n)\n

    with the additional optional parameters maxiter, dampen, scale, h0, and rtol, which are passed to the solve_lissa function, being the maximum number of iterations, the dampening factor, the scaling factor, the initial guess for the solution and the relative tolerance, respectively.

    The resulting InverseHvpResult holds the solution of the iHVP, influence_factors.x, and, within influence_factors.info, the maximum percentage error and the mean percentage error of the approximation.

    "},{"location":"influence/#arnoldi-solver","title":"Arnoldi solver","text":"

    The Arnoldi method is a Krylov subspace method for approximating dominating eigenvalues and eigenvectors. Under a low rank assumption on the Hessian at a minimizer (which is typically observed for deep neural networks), this approximation captures the essential action of the Hessian. More concretely, for \\(Hx=b\\) the solution is approximated by

    \\[x \\approx V D^{-1} V^T b\\]

    where \\(D\\) is a diagonal matrix with the top (in absolute value) eigenvalues of the Hessian and \\(V\\) contains the corresponding eigenvectors. See also (Schioppa et al., 2021)2.

    In pyDVL, you can use Arnoldi with inversion_method = \"arnoldi\", as follows:

    from pydvl.influence.inversion import solve_hvp\nsolve_hvp(\n\"arnoldi\",\nwrapped_model,\ntraining_data_loader,\nb,\nhessian_perturbation=0.0,\nrank_estimate=10,\ntol=1e-6,\neigen_computation_on_gpu=False \n)\n

    For the parameters, check solve_arnoldi. The resulting InverseHvpResult holds the solution of the iHVP, influence_factors.x, and, within influence_factors.info, the computed eigenvalues and eigenvectors.

    1. Agarwal, N., Bullins, B., Hazan, E., 2017. Second-Order Stochastic Optimization for Machine Learning in Linear Time. JMLR 18, 1\u201340.\u00a0\u21a9\u21a9

    2. Schioppa, A., Zablotskaia, P., Vilar, D., Sokolov, A., 2021. Scaling Up Influence Functions. Presented at the AAAI-22, arXiv. https://doi.org/10.48550/arXiv.2112.03052 \u21a9\u21a9

    3. Hampel, F.R., 1974. The Influence Curve and Its Role in Robust Estimation. J. Am. Stat. Assoc. 69, 383\u2013393. https://doi.org/10.2307/2285666 \u21a9

    4. Koh, P.W., Liang, P., 2017. Understanding Black-box Predictions via Influence Functions, in: Proceedings of the 34th International Conference on Machine Learning. Presented at the International Conference on Machine Learning, PMLR, pp. 1885\u20131894.\u00a0\u21a9\u21a9\u21a9

    "},{"location":"value/","title":"Data valuation","text":"

    Note

    If you want to jump right into the steps to compute values, skip ahead to Computing data values.

    Data valuation is the task of assigning a number to each element of a training set which reflects its contribution to the final performance of some model trained on it. Some methods attempt to be model-agnostic, but in most cases the model is an integral part of the method. In these cases, this number is not an intrinsic property of the element of interest, but typically a function of three factors:

    1. The dataset \\(D\\), or more generally, the distribution it was sampled from: In some cases one only cares about values wrt. a given data set, in others value would ideally be the (expected) contribution of a data point to any random set \\(D\\) sampled from the same distribution. pyDVL implements methods of the first kind.

    2. The algorithm \\(\\mathcal{A}\\) mapping the data \\(D\\) to some estimator \\(f\\) in a model class \\(\\mathcal{F}\\). E.g. MSE minimization to find the parameters of a linear model.

    3. The performance metric of interest \\(u\\) for the problem. When value depends on a model, it must be measured in some way which uses it. E.g. the \\(R^2\\) score or the negative MSE over a test set. This metric will be computed over a held-out valuation set.

    pyDVL collects algorithms for the computation of data values in this sense, mostly those derived from cooperative game theory. The methods can be found in the package pydvl.value, with support from modules pydvl.utils.dataset and pydvl.utils.utility, as detailed below.

    Warning

    Be sure to read the section on the difficulties using data values.

    There are three main families of methods for data valuation: game-theoretic, influence-based and intrinsic. As of v0.7.0 pyDVL supports the first two. Here, we focus on game-theoretic concepts and refer to the main documentation on the influence function for the second.

    "},{"location":"value/#game-theoretical-methods","title":"Game theoretical methods","text":"

    The main contenders in game-theoretic approaches are Shapley values (Ghorbani and Zou, 2019)1, (Kwon et al., 2021)2, (Schoch et al., 2022)3, their generalization to so-called semi-values by (Kwon and Zou, 2022)4 and (Wang and Jia, 2022)5, and the Core (Yan and Procaccia, 2021)6. All of these are implemented in pyDVL.

    In these methods, data points are considered players in a cooperative game whose outcome is the performance of the model when trained on subsets (coalitions) of the data, measured on a held-out valuation set. This outcome, or utility, must typically be computed for every subset of the training set, so that an exact computation is \\(\\mathcal{O} (2^n)\\) in the number of samples \\(n\\), with each iteration requiring a full re-fitting of the model using a coalition as training set. Consequently, most methods involve Monte Carlo approximations, and sometimes approximate utilities which are faster to compute, e.g. proxy models (Wang et al., 2022)7 or constant-cost approximations like Neural Tangent Kernels (Wu et al., 2022)8.

    The reasoning behind using game theory is that, in order to be useful, an assignment of value, dubbed valuation function, is usually required to fulfil certain requirements of consistency and \"fairness\". For instance, in some applications value should not depend on the order in which data are considered, or it should be equal for samples that contribute equally to any subset of the data (of equal size). When considering aggregated value for (sub-)sets of data there are additional desiderata, like having a value function that does not increase with repeated samples. Game-theoretic methods are all rooted in axioms that by construction ensure different desiderata, but despite their practical usefulness, none of them are either necessary or sufficient for all applications. For instance, SV methods try to equitably distribute all value among all samples, failing to identify repeated ones as unnecessary, with e.g. a zero value.

    "},{"location":"value/#applications-of-data-valuation","title":"Applications of data valuation","text":"

    Many applications are touted for data valuation, but the results can be inconsistent. Values have a strong dependency on the training procedure and the performance metric used. For instance, accuracy is a poor metric for imbalanced sets and this has a stark effect on data values. Some models exhibit great variance in some regimes and this again has a detrimental effect on values.

    Nevertheless, some of the most promising applications are:

    • Cleaning of corrupted data.
    • Pruning unnecessary or irrelevant data.
    • Repairing mislabeled data.
    • Guiding data acquisition and annotation (active learning).
    • Anomaly detection and model debugging and interpretation.

    Additionally, one of the motivating applications for the whole field is that of data markets: a marketplace where data owners can sell their data to interested parties. In this setting, data valuation can be a key component to determine the price of data. Algorithm-agnostic methods like LAVA (Just et al., 2023)9 are particularly well suited for this, as they use the Wasserstein distance between a vendor's data and the buyer's to determine the value of the former.

    However, this is a complex problem which can face banal practical problems, like the fact that data owners may not wish to disclose their data for valuation.

    "},{"location":"value/#computing-data-values","title":"Computing data values","text":"

    Using pyDVL to compute data values is a simple process that can be broken down into three steps:

    1. Creating a Dataset object from your data.
    2. Creating a Utility which ties your model to the dataset and a scoring function.
    3. Computing values with a method of your choice, e.g. via compute_shapley_values.
    "},{"location":"value/#creating-a-dataset","title":"Creating a Dataset","text":"

    The first item in the tuple \\((D, \\mathcal{A}, u)\\) characterising data value is the dataset. The class Dataset is a simple convenience wrapper for the train and test splits that is used throughout pyDVL. The test set will be used to evaluate a scoring function for the model.

    It can be used as follows:

    import numpy as np\nfrom pydvl.utils import Dataset\nfrom sklearn.model_selection import train_test_split\nX, y = np.arange(100).reshape((50, 2)), np.arange(50)\nX_train, X_test, y_train, y_test = train_test_split(\nX, y, test_size=0.5, random_state=16\n)\ndataset = Dataset(X_train, X_test, y_train, y_test)\n

    It is also possible to construct Datasets from sklearn toy datasets for illustrative purposes using from_sklearn.

    "},{"location":"value/#grouping-data","title":"Grouping data","text":"

    Be it because data valuation methods are computationally very expensive, or because we are interested in the groups themselves, it can often be useful or necessary to group samples to valuate them together. GroupedDataset provides an alternative to Dataset with the same interface which allows this.

    You can see an example in action in the Spotify notebook, but here's a simple example grouping a pre-existing Dataset. First we construct an array mapping each index in the dataset to a group, then use from_dataset:

    import numpy as np\nfrom pydvl.utils import GroupedDataset\n# Randomly assign elements to any one of num_groups:\ndata_groups = np.random.randint(0, num_groups, len(dataset))\ngrouped_dataset = GroupedDataset.from_dataset(dataset, data_groups)\n
    "},{"location":"value/#creating-a-utility","title":"Creating a Utility","text":"

    In pyDVL we have slightly overloaded the name \"utility\" and use it to refer to an object that keeps track of all three items in \\((D, \\mathcal{A}, u)\\). This will be an instance of Utility which, as mentioned, is a convenient wrapper for the dataset, model and scoring function used for valuation methods.

    Here's a minimal example:

    import sklearn as sk\nfrom pydvl.utils import Dataset, Utility\ndataset = Dataset.from_sklearn(sk.datasets.load_iris())\nmodel = sk.svm.SVC()\nutility = Utility(model, dataset)\n

    The object utility is a callable that data valuation methods will execute with different subsets of training data. Each call will retrain the model on a subset and evaluate it on the test data using a scoring function. By default, Utility will use model.score(), but it is possible to use any scoring function (greater values must be better). In particular, the constructor accepts the same types as argument as sklearn.model_selection.cross_validate: a string, a scorer callable or None for the default.

    utility = Utility(model, dataset, \"explained_variance\")\n

    Utility will wrap the fit() method of the model to cache its results. This greatly reduces computation times of Monte Carlo methods. Because of how caching is implemented, it is important not to reuse Utility objects for different datasets. You can read more about setting up the cache in the installation guide and the documentation of the caching module.

    "},{"location":"value/#using-custom-scorers","title":"Using custom scorers","text":"

    The scoring argument of Utility can be used to specify a custom Scorer object. This is a simple wrapper for a callable that takes a model, and test data and returns a score.

    More importantly, the object provides information about the range of the score, which is used by some methods to estimate the number of samples necessary, and about what default value to use when the model fails to train.

    Note

    The most important property of a Scorer is its default value. Because many models will fail to fit on small subsets of the data, it is important to provide a sensible default value for the score.

    It is possible to skip the construction of the Scorer when constructing the Utility object. The two following calls are equivalent:

    from pydvl.utils import Utility, Scorer\nutility = Utility(\nmodel, dataset, \"explained_variance\", score_range=(-np.inf, 1), default_score=0.0\n)\nutility = Utility(\nmodel, dataset, Scorer(\"explained_variance\", range=(-np.inf, 1), default=0.0)\n)\n
    "},{"location":"value/#learning-the-utility","title":"Learning the utility","text":"

    Because each evaluation of the utility entails a full retrain of the model with a new subset of the training set, it is natural to try to learn this mapping from subsets to scores. This is the idea behind Data Utility Learning (DUL) (Wang et al., 2022)7 and in pyDVL it's as simple as wrapping the Utility inside DataUtilityLearning:

    from pydvl.utils import Utility, DataUtilityLearning, Dataset\nfrom sklearn.linear_model import LinearRegression, LogisticRegression\nfrom sklearn.datasets import load_iris\ndataset = Dataset.from_sklearn(load_iris())\nu = Utility(LogisticRegression(), dataset, enable_cache=False)\ntraining_budget = 3\nwrapped_u = DataUtilityLearning(u, training_budget, LinearRegression())\n# First 3 calls will be computed normally\nfor i in range(training_budget):\n_ = wrapped_u((i,))\n# Subsequent calls will be computed using the fit model for DUL\nwrapped_u((1, 2, 3))\n

    As you can see, all that is required is a model to learn the utility itself and the fitting and using of the learned model happens behind the scenes.

    There is a longer example with an investigation of the results achieved by DUL in a dedicated notebook.

    "},{"location":"value/#leave-one-out-values","title":"Leave-One-Out values","text":"

    LOO is the simplest approach to valuation. It assigns to each sample its marginal utility as value:

    \\[v_u(i) = u(D) \u2212 u(D_{-i}).\\]

    For notational simplicity, we consider the valuation function as defined over the indices of the dataset \\(D\\), and \\(i \\in D\\) is the index of the sample, \\(D_{-i}\\) is the training set without the sample \\(x_i\\), and \\(u\\) is the utility function.

    For the purposes of data valuation, this is rarely useful beyond serving as a baseline for benchmarking, although in some benchmarks it can perform astonishingly well on occasion. One particular weakness is that it does not necessarily correlate with an intrinsic value of a sample: since it is a marginal utility, it is affected by diminishing returns. Often, the training set is large enough for a single sample not to have any significant effect on training performance, despite any qualities it may possess. Whether this is indicative of low value or not depends on each one's goals and definitions, but other methods are typically preferable.

    from pydvl.value.loo import compute_loo\nvalues = compute_loo(utility, n_jobs=-1)\n

    The return value of all valuation functions is an object of type ValuationResult. This can be iterated over, indexed with integers, slices and Iterables, as well as converted to a pandas.DataFrame.

    "},{"location":"value/#problems-of-data-values","title":"Problems of data values","text":"

    There are a number of factors that affect how useful values can be for your project. In particular, regression can be especially tricky, but the particular nature of every (non-trivial) ML problem can have an effect:

    • Unbounded utility: Choosing a scorer for a classifier is simple: accuracy or some F-score provides a bounded number with a clear interpretation. However, in regression problems most scores, like \\(R^2\\), are not bounded because regressors can be arbitrarily bad. This leads to great variability in the utility for low sample sizes, and hence unreliable Monte Carlo approximations to the values. Nevertheless, in practice it is only the ranking of samples that matters, and this tends to be accurate (wrt. the true ranking) despite inaccurate values.

      Squashing scores

      pyDVL offers a dedicated function composition for scorer functions which can be used to squash a score. The following is defined in module score:

      import numpy as np\nfrom pydvl.utils import compose_score\ndef sigmoid(x: float) -> float:\nreturn float(1 / (1 + np.exp(-x)))\nsquashed_r2 = compose_score(\"r2\", sigmoid, \"squashed r2\")\nsquashed_variance = compose_score(\n\"explained_variance\", sigmoid, \"squashed explained variance\"\n)\n
      These squashed scores can prove useful in regression problems, but they can also introduce issues in the low-value regime.

    • High variance utility: Classical applications of game theoretic value concepts operate with deterministic utilities, but in ML we use an evaluation of the model on a validation set as a proxy for the true risk. Even if the utility is bounded, if it has high variance then values will also have high variance, as will their Monte Carlo estimates. One workaround in pyDVL is to configure the caching system to allow multiple evaluations of the utility for every index set. A moving average is computed and returned once the standard error is small, see MemcachedConfig. (Wang and Jia, 2022)5 show that, by relaxing one of the Shapley axioms and considering the general class of semi-values, of which Shapley is an instance, a choice of constant weights is the best one can do in a utility-agnostic setting. This method, dubbed Data Banzhaf, is available in pyDVL as compute_banzhaf_semivalues.

    • Data set size: Computing exact Shapley values is NP-hard, and Monte Carlo approximations can converge slowly. Massive datasets are thus impractical, at least with game-theoretical methods. A workaround is to group samples and investigate their value together. You can do this using GroupedDataset. There is a fully worked-out example here. Some algorithms also provide different sampling strategies to reduce the variance, but due to a no-free-lunch-type theorem, no single strategy can be optimal for all utilities.

    • Model size: Since every evaluation of the utility entails retraining the whole model on a subset of the data, large models require great amounts of computation. But also, they will effortlessly interpolate small to medium datasets, leading to great variance in the evaluation of performance on the dedicated validation set. One mitigation for this problem is cross-validation, but this would incur massive computational cost. As of v0.7.0 there are no facilities in pyDVL for cross-validating the utility (note that this would require cross-validating the whole value computation).

    1. Ghorbani, A., Zou, J., 2019. Data Shapley: Equitable Valuation of Data for Machine Learning, in: Proceedings of the 36th International Conference on Machine Learning, PMLR. Presented at the International Conference on Machine Learning (ICML 2019), PMLR, pp. 2242\u20132251.\u00a0\u21a9

    2. Kwon, Y., Rivas, M.A., Zou, J., 2021. Efficient Computation and Analysis of Distributional Shapley Values, in: Proceedings of the 24th International Conference on Artificial Intelligence and Statistics. Presented at the International Conference on Artificial Intelligence and Statistics, PMLR, pp. 793\u2013801.\u00a0\u21a9

    3. Schoch, S., Xu, H., Ji, Y., 2022. CS-Shapley: Class-wise Shapley Values for Data Valuation in Classification, in: Proc. Of the Thirty-Sixth Conference on Neural Information Processing Systems (NeurIPS). Presented at the Advances in Neural Information Processing Systems (NeurIPS 2022).\u00a0\u21a9

    4. Kwon, Y., Zou, J., 2022. Beta Shapley: A Unified and Noise-reduced Data Valuation Framework for Machine Learning, in: Proceedings of the 25th International Conference on Artificial Intelligence and Statistics (AISTATS) 2022,. Presented at the AISTATS 2022, PMLR.\u00a0\u21a9

    5. Wang, J.T., Jia, R., 2022. Data Banzhaf: A Robust Data Valuation Framework for Machine Learning [WWW Document]. https://doi.org/10.48550/arXiv.2205.15466 \u21a9\u21a9

    6. Yan, T., Procaccia, A.D., 2021. If You Like Shapley Then You\u2019ll Love the Core, in: Proceedings of the 35th AAAI Conference on Artificial Intelligence, 2021. Presented at the AAAI Conference on Artificial Intelligence, Association for the Advancement of Artificial Intelligence, pp. 5751\u20135759. https://doi.org/10.1609/aaai.v35i6.16721 \u21a9

    7. Wang, T., Yang, Y., Jia, R., 2022. Improving Cooperative Game Theory-based Data Valuation via Data Utility Learning. Presented at the International Conference on Learning Representations (ICLR 2022). Workshop on Socially Responsible Machine Learning, arXiv. https://doi.org/10.48550/arXiv.2107.06336 \u21a9\u21a9

    8. Wu, Z., Shu, Y., Low, B.K.H., 2022. DAVINZ: Data Valuation using Deep Neural Networks at Initialization, in: Proceedings of the 39th International Conference on Machine Learning. Presented at the International Conference on Machine Learning, PMLR, pp. 24150\u201324176.\u00a0\u21a9

    9. Just, H.A., Kang, F., Wang, T., Zeng, Y., Ko, M., Jin, M., Jia, R., 2023. LAVA: Data Valuation without Pre-Specified Learning Algorithms. Presented at The Eleventh International Conference on Learning Representations (ICLR 2023).\u00a0\u21a9

    "},{"location":"value/notation/","title":"Notation for valuation","text":"

    The following notation is used throughout the documentation:

    Let \\(D = \\{x_1, \\ldots, x_n\\}\\) be a training set of \\(n\\) samples.

    The utility function \\(u:\\mathcal{D} \\rightarrow \\mathbb{R}\\) maps subsets of \\(D\\) to real numbers.

    The value \\(v\\) of the \\(i\\)-th sample in dataset \\(D\\) wrt. utility \\(u\\) is denoted as \\(v_u(x_i)\\) or simply \\(v(i)\\).

    For any \\(S \\subseteq D\\), we denote by \\(S_{-i}\\) the set of samples in \\(S\\) excluding \\(x_i\\), and \\(S_{+i}\\) denotes the set \\(S\\) with \\(x_i\\) added.

    The marginal utility of adding sample \\(x_i\\) to a subset \\(S\\) is denoted as \\(\\delta(i) := u(S_{+i}) - u(S)\\).

    The set \\(D_{-i}^{(k)}\\) contains all subsets of \\(D\\) of size \\(k\\) that do not include sample \\(x_i\\).

    "},{"location":"value/semi-values/","title":"Semi-values","text":"

    SV is a particular case of a more general concept called semi-value, which is a generalization to different weighting schemes. A semi-value is any valuation function with the form:

    \\[ v_\\text{semi}(i) = \\sum_{k=1}^n w(k) \\sum_{S \\subset D_{-i}^{(k)}} [u(S_{+i}) - u(S)], \\]

    where the coefficients \\(w(k)\\) satisfy the property:

    \\[\\sum_{k=1}^n w(k) = 1,\\]

    the set \\(D_{-i}^{(k)}\\) contains all subsets of \\(D\\) of size \\(k\\) that do not include sample \\(x_i\\), \\(S_{+i}\\) is the set \\(S\\) with \\(x_i\\) added, and \\(u\\) is the utility function.

    Two instances of this are Banzhaf indices (Wang and Jia, 2022)1, and Beta Shapley (Kwon and Zou, 2022)2, with better numerical and rank stability in certain situations.

    Note

    Shapley values are a particular case of semi-values and can therefore also be computed with the methods described here. However, as of version 0.7.0, we recommend using compute_shapley_values instead, in particular because it implements truncation policies for TMCS.

    "},{"location":"value/semi-values/#beta-shapley","title":"Beta Shapley","text":"

    For some machine learning applications, where the utility is typically the performance when trained on a set \\(S \\subset D\\), diminishing returns are often observed when computing the marginal utility of adding a new data point.

    Beta Shapley is a weighting scheme that uses the Beta function to place more weight on subsets deemed to be more informative. The weights are defined as:

    \\[ w(k) := \\frac{B(k+\\beta, n-k+1+\\alpha)}{B(\\alpha, \\beta)}, \\]

    where \\(B\\) is the Beta function, and \\(\\alpha\\) and \\(\\beta\\) are parameters that control the weighting of the subsets. Setting both to 1 recovers Shapley values, and setting \\(\\alpha = 1\\), and \\(\\beta = 16\\) is reported in (Kwon and Zou, 2022)2 to be a good choice for some applications. Beta Shapley values are available in pyDVL through compute_beta_shapley_semivalues:

    from pydvl.value import *\nutility = Utility(model, data)\nvalues = compute_beta_shapley_semivalues(\nu=utility, done=AbsoluteStandardError(threshold=1e-4), alpha=1, beta=16\n)\n

    See however the Banzhaf indices section for an alternative choice of weights which is reported to work better.

    "},{"location":"value/semi-values/#banzhaf-indices","title":"Banzhaf indices","text":"

    As noted in the section Problems of Data Values, the Shapley value can be very sensitive to variance in the utility function. For machine learning applications, where the utility is typically the performance when trained on a set \\(S \\subset D\\), this variance is often largest for smaller subsets \\(S\\). It is therefore reasonable to try reducing the relative contribution of these subsets with adequate weights.

    One such choice of weights is the Banzhaf index, which is defined as the constant:

    \\[w(k) := \\frac{1}{2^{n-1}},\\]

    for all set sizes \\(k\\). The intuition for picking a constant weight is that for any choice of weight function \\(w\\), one can always construct a utility with higher variance where \\(w\\) is greater. Therefore, in a worst-case sense, the best one can do is to pick a constant weight.

    The authors of (Wang and Jia, 2022)1 show that Banzhaf indices are more robust to variance in the utility function than Shapley and Beta Shapley values. They are available in pyDVL through compute_banzhaf_semivalues:

    from pydvl.value import *\nutility = Utility(model, data)\nvalues = compute_banzhaf_semivalues(\nu=utility, done=AbsoluteStandardError(threshold=1e-4), alpha=1, beta=16\n)\n
    "},{"location":"value/semi-values/#general-semi-values","title":"General semi-values","text":"

    As explained above, both Beta Shapley and Banzhaf indices are special cases of semi-values. In pyDVL we provide a general method for computing these with any combination of the three ingredients that define a semi-value:

    • A utility function \\(u\\).
    • A sampling method
    • A weighting scheme \\(w\\).

    You can construct any combination of these three ingredients with compute_generic_semivalues. The utility function is the same as for Shapley values, and the sampling method can be any of the types defined in the samplers module. For instance, the following snippet is equivalent to the above:

    from pydvl.value import *\ndata = Dataset(...)\nutility = Utility(model, data)\nvalues = compute_generic_semivalues(\nsampler=PermutationSampler(data.indices),\nu=utility,\ncoefficient=beta_coefficient(alpha=1, beta=16),\ndone=AbsoluteStandardError(threshold=1e-4),\n)\n

    Allowing any coefficient can help when experimenting with models which are more sensitive to changes in training set size. However, Data Banzhaf indices are proven to be the most robust to variance in the utility function, in the sense of rank stability, across a range of models and datasets (Wang and Jia, 2022)1.

    Careful with permutation sampling

    This generic implementation of semi-values allowing for any combination of sampling and weighting schemes is very flexible and, in principle, it recovers the original Shapley value, so that compute_shapley_values is no longer necessary. However, it loses the optimization in permutation sampling that reuses the utility computation from the last iteration when iterating over a permutation. This doubles the computation requirements (and slightly increases variance) when using permutation sampling, unless the cache is enabled. In addition, as mentioned above, truncation policies are not supported by this generic implementation (as of v0.7.0). For these reasons it is preferable to use compute_shapley_values whenever not computing other semi-values.

    1. Wang, J.T., Jia, R., 2022. Data Banzhaf: A Robust Data Valuation Framework for Machine Learning [WWW Document]. https://doi.org/10.48550/arXiv.2205.15466 \u21a9\u21a9\u21a9

    2. Kwon, Y., Zou, J., 2022. Beta Shapley: A Unified and Noise-reduced Data Valuation Framework for Machine Learning, in: Proceedings of the 25th International Conference on Artificial Intelligence and Statistics (AISTATS) 2022,. Presented at the AISTATS 2022, PMLR.\u00a0\u21a9\u21a9

    "},{"location":"value/shapley/","title":"Shapley value","text":""},{"location":"value/shapley/#shapley-value","title":"Shapley value","text":"

    The Shapley method is an approach to compute data values originating in cooperative game theory. Shapley values are a common way of assigning payoffs to each participant in a cooperative game (i.e. one in which players can form coalitions) in a way that ensures that certain axioms are fulfilled.

    pyDVL implements several methods for the computation and approximation of Shapley values. They can all be accessed via the facade function compute_shapley_values. The supported methods are enumerated in ShapleyMode.

    Empirically, the most useful method is the so-called Truncated Monte Carlo Shapley (Ghorbani and Zou, 2019)1, which is a Monte Carlo approximation of the permutation Shapley value.

    "},{"location":"value/shapley/#combinatorial-shapley","title":"Combinatorial Shapley","text":"

    The first algorithm is just a verbatim implementation of the definition. As such it returns as exact a value as the utility function allows (see what this means in Problems of Data Values).

    The value \\(v\\) of the \\(i\\)-th sample in dataset \\(D\\) wrt. utility \\(u\\) is computed as a weighted sum of its marginal utility wrt. every possible coalition of training samples within the training set:

    \\[ v(i) = \\frac{1}{n} \\sum_{S \\subseteq D_{-i}} \\binom{n-1}{ | S | }^{-1} [u(S_{+i}) \u2212 u(S)] ,\\]

    where \\(D_{-i}\\) denotes the set of samples in \\(D\\) excluding \\(x_i\\), and \\(S_{+i}\\) denotes the set \\(S\\) with \\(x_i\\) added.

    from pydvl.value import compute_shapley_values\nvalues = compute_shapley_values(utility, mode=\"combinatorial_exact\")\ndf = values.to_dataframe(column='value')\n

    We can convert the return value to a pandas.DataFrame and name the column with the results as value. Please refer to the documentation in shapley and ValuationResult for more information.

    "},{"location":"value/shapley/#monte-carlo-combinatorial-shapley","title":"Monte Carlo Combinatorial Shapley","text":"

    Because the number of subsets \\(S \\subseteq D_{-i}\\) is \\(2^{ | D | - 1 }\\), one typically must resort to approximations. The simplest one is done via Monte Carlo sampling of the powerset \\(\\mathcal{P}(D)\\). In pyDVL this simple technique is called \"Monte Carlo Combinatorial\". The method has a very poor convergence rate and others are preferred, but if desired, usage follows the same pattern:

    from pydvl.value import compute_shapley_values, MaxUpdates\nvalues = compute_shapley_values(\nutility, mode=\"combinatorial_montecarlo\", done=MaxUpdates(1000)\n)\ndf = values.to_dataframe(column='cmc')\n

    The DataFrames returned by most Monte Carlo methods will contain approximate standard errors as an additional column, in this case named cmc_stderr.

    Note the usage of the object MaxUpdates as the stop condition. This is an instance of a StoppingCriterion. Other examples are MaxTime and AbsoluteStandardError.

    "},{"location":"value/shapley/#owen-sampling","title":"Owen sampling","text":"

    Owen Sampling (Okhrati and Lipani, 2021)2 is a practical algorithm based on the combinatorial definition. It uses a continuous extension of the utility from \\(\\{0,1\\}^n\\), where a 1 in position \\(i\\) means that sample \\(x_i\\) is used to train the model, to \\([0,1]^n\\). The ensuing expression for Shapley value uses integration instead of discrete weights:

    \\[ v_u(i) = \\int_0^1 \\mathbb{E}_{S \\sim P_q(D_{-i})} [u(S_{+i}) - u(S)] \\, dq. \\]

    Using Owen sampling follows the same pattern as every other method for Shapley values in pyDVL. First construct the dataset and utility, then call compute_shapley_values:

    from pydvl.value import compute_shapley_values\nvalues = compute_shapley_values(\nu=utility, mode=\"owen\", n_iterations=4, max_q=200\n)\n

    There are more details on Owen sampling, and its variant Antithetic Owen Sampling in the documentation for the function doing the work behind the scenes: owen_sampling_shapley.

    Note that in this case we do not pass a StoppingCriterion to the function, but instead the number of iterations and the maximum number of samples to use in the integration.

    "},{"location":"value/shapley/#permutation-shapley","title":"Permutation Shapley","text":"

    An equivalent way of computing Shapley values (ApproShapley) appeared in (Castro et al., 2009)3 and is the basis for the method most often used in practice. It uses permutations over indices instead of subsets:

    \\[ v_u(x_i) = \\frac{1}{n!} \\sum_{\\sigma \\in \\Pi(n)} [u(\\sigma_{:i} \\cup \\{x_i\\}) \u2212 u(\\sigma_{:i})], \\]

    where \\(\\sigma_{:i}\\) denotes the set of indices in permutation \\(\\sigma\\) before the position where \\(i\\) appears. To approximate this sum (which has \\(\\mathcal{O}(n!)\\) terms!) one uses Monte Carlo sampling of permutations, something which has surprisingly low sample complexity. One notable difference wrt. the combinatorial approach above is that the approximations always fulfill the efficiency axiom of Shapley, namely \\(\\sum_{i=1}^n \\hat{v}_i = u(D)\\) (see (Castro et al., 2009)3, Proposition 3.2).

    By adding two types of early stopping, the result is the so-called Truncated Monte Carlo Shapley (Ghorbani and Zou, 2019)1, which is efficient enough to be useful in applications. The first is simply a convergence criterion, of which there are several to choose from. The second is a criterion to truncate the iteration over single permutations. RelativeTruncation chooses to stop iterating over samples in a permutation when the marginal utility becomes too small.

    from pydvl.value import compute_shapley_values, MaxUpdates, RelativeTruncation\nvalues = compute_shapley_values(\nu=utility,\nmode=\"permutation_montecarlo\",\ndone=MaxUpdates(1000),\ntruncation=RelativeTruncation(utility, rtol=0.01)\n)\n

    You can see this method in action in this example using the Spotify dataset.

    "},{"location":"value/shapley/#exact-shapley-for-knn","title":"Exact Shapley for KNN","text":"

    It is possible to exploit the local structure of K-Nearest Neighbours to reduce the number of subsets to consider: because no sample besides the K closest affects the score, most are irrelevant and it is possible to compute a value in linear time. This method was introduced by (Jia et al., 2019)4, and can be used in pyDVL with:

    from pydvl.utils import Dataset, Utility\nfrom pydvl.value import compute_shapley_values\nfrom sklearn.neighbors import KNeighborsClassifier\nmodel = KNeighborsClassifier(n_neighbors=5)\ndata = Dataset(...)\nutility = Utility(model, data)\nvalues = compute_shapley_values(u=utility, mode=\"knn\")\n
    "},{"location":"value/shapley/#group-testing","title":"Group testing","text":"

    An alternative approach introduced in (Jia et al., 2019)4 first approximates the differences of values with a Monte Carlo sum. With

    \\[\\hat{\\Delta}_{i j} \\approx v_i - v_j,\\]

    one then solves the following linear constraint satisfaction problem (CSP) to infer the final values:

    \\[ \\begin{array}{lll} \\sum_{i = 1}^N v_i & = & U (D)\\\\ | v_i - v_j - \\hat{\\Delta}_{i j} | & \\leqslant & \\frac{\\varepsilon}{2 \\sqrt{N}} \\end{array} \\]

    Warning

    We have reproduced this method in pyDVL for completeness and benchmarking, but we don't advocate its use because of the speed and memory cost. Despite our best efforts, the number of samples required in practice for convergence can be several orders of magnitude worse than with e.g. TMCS. Additionally, the CSP can sometimes turn out to be infeasible.

    Usage follows the same pattern as every other Shapley method, but with the addition of an epsilon parameter required for the solution of the CSP. It should be the same value used to compute the minimum number of samples required. This can be done with num_samples_eps_delta, but note that the number returned will be huge! In practice, fewer samples can be enough, but the actual number will strongly depend on the utility, in particular its variance.

    from pydvl.utils import Dataset, Utility\nfrom pydvl.value import compute_shapley_values\nmodel = ...\ndata = Dataset(...)\nutility = Utility(model, data, score_range=(_min, _max))\nmin_iterations = num_samples_eps_delta(epsilon, delta, n, utility.score_range)\nvalues = compute_shapley_values(\nu=utility, mode=\"group_testing\", n_iterations=min_iterations, eps=eps\n)\n
    1. Ghorbani, A., Zou, J., 2019. Data Shapley: Equitable Valuation of Data for Machine Learning, in: Proceedings of the 36th International Conference on Machine Learning, PMLR. Presented at the International Conference on Machine Learning (ICML 2019), PMLR, pp. 2242\u20132251.\u00a0\u21a9\u21a9

    2. Okhrati, R., Lipani, A., 2021. A Multilinear Sampling Algorithm to Estimate Shapley Values, in: 2020 25th International Conference on Pattern Recognition (ICPR). Presented at the 2020 25th International Conference on Pattern Recognition (ICPR), IEEE, pp. 7992\u20137999. https://doi.org/10.1109/ICPR48806.2021.9412511 \u21a9

    3. Castro, J., G\u00f3mez, D., Tejada, J., 2009. Polynomial calculation of the Shapley value based on sampling. Computers & Operations Research, Selected papers presented at the Tenth International Symposium on Locational Decisions (ISOLDE X) 36, 1726\u20131730. https://doi.org/10.1016/j.cor.2008.04.004 \u21a9\u21a9

    4. Jia, R., Dao, D., Wang, B., Hubis, F.A., Gurel, N.M., Li, B., Zhang, C., Spanos, C., Song, D., 2019. Efficient task-specific data valuation for nearest neighbor algorithms. Proc. VLDB Endow. 12, 1610\u20131623. https://doi.org/10.14778/3342263.3342637 \u21a9\u21a9

    "},{"location":"value/the-core/","title":"Core values","text":"

    The Shapley values define a fair way to distribute payoffs amongst all participants when they form a grand coalition. But they do not consider the question of stability: under which conditions do all participants form the grand coalition? Would the participants be willing to form the grand coalition given how the payoffs are assigned, or would some of them prefer to form smaller coalitions?

    The Core is another approach to computing data values originating in cooperative game theory that attempts to ensure this stability. It is the set of feasible payoffs that cannot be improved upon by a coalition of the participants.

    It satisfies the following 2 properties:

    • Efficiency: The payoffs are distributed such that it is not possible to make any participant better off without making another one worse off. \\(\\sum_{i\\in D} v(i) = u(D)\\)

    • Coalitional rationality: The sum of payoffs to the agents in any coalition S is at least as large as the amount that these agents could earn by forming a coalition on their own. \\(\\sum_{i \\in S} v(i) \\geq u(S), \\forall S \\subset D\\)

    The second property states that the sum of payoffs to the agents in any subcoalition \\(S\\) is at least as large as the amount that these agents could earn by forming a coalition on their own.

    "},{"location":"value/the-core/#least-core-values","title":"Least Core values","text":"

    Unfortunately, for many cooperative games the Core may be empty. By relaxing the coalitional rationality property by a subsidy \\(e \\gt 0\\), we are then able to find approximate payoffs:

    \\[ \\sum_{i\\in S} v(i) + e \\geq u(S), \\forall S \\subset D, S \\neq \\emptyset \\ ,\\]

    The least core value \\(v\\) of the \\(i\\)-th sample in dataset \\(D\\) wrt. utility \\(u\\) is computed by solving the following Linear Program:

    \\[ \\begin{array}{lll} \\text{minimize} & e & \\\\ \\text{subject to} & \\sum_{i\\in D} v(i) = u(D) & \\\\ & \\sum_{i\\in S} v(i) + e \\geq u(S) &, \\forall S \\subset D, S \\neq \\emptyset \\\\ \\end{array} \\]"},{"location":"value/the-core/#exact-least-core","title":"Exact Least Core","text":"

    This first algorithm is just a verbatim implementation of the definition. As such it returns as exact a value as the utility function allows (see what this means in Problems of Data Values).

    from pydvl.value import compute_least_core_values\nvalues = compute_least_core_values(utility, mode=\"exact\")\n
    "},{"location":"value/the-core/#monte-carlo-least-core","title":"Monte Carlo Least Core","text":"

    Because the number of subsets \\(S \\subseteq D \\setminus \\{i\\}\\) is \\(2^{ | D | - 1 }\\), one typically must resort to approximations.

    The simplest approximation consists in using a fraction of all subsets for the constraints. (Yan and Procaccia, 2021)1 show that a quantity of order \\(\\mathcal{O}((n - \\log \\Delta ) / \\delta^2)\\) is enough to obtain a so-called \\(\\delta\\)-approximate least core with high probability. I.e. the following property holds with probability \\(1-\\Delta\\) over the choice of subsets:

    \\[ \\mathbb{P}_{S\\sim D}\\left[\\sum_{i\\in S} v(i) + e^{*} \\geq u(S)\\right] \\geq 1 - \\delta, \\]

    where \\(e^{*}\\) is the optimal least core subsidy.

    from pydvl.value import compute_least_core_values\nvalues = compute_least_core_values(\nutility, mode=\"montecarlo\", n_iterations=n_iterations\n)\n

    Note

    Although any number is supported, it is best to choose n_iterations to be at least equal to the number of data points.

    Because computing the Least Core values requires the solution of a linear and a quadratic problem after computing all the utility values, we offer the possibility of splitting the latter from the former. This is useful when running multiple experiments: use mclc_prepare_problem to prepare a list of problems to solve, then solve them in parallel with lc_solve_problems.

    from pydvl.value.least_core import mclc_prepare_problem, lc_solve_problems\nn_experiments = 10\nproblems = [mclc_prepare_problem(utility, n_iterations=n_iterations)\nfor _ in range(n_experiments)]\nvalues = lc_solve_problems(problems)\n
    "},{"location":"value/the-core/#method-comparison","title":"Method comparison","text":"

    The TransferLab team reproduced the results of the original paper in a publication for the 2022 MLRC (Benmerzoug and Delgado, 2023)2.

    Best sample removal on binary image classification

    Roughly speaking, MCLC performs better in identifying high value points, as measured by best-sample removal tasks. In all other aspects, it performs worse or similarly to TMCS at comparable sample budgets. But using an equal number of subsets is more computationally expensive because of the need to solve large linear and quadratic optimization problems.

    Worst sample removal on binary image classification

    For these reasons we recommend some variation of SV like TMCS for outlier detection, data cleaning and pruning, and perhaps MCLC for the selection of interesting points to be inspected for the improvement of data collection or model design.

    1. Yan, T., Procaccia, A.D., 2021. If You Like Shapley Then You\u2019ll Love the Core, in: Proceedings of the 35th AAAI Conference on Artificial Intelligence, 2021. Presented at the AAAI Conference on Artificial Intelligence, Association for the Advancement of Artificial Intelligence, pp. 5751\u20135759. https://doi.org/10.1609/aaai.v35i6.16721 \u21a9

    2. Benmerzoug, A., Delgado, M. de B., 2023. [Re] If you like Shapley, then you\u2019ll love the core. ReScience C 9. https://doi.org/10.5281/zenodo.8173733 \u21a9

    "}]} \ No newline at end of file diff --git a/devel/sitemap.xml b/devel/sitemap.xml index 54281c5ba..763aad46e 100644 --- a/devel/sitemap.xml +++ b/devel/sitemap.xml @@ -2,372 +2,372 @@ https://aai-institute.github.io/pyDVL/stable/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/CHANGELOG/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/influence/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/influence/general/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/influence/inversion/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/influence/twice_differentiable/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/influence/torch/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/influence/torch/functional/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/influence/torch/torch_differentiable/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/influence/torch/util/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/parallel/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/parallel/backend/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/parallel/config/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/parallel/map_reduce/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/parallel/backends/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/parallel/backends/joblib/ - 2023-09-18 + 2023-09-20 daily 
https://aai-institute.github.io/pyDVL/stable/api/pydvl/parallel/backends/ray/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/parallel/futures/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/parallel/futures/ray/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/reporting/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/reporting/plots/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/reporting/scores/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/utils/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/utils/caching/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/utils/config/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/utils/dataset/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/utils/functional/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/utils/numeric/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/utils/parallel/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/utils/progress/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/utils/score/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/utils/status/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/utils/types/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/utils/utility/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/ - 2023-09-18 + 2023-09-20 daily 
https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/result/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/sampler/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/semivalues/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/stopping/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/least_core/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/least_core/common/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/least_core/montecarlo/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/least_core/naive/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/loo/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/loo/loo/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/loo/naive/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/oob/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/oob/oob/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/shapley/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/shapley/common/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/shapley/gt/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/shapley/knn/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/shapley/montecarlo/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/shapley/naive/ - 2023-09-18 + 2023-09-20 
daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/shapley/owen/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/shapley/truncated/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/api/pydvl/value/shapley/types/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/examples/data_oob/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/examples/influence_imagenet/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/examples/influence_synthetic/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/examples/influence_wine/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/examples/least_core_basic/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/examples/shapley_basic_spotify/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/examples/shapley_knn_flowers/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/examples/shapley_utility_learning/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/getting-started/first-steps/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/getting-started/installation/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/influence/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/value/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/value/notation/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/value/semi-values/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/value/shapley/ - 2023-09-18 + 2023-09-20 daily https://aai-institute.github.io/pyDVL/stable/value/the-core/ - 2023-09-18 + 2023-09-20 daily \ No newline at end of file diff --git a/devel/sitemap.xml.gz 
b/devel/sitemap.xml.gz index 53f3efa26..59fdd8f0e 100644 Binary files a/devel/sitemap.xml.gz and b/devel/sitemap.xml.gz differ diff --git a/devel/value/index.html b/devel/value/index.html index 43cc01e56..7ee0148c0 100644 --- a/devel/value/index.html +++ b/devel/value/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2956,11 +2956,11 @@

    Problems of data values Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/value/notation/index.html b/devel/value/notation/index.html index 972b5cfdb..c30210ee7 100644 --- a/devel/value/notation/index.html +++ b/devel/value/notation/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2469,11 +2469,11 @@

    Notation for valuation Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/value/semi-values/index.html b/devel/value/semi-values/index.html index a134d89f5..dcf91f261 100644 --- a/devel/value/semi-values/index.html +++ b/devel/value/semi-values/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2667,11 +2667,11 @@

    General semi-values2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/value/shapley/index.html b/devel/value/shapley/index.html index eb73660d6..37c027cd0 100644 --- a/devel/value/shapley/index.html +++ b/devel/value/shapley/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2788,11 +2788,11 @@

    Group testing Last update: - 2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20 diff --git a/devel/value/the-core/index.html b/devel/value/the-core/index.html index 04b36c1c9..e327cc0ca 100644 --- a/devel/value/the-core/index.html +++ b/devel/value/the-core/index.html @@ -18,7 +18,7 @@ - + @@ -26,7 +26,7 @@ - + @@ -2674,11 +2674,11 @@

    Method comparison2023-09-18 + 2023-09-20
    Created: - 2023-09-18 + 2023-09-20