
Commit

Add and improve docstrings
schroedk committed May 22, 2024
1 parent 3b7289c commit b0e297a
Showing 4 changed files with 140 additions and 9 deletions.
8 changes: 0 additions & 8 deletions src/pydvl/influence/base_influence_function_model.py
@@ -217,14 +217,6 @@ class ComposableInfluence(

block_mapper: BlockMapperType

@property
def n_parameters(self):
return super().n_parameters()

@property
def is_thread_safe(self) -> bool:
return False

@property
def is_fitted(self):
try:
36 changes: 36 additions & 0 deletions src/pydvl/influence/torch/influence_function_model.py
@@ -1812,6 +1812,19 @@ class TorchOperatorGradientComposition(
torch.Tensor, TorchBatch, TorchOperator, TorchPerSampleGradientProvider
]
):
"""
Composable block that integrates a [TorchOperator]
[pydvl.influence.torch.operator.base.TorchOperator] and
a [TorchPerSampleGradientProvider]
[pydvl.influence.torch.operator.gradient_provider.TorchPerSampleGradientProvider].
This block is designed to be flexible, handling different computational modes via
an abstract operator and gradient provider.
"""

def __init__(self, op: TorchOperator, gp: TorchPerSampleGradientProvider):
super().__init__(op, gp)

def to(self, device: torch.device):
self.gp = self.gp.to(device)
self.op = self.op.to(device)
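
To make the composition pattern described in the docstring concrete, here is a minimal, self-contained sketch of a gradient provider paired with an operator for a linear model with squared loss. All names below (ToyBatch, ToyGradientProvider, ToyOperator, ToyComposition) are hypothetical stand-ins for illustration and are not part of pyDVL's API; in pyDVL the operator typically represents an (approximate) inverse-Hessian action.

```python
# Hypothetical sketch (not pyDVL's API): a gradient provider and an operator
# composed to produce pairwise interactions between two batches.
from dataclasses import dataclass

import torch


@dataclass
class ToyBatch:
    x: torch.Tensor
    y: torch.Tensor


class ToyGradientProvider:
    """Per-sample gradients of the squared loss of a linear model y = x @ w."""

    def __init__(self, w: torch.Tensor):
        self.w = w

    def flat_grads(self, batch: ToyBatch) -> torch.Tensor:
        residual = batch.x @ self.w - batch.y          # shape (N,)
        return 2.0 * residual.unsqueeze(1) * batch.x   # shape (N, d)


class ToyOperator:
    """Applies a fixed matrix, standing in for e.g. an approximate inverse Hessian."""

    def __init__(self, mat: torch.Tensor):
        self.mat = mat

    def apply(self, vecs: torch.Tensor) -> torch.Tensor:
        return vecs @ self.mat.T                       # shape (N, d)


class ToyComposition:
    """Pairs operator and gradient provider to compute interactions between batches."""

    def __init__(self, op: ToyOperator, gp: ToyGradientProvider):
        self.op, self.gp = op, gp

    def interactions(self, train: ToyBatch, test: ToyBatch) -> torch.Tensor:
        # (N_test, d) x (d, N_train) -> pairwise interaction matrix
        return self.op.apply(self.gp.flat_grads(test)) @ self.gp.flat_grads(train).T


d = 3
gp = ToyGradientProvider(torch.randn(d))
op = ToyOperator(torch.eye(d))
scores = ToyComposition(op, gp).interactions(
    ToyBatch(torch.randn(10, d), torch.randn(10)),
    ToyBatch(torch.randn(4, d), torch.randn(4)),
)  # shape (4, 10)
```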
@@ -1821,6 +1834,20 @@ def to(self, device: torch.device):
class TorchBlockMapper(
BlockMapper[torch.Tensor, TorchBatch, TorchOperatorGradientComposition]
):
"""
Class for mapping operations across multiple compositional blocks represented by
instances of [TorchOperatorGradientComposition]
[pydvl.influence.torch.influence_function_model.TorchOperatorGradientComposition].
This class takes a dictionary of compositional blocks, applies their methods to
batches or tensors, and aggregates the results.
"""

def __init__(
self, composable_block_dict: OrderedDict[str, TorchOperatorGradientComposition]
):
super().__init__(composable_block_dict)

def _split_to_blocks(
self, z: torch.Tensor, dim: int = -1
) -> OrderedDict[str, torch.Tensor]:
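
The block-splitting step performed here can be pictured with a short, hedged sketch: a tensor is cut along one dimension into named chunks, one per parameter block. The block names, sizes, and the helper below are made-up illustrations, not the pyDVL implementation.

```python
# Illustrative stand-in for splitting a flat tensor into named parameter blocks.
from collections import OrderedDict

import torch


def split_to_blocks(z, block_sizes, dim=-1):
    """Split `z` along `dim` into consecutive chunks, one per named block."""
    chunks = torch.split(z, list(block_sizes.values()), dim=dim)
    return OrderedDict(zip(block_sizes.keys(), chunks))


sizes = OrderedDict([("layer1", 4), ("layer2", 2)])
z = torch.arange(12.0).reshape(2, 6)   # a batch of 2 "flat parameter" vectors
blocks = split_to_blocks(z, sizes)     # {'layer1': shape (2, 4), 'layer2': shape (2, 2)}
```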
@@ -1844,6 +1871,7 @@ def to(self, device: torch.device):
class TorchComposableInfluence(
ComposableInfluence[torch.Tensor, TorchBatch, DataLoader, TorchBlockMapper],
ModelInfoMixin,
ABC,
):
def __init__(
self,
@@ -1949,6 +1977,14 @@ def __init__(
self.gradient_provider_factory = TorchPerSampleAutoGrad
self.loss = loss

@property
def n_parameters(self):
return super().n_parameters()

@property
def is_thread_safe(self) -> bool:
return False

@staticmethod
def _validate_regularization(
block_name: str, value: Optional[float]
103 changes: 103 additions & 0 deletions src/pydvl/influence/torch/operator/gradient_provider.py
@@ -22,6 +22,29 @@
class TorchPerSampleGradientProvider(
PerSampleGradientProvider[TorchBatch, torch.Tensor], ABC
):
r"""
Abstract base class for calculating per-sample gradients of a function defined by
a [torch.nn.Module][torch.nn.Module] and a loss function.
This class must be subclassed with implementations for its abstract methods tailored
to specific gradient computation needs, e.g. using [torch.autograd][torch.autograd]
or stochastic finite differences.
Consider a function
$$ \ell: \mathbb{R}^{d_1} \times \mathbb{R}^{d_2} \times \mathbb{R}^{n} \times
\mathbb{R}^{n} \rightarrow \mathbb{R}, \quad \ell(\omega_1, \omega_2, x, y) =
\operatorname{loss}(f(\omega_1, \omega_2; x), y), $$
e.g. a two-layer neural network $f$ together with a loss function. Implementations
of this class are expected to compute the expressions
$$ \nabla_{\omega_{i}}\ell(\omega_1, \omega_2, x, y), \quad
\nabla_{\omega_{i}}\nabla_{x}\ell(\omega_1, \omega_2, x, y), \quad
\nabla_{\omega}\ell(\omega_1, \omega_2, x, y) \cdot v.$$
"""

def __init__(
self,
model: torch.nn.Module,
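
To get a feel for the per-sample gradients $\nabla_{\omega}\ell$ defined in the docstring above, here is a purely illustrative sketch that computes them with a plain autograd loop over the batch; the tiny model, data, and shapes are assumptions made up for the example.

```python
# Purely illustrative: naive per-sample gradients via a loop over the batch.
import torch

torch.manual_seed(0)
model = torch.nn.Sequential(
    torch.nn.Linear(3, 4), torch.nn.ReLU(), torch.nn.Linear(4, 1)
)
loss_fn = torch.nn.MSELoss()
x, y = torch.randn(5, 3), torch.randn(5, 1)      # a batch of N = 5 samples

per_sample_grads = []
for xi, yi in zip(x, y):
    model.zero_grad()
    loss_fn(model(xi.unsqueeze(0)), yi.unsqueeze(0)).backward()
    # one flat gradient vector per sample, of length n_parameters
    per_sample_grads.append(
        torch.cat([p.grad.flatten() for p in model.parameters()])
    )

grads = torch.stack(per_sample_grads)            # shape (N, n_parameters)
```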
@@ -76,12 +99,52 @@ def _detach_dict(tensor_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor
return {k: g.detach() if g.requires_grad else g for k, g in tensor_dict.items()}

def per_sample_gradient_dict(self, batch: TorchBatch) -> Dict[str, torch.Tensor]:
r"""
Computes and returns a dictionary mapping gradient names to their respective
per-sample gradients. Given the example in the class docstring, this means
$$ \text{result}[\omega_i] = \nabla_{\omega_{i}}\ell(\omega_1, \omega_2,
\text{batch.x}, \text{batch.y}), $$
where the first dimension of the resulting tensors is always the batch dimension,
so each tensor has shape $(N, d_i)$, with $N$ the number of samples in the batch.
Args:
batch: The batch of data for which to compute gradients.
Returns:
A dictionary where keys are gradient identifiers and values are the
gradients computed per sample.
"""
gradient_dict = self._per_sample_gradient_dict(batch.to(self.device))
return self._detach_dict(gradient_dict)

def per_sample_mixed_gradient_dict(
self, batch: TorchBatch
) -> Dict[str, torch.Tensor]:
r"""
Computes and returns a dictionary mapping gradient names to their respective
per-sample mixed gradients. In this context, mixed gradients are second-order
derivatives, taken with respect to the model parameters and additionally with
respect to the input batch.
Given the example in the class docstring, this means
$$ \text{result}[\omega_i] = \nabla_{\omega_{i}}\nabla_{x}\ell(\omega_1,
\omega_2, \text{batch.x}, \text{batch.y}), $$
where the first dimension of the resulting tensors is always the batch dimension
and the last dimension holds the derivatives with respect to the parameters $\omega_i$.
So each tensor has shape $(N, n, d_i)$, where $N$ is the number of samples in the
batch and $n$ is the dimension of the input $x$.
Args:
batch: The batch of data for which to compute mixed gradients.
Returns:
A dictionary where keys are gradient identifiers and values are the
mixed gradients computed per sample.
"""
gradient_dict = self._per_sample_mixed_gradient_dict(batch.to(self.device))
return self._detach_dict(gradient_dict)
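
A hedged sketch of the mixed derivative $\nabla_{\omega}\nabla_{x}\ell$ for a single linear layer, written with `torch.func`; the model, data, and resulting shapes are illustrative assumptions only, not the method's actual implementation.

```python
# Illustrative only: per-sample mixed derivatives with torch.func.
import torch
from torch.func import functional_call, grad, jacrev, vmap

model = torch.nn.Linear(3, 1)                    # input dimension n = 3
params = dict(model.named_parameters())
x, y = torch.randn(5, 3), torch.randn(5, 1)      # batch of N = 5


def point_loss(p, xi, yi):
    out = functional_call(model, p, (xi.unsqueeze(0),))
    return torch.nn.functional.mse_loss(out, yi.unsqueeze(0))


grad_wrt_x = grad(point_loss, argnums=1)         # d loss / d x, shape (n,)
mixed = vmap(jacrev(grad_wrt_x, argnums=0), in_dims=(None, 0, 0))(params, x, y)
# mixed["weight"].shape == (5, 3, 1, 3): batch, input dim, then the weight's own shape
```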

@@ -90,6 +153,26 @@ def matrix_jacobian_product(
batch: TorchBatch,
g: torch.Tensor,
) -> torch.Tensor:
r"""
Computes the matrix-Jacobian product for the provided batch and input tensor.
Given the example in the class docstring, this means
$$ (\nabla_{\omega_{1}}\ell(\omega_1, \omega_2,
\text{batch.x}, \text{batch.y}),
\nabla_{\omega_{2}}\ell(\omega_1, \omega_2,
\text{batch.x}, \text{batch.y})) \cdot g^T$$
where $g$ must be a tensor of shape $(K, d_1+d_2)$, so the resulting tensor
is of shape $(N, K)$.
Args:
batch: The batch of data for which to compute the Jacobian.
g: The tensor to be used in the matrix-Jacobian product
calculation.
Returns:
The resulting tensor from the matrix-Jacobian product computation.
"""
result = self._matrix_jacobian_product(batch.to(self.device), g.to(self.device))
if result.requires_grad:
result = result.detach()
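
As a purely numerical illustration of the shape contract above, a batch of $N$ per-sample gradients of dimension $d$ can be stacked and multiplied with $g$ of shape $(K, d)$ to give an $(N, K)$ result. The closed-form gradients of a linear least-squares loss below are made up for the example and are not pyDVL's implementation.

```python
# Toy shape check for the matrix-Jacobian product; not pyDVL's implementation.
import torch

N, d, K = 5, 3, 2
x, y, w = torch.randn(N, d), torch.randn(N), torch.randn(d)

# per-sample gradient of (x_i . w - y_i)^2 with respect to w, stacked to shape (N, d)
grads = 2.0 * (x @ w - y).unsqueeze(1) * x

g = torch.randn(K, d)
mjp = grads @ g.T          # resulting shape (N, K)
```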
@@ -108,6 +191,26 @@ def per_sample_flat_mixed_gradient(self, batch: TorchBatch) -> torch.Tensor:


class TorchPerSampleAutoGrad(TorchPerSampleGradientProvider):
r"""
Compute per-sample gradients of a function defined by
a [torch.nn.Module][torch.nn.Module] and a loss function using
[torch.func][torch.func].
Consider a function
$$ \ell: \mathbb{R}^{d_1} \times \mathbb{R}^{d_2} \times \mathbb{R}^{n} \times
\mathbb{R}^{n} \rightarrow \mathbb{R}, \quad \ell(\omega_1, \omega_2, x, y) =
\operatorname{loss}(f(\omega_1, \omega_2; x), y), $$
e.g. a two-layer neural network $f$ together with a loss function. This class
computes the expressions
$$ \nabla_{\omega_{i}}\ell(\omega_1, \omega_2, x, y), \quad
\nabla_{\omega_{i}}\nabla_{x}\ell(\omega_1, \omega_2, x, y), \quad
\nabla_{\omega}\ell(\omega_1, \omega_2, x, y) \cdot v.$$
"""

def __init__(
self,
model: torch.nn.Module,
2 changes: 1 addition & 1 deletion src/pydvl/influence/types.py
@@ -359,7 +359,7 @@ class OperatorGradientComposition(
):
"""
Generic base class representing a composable block that integrates an operator and
- a gradient provider to compute influences between batches of data.
+ a gradient provider to compute interactions between batches of data.
This block is designed to be flexible, handling different computational modes via
an abstract operator and gradient provider.
