From 099930e8cfa67e5ec28e98ed31573b413926198f Mon Sep 17 00:00:00 2001
From: Kristof Schroeder
Date: Fri, 14 Jun 2024 17:17:04 +0200
Subject: [PATCH] Rename parameter use_block_cg -> solve_simultaneously to
 avoid confusion with the block-diagonal approximation

---
 CHANGELOG.md                                  |  3 +-
 docs/influence/influence_function_model.md    | 38 +++++++++++--------
 notebooks/influence_wine.ipynb                |  2 +-
 .../torch/influence_function_model.py         | 16 +++----
 tests/influence/test_influence_calculator.py  |  2 +-
 tests/influence/torch/test_influence_model.py |  4 +-
 6 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6435bb856..2a21b74d5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -58,7 +58,8 @@
   [PR #598](https://github.com/aai-institute/pyDVL/pull/598)
 - Renaming of parameters of `CgInfluence`,
   `hessian_regularization` -> `regularization` (modify type annotation),
-  `pre_conditioner` -> `preconditioner`
+  `pre_conditioner` -> `preconditioner`,
+  `use_block_cg` -> `solve_simultaneously`
   [PR #601](https://github.com/aai-institute/pyDVL/pull/601)
 - Remove parameter `x0` from `CgInfluence`
   [PR #601](https://github.com/aai-institute/pyDVL/pull/601)
diff --git a/docs/influence/influence_function_model.md b/docs/influence/influence_function_model.md
index fcfa0fa27..c4642d5aa 100644
--- a/docs/influence/influence_function_model.md
+++ b/docs/influence/influence_function_model.md
@@ -23,37 +23,54 @@
 gradient method, defined in [@ji_breakdownfree_2017], which solves several
 right hand sides simultaneously.
 
-Optionally, the user can provide a pre-conditioner to improve convergence, such
-as a [Jacobi pre-conditioner
-][pydvl.influence.torch.pre_conditioner.JacobiPreConditioner], which
-is a simple [diagonal pre-conditioner](
+Optionally, the user can provide a preconditioner to improve convergence, such
+as a [Jacobi preconditioner
+][pydvl.influence.torch.preconditioner.JacobiPreconditioner], which
+is a simple [diagonal preconditioner](
 https://en.wikipedia.org/wiki/Preconditioner#Jacobi_(or_diagonal)_preconditioner)
 based on Hutchinson's diagonal estimator [@bekas_estimator_2007],
-or a [Nyström approximation based pre-conditioner
-][pydvl.influence.torch.pre_conditioner.NystroemPreConditioner],
+or a [Nyström approximation based preconditioner
+][pydvl.influence.torch.preconditioner.NystroemPreconditioner],
 described in [@frangella_randomized_2023].
 
 ```python
-from pydvl.influence.torch import CgInfluence
+from pydvl.influence.torch import CgInfluence, BlockMode, SecondOrderMode
 from pydvl.influence.torch.preconditioner import NystroemPreconditioner
 
 if_model = CgInfluence(
     model,
     loss,
-    hessian_regularization=0.0,
+    regularization=0.0,
     rtol=1e-7,
     atol=1e-7,
     maxiter=None,
-    use_block_cg=True,
-    pre_conditioner=NystroemPreconditioner(rank=10)
+    solve_simultaneously=True,
+    preconditioner=NystroemPreconditioner(rank=10),
+    block_structure=BlockMode.FULL,
+    second_order_mode=SecondOrderMode.HESSIAN
 )
 if_model.fit(train_loader)
 ```
 
-The additional optional parameters `rtol`, `atol`, `maxiter`, `use_block_cg` and
-`pre_conditioner` are respectively, the relative
+The additional optional parameters `rtol`, `atol`, `maxiter`,
+`solve_simultaneously` and `preconditioner` are, respectively, the relative
 tolerance, the absolute tolerance, the maximum number of iterations,
-a flag indicating whether to use block variant of cg and an optional
-pre-conditioner.
+a flag indicating whether to use a variant of CG that solves the system for
+several right hand sides simultaneously, and an optional
+preconditioner.
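+
+As a minimal sketch of the next step (the tensors `x_test`, `y_test`,
+`x_train` and `y_train` are assumed to hold query and training points),
+influence values can then be computed from the fitted model:
+
+```python
+# Up-weighting influence of each training point on each test point.
+values = if_model.influences(x_test, y_test, x_train, y_train)
+```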
+
+This implementation also supports a block-diagonal
+approximation of the Hessian, see
+[Block-diagonal approximation](#block-diagonal-approximation), as well as the
+[Gauss-Newton approximation](#gauss-newton-approximation).
 
 ### Linear time Stochastic Second-Order Approximation (LiSSA)
 
@@ -78,7 +86,7 @@ from pydvl.influence.torch import LissaInfluence, BlockMode, SecondOrderMode
 if_model = LissaInfluence(
     model,
     loss,
-    regularization=0.0
+    regularization=0.0,
     maxiter=1000,
     dampen=0.0,
     scale=10.0,
@@ -114,7 +122,7 @@ the Hessian and \(V\) contains the corresponding eigenvectors. See also
 [@schioppa_scaling_2022].
 
 ```python
-from pydvl.influence.torch import ArnoldiInfluence
+from pydvl.influence.torch import ArnoldiInfluence, BlockMode, SecondOrderMode
 if_model = ArnoldiInfluence(
     model,
     loss,
@@ -207,7 +215,7 @@ see also [@hataya_nystrom_2023] and [@frangella_randomized_2023]. The essential
 parameter is the rank of the approximation.
 
 ```python
-from pydvl.influence.torch import NystroemSketchInfluence
+from pydvl.influence.torch import NystroemSketchInfluence, BlockMode, SecondOrderMode
 if_model = NystroemSketchInfluence(
     model,
     loss,
diff --git a/notebooks/influence_wine.ipynb b/notebooks/influence_wine.ipynb
index a0810e303..7ec902438 100644
--- a/notebooks/influence_wine.ipynb
+++ b/notebooks/influence_wine.ipynb
@@ -750,7 +750,7 @@
     "    F.cross_entropy,\n",
     "    regularization=0.1,\n",
     "    progress=True,\n",
-    "    use_block_cg=True,\n",
+    "    solve_simultaneously=True,\n",
     "    preconditioner=NystroemPreconditioner(rank=5),\n",
     ")\n",
     "cg_influence_model = cg_influence_model.fit(training_data_loader)\n",
diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py
index acdfdbffa..9e7d96325 100644
--- a/src/pydvl/influence/torch/influence_function_model.py
+++ b/src/pydvl/influence/torch/influence_function_model.py
@@ -458,14 +458,10 @@ class CgInfluence(TorchComposableInfluence[CgOperator]):
         maxiter: Maximum number of iterations. If None, defaults to 10*len(b).
-        progress: If True, display progress bars for computing in the non-block
-            mode (use_block_cg=False).
-        precompute_grad: If True, the full data gradient is precomputed and kept
-            in memory, which can speed up the hessian vector product computation.
-            Set this to False, if you can't afford to keep the full computation graph
-            in memory.
-        pre_conditioner: Optional pre-conditioner to improve convergence of conjugate
+        progress: If True, display progress bars for computing in the non-block
+            mode (solve_simultaneously=False).
+        preconditioner: Optional preconditioner to improve convergence of conjugate
             gradient method
-        use_block_cg: If True, use block variant of conjugate gradient method, which
-            solves several right hand sides simultaneously
+        solve_simultaneously: If True, use a variant of the conjugate gradient method
+            that solves for several right hand sides simultaneously.
         warn_on_max_iteration: If True, logs a warning, if the desired tolerance is not
             achieved within `maxiter` iterations.
If False, the log level for this information is `logging.DEBUG` @@ -485,7 +481,7 @@ def __init__( progress: bool = False, precompute_grad: bool = False, preconditioner: Optional[Preconditioner] = None, - use_block_cg: bool = False, + solve_simultaneously: bool = False, warn_on_max_iteration: bool = True, block_structure: Union[BlockMode, OrderedDict[str, List[str]]] = BlockMode.FULL, second_order_mode: SecondOrderMode = SecondOrderMode.HESSIAN, @@ -493,7 +489,7 @@ def __init__( super().__init__(model, block_structure, regularization) self.loss = loss self.warn_on_max_iteration = warn_on_max_iteration - self.use_block_cg = use_block_cg + self.solve_simultaneously = solve_simultaneously self.preconditioner = preconditioner self.precompute_grad = precompute_grad self.progress = progress @@ -547,7 +543,7 @@ def _create_block( maxiter=self.maxiter, progress=self.progress, preconditioner=preconditioner, - use_block_cg=self.use_block_cg, + use_block_cg=self.solve_simultaneously, warn_on_max_iteration=self.warn_on_max_iteration, ) gp = TorchGradientProvider(self.model, self.loss, restrict_to=block_params) diff --git a/tests/influence/test_influence_calculator.py b/tests/influence/test_influence_calculator.py index 221cb746d..bfd976e2a 100644 --- a/tests/influence/test_influence_calculator.py +++ b/tests/influence/test_influence_calculator.py @@ -71,7 +71,7 @@ def influence_model(model_and_data, test_case, influence_factory): model, loss, hessian_reg, - use_block_cg=True, + solve_simultaneously=True, ).fit(train_dataLoader), lambda model, loss, train_dataLoader, hessian_reg: DirectInfluence( model, loss, hessian_reg diff --git a/tests/influence/torch/test_influence_model.py b/tests/influence/torch/test_influence_model.py index dd998b4f7..94374e796 100644 --- a/tests/influence/torch/test_influence_model.py +++ b/tests/influence/torch/test_influence_model.py @@ -398,7 +398,7 @@ def direct_influences_from_factors( loss, regularization=hessian_reg, preconditioner=NystroemPreconditioner(10), - use_block_cg=True, + solve_simultaneously=True, ).fit(train_dataLoader), 1e-4, ], @@ -776,7 +776,7 @@ def test_influences_cg( test_case.hessian_reg, maxiter=5, preconditioner=preconditioner, - use_block_cg=use_block_cg, + solve_simultaneously=use_block_cg, ) influence_model = influence_model.fit(train_dataloader)
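
A minimal, self-contained sketch of the renamed API in action follows. The toy
model and data, the `BlockMode.LAYER_WISE` block structure and the
`SecondOrderMode.GAUSS_NEWTON` mode are illustrative assumptions, not part of
this patch:

```python
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

from pydvl.influence.torch import CgInfluence, BlockMode, SecondOrderMode
from pydvl.influence.torch.preconditioner import NystroemPreconditioner

# Illustrative model and data; any torch module and DataLoader work the same way.
model = torch.nn.Sequential(
    torch.nn.Linear(10, 32), torch.nn.ReLU(), torch.nn.Linear(32, 3)
)
x_train, y_train = torch.randn(100, 10), torch.randint(0, 3, (100,))
train_loader = DataLoader(TensorDataset(x_train, y_train), batch_size=20)

if_model = CgInfluence(
    model,
    F.cross_entropy,
    regularization=0.1,
    solve_simultaneously=True,  # formerly use_block_cg: solve all rhs at once
    preconditioner=NystroemPreconditioner(rank=5),
    block_structure=BlockMode.LAYER_WISE,  # block-diagonal approximation
    second_order_mode=SecondOrderMode.GAUSS_NEWTON,
)
if_model = if_model.fit(train_loader)

# Influence of every training point on the first five points.
values = if_model.influences(x_train[:5], y_train[:5], x_train, y_train)
```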