diff --git a/docs/src/references/utils/tuning.rst b/docs/src/references/utils/tuning.rst
index 41c64f1e..7e47fbef 100644
--- a/docs/src/references/utils/tuning.rst
+++ b/docs/src/references/utils/tuning.rst
@@ -12,8 +12,11 @@
-than the given accuracy. Because these methods are gradient-based, be sure to pay
-attention to the ``learning_rate`` and ``max_steps`` parameter. A good choice of these
-two parameters can enhance the optimization speed and performance.
+than the given accuracy. The tuners perform a grid search over the method
+parameters, estimate the error for each candidate, and time only the candidates
+that reach the requested accuracy, returning the fastest combination found.
 
-.. autoclass:: torchpme.utils.tune_ewald
+.. autoclass:: torchpme.utils.tuning.ewald.EwaldTuner
    :members:
 
-.. autoclass:: torchpme.utils.tune_pme
+.. autoclass:: torchpme.utils.tuning.pme.PMETuner
+   :members:
+
+.. autoclass:: torchpme.utils.tuning.p3m.P3MTuner
    :members:
diff --git a/examples/1-charges-example.py b/examples/01-charges-example.py
similarity index 97%
rename from examples/1-charges-example.py
rename to examples/01-charges-example.py
index e8888acd..8f3b0f8c 100644
--- a/examples/1-charges-example.py
+++ b/examples/01-charges-example.py
@@ -37,6 +37,7 @@
 from metatensor.torch.atomistic import NeighborListOptions, System
 
 import torchpme
+from torchpme.utils.tuning.pme import PMETuner
 
 # %%
 #
@@ -44,6 +45,7 @@
 symbols = ("Cs", "Cl")
 types = torch.tensor([55, 17])
+charges = torch.tensor([[1.0], [-1.0]], dtype=torch.float64)
 positions = torch.tensor([(0, 0, 0), (0.5, 0.5, 0.5)], dtype=torch.float64)
 cell = torch.eye(3, dtype=torch.float64)
 pbc = torch.tensor([True, True, True])
 
@@ -55,9 +57,9 @@
-# The ``sum_squared_charges`` is equal to ``2.0`` becaue each atom either has a charge
-# of 1 or -1 in units of elementary charges.
+# Each atom has a charge of +1 or -1 in units of elementary charges; the
+# ``charges`` tensor defined above is passed directly to the tuner.
 
-smearing, pme_params, cutoff = torchpme.utils.tune_pme(
-    sum_squared_charges=2.0, cell=cell, positions=positions
-)
+smearing, pme_params, cutoff = PMETuner(
+    charges=charges, cell=cell, positions=positions, cutoff=4.4
+).tune()
 
 # %%
 #
diff --git a/examples/2-neighbor-lists-usage.py b/examples/02-neighbor-lists-usage.py
similarity index 97%
rename from examples/2-neighbor-lists-usage.py
rename to examples/02-neighbor-lists-usage.py
index 5de03b34..2026b732 100644
--- a/examples/2-neighbor-lists-usage.py
+++ b/examples/02-neighbor-lists-usage.py
@@ -46,6 +46,7 @@
 import vesin.torch
 
 import torchpme
+from torchpme.utils.tuning.pme import PMETuner
 
 # %%
 #
@@ -93,9 +94,7 @@
 
-sum_squared_charges = float(torch.sum(charges**2))
-
-smearing, pme_params, cutoff = torchpme.utils.tune_pme(
-    sum_squared_charges=sum_squared_charges, cell=cell, positions=positions
-)
+smearing, pme_params, cutoff = PMETuner(
+    charges=charges, cell=cell, positions=positions, cutoff=4.4
+).tune()
 
 # %%
 #
diff --git a/examples/3-mesh-demo.py b/examples/03-mesh-demo.py
similarity index 100%
rename from examples/3-mesh-demo.py
rename to examples/03-mesh-demo.py
diff --git a/examples/4-kspace-demo.py b/examples/04-kspace-demo.py
similarity index 100%
rename from examples/4-kspace-demo.py
rename to examples/04-kspace-demo.py
diff --git a/examples/5-autograd-demo.py b/examples/05-autograd-demo.py
similarity index 86%
rename from examples/5-autograd-demo.py
rename to examples/05-autograd-demo.py
index 4301d2f5..000f820a 100644
--- a/examples/5-autograd-demo.py
+++ b/examples/05-autograd-demo.py
@@ -17,6 +17,8 @@
 exercise to the reader. 
""" +# %% + from time import time import ase @@ -477,10 +479,11 @@ def forward(self, positions, cell, charges): ) # %% -# We can also time the difference in execution +# We can also evaluate the difference in execution # time between the Pytorch and scripted versions of the # module (depending on the system, the relative efficiency -# of the two evaluations could go either way!) +# of the two evaluations could go either way, as this is +# a too small system to make a difference!) duration = 0.0 for _i in range(20): @@ -515,3 +518,82 @@ def forward(self, positions, cell, charges): print(f"Evaluation time:\nPytorch: {time_python}ms\nJitted: {time_jit}ms") # %% +# Other auto-differentiation ideas +# -------------------------------- +# +# There are many other ways the auto-differentiation engine of +# ``torch`` can be used to facilitate the evaluation of atomistic +# models. + +# %% +# 4-site water models +# ~~~~~~~~~~~~~~~~~~~ +# +# Several water models (starting from the venerable TIP4P model of +# `Abascal and C. Vega, JCP (2005) `_) +# use a center of negative charge that is displaced from the O position. +# This is easily implemented, yielding the forces on the O and H positions +# generated by the displaced charge. + +structure = ase.Atoms( + positions=[ + [0, 0, 0], + [0, 1, 0], + [1, -0.2, 0], + ], + cell=[6, 6, 6], + symbols="OHH", +) + +cell = torch.from_numpy(structure.cell.array).to(device=device, dtype=dtype) +positions = torch.from_numpy(structure.positions).to(device=device, dtype=dtype) + +# %% +# The key step is to create a "fourth site" based on the O positions +# and use it in the ``interpolate`` step. + +charges = torch.tensor( + [[-1.0], [0.5], [0.5]], + dtype=dtype, + device=device, +) + +positions.requires_grad_(True) +charges.requires_grad_(True) +cell.requires_grad_(True) + +positions_4site = torch.vstack( + [ + ((positions[1::3] + positions[2::3]) * 0.5 + positions[0::3] * 3) / 4, + positions[1::3], + positions[2::3], + ] +) + +ns = torch.tensor([5, 5, 5]) +interpolator = torchpme.lib.MeshInterpolator( + cell=cell, ns_mesh=ns, interpolation_nodes=3, method="Lagrange" +) +interpolator.compute_weights(positions_4site) +mesh = interpolator.points_to_mesh(charges) + +value = (mesh**2).sum() + +# %% +# The gradients can be computed by just running `backward` on the +# end result. Gradients are computed on the H and O positions. + +value.backward() + +print( + f""" +Position gradients: +{positions.grad.T} + +Cell gradients: +{cell.grad} + +Charges gradients: +{charges.grad.T} +""" +) diff --git a/examples/6-splined-potential.py b/examples/06-splined-potential.py similarity index 100% rename from examples/6-splined-potential.py rename to examples/06-splined-potential.py diff --git a/examples/7-lode-demo.py b/examples/07-lode-demo.py similarity index 100% rename from examples/7-lode-demo.py rename to examples/07-lode-demo.py diff --git a/examples/8-combined-potential.py b/examples/08-combined-potential.py similarity index 100% rename from examples/8-combined-potential.py rename to examples/08-combined-potential.py diff --git a/examples/9-atomistic-model.py b/examples/09-atomistic-model.py similarity index 100% rename from examples/9-atomistic-model.py rename to examples/09-atomistic-model.py diff --git a/examples/10-tuning.py b/examples/10-tuning.py new file mode 100644 index 00000000..fb5a8a8f --- /dev/null +++ b/examples/10-tuning.py @@ -0,0 +1,248 @@ +""" +Parameter tuning for range-separated models +=========================================== + +.. 
currentmodule:: torchpme + +We explain and demonstrate parameter tuning for Ewald and PME +""" + +# %% + +from time import time + +import ase +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +import torch +import vesin.torch as vesin + +import torchpme +from torchpme.utils.tuning import TuningTimings +from torchpme.utils.tuning.pme import PMEErrorBounds + +DTYPE = torch.float64 + +# get_ipython().run_line_magic("matplotlib", "inline") # type: ignore # noqa +# %% + +positions = torch.tensor( + [ + [0.0, 0, 0], + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + [1, 1, 0], + [1, 0, 1], + [0, 1, 1], + [1, 1, 1], + ], + dtype=DTYPE, +) +charges = torch.tensor([+1.0, -1, -1, -1, +1, +1, +1, -1], dtype=DTYPE).reshape(-1, 1) +cell = 2 * torch.eye(3, dtype=DTYPE) +madelung_ref = 1.7475645946 +num_formula_units = 4 + +atoms = ase.Atoms("NaCl3Na3Cl", positions, cell=cell) + + +# %% +# compute and compare with reference + +smearing = 0.5 +pme_params = {"mesh_spacing": 0.5, "interpolation_nodes": 4} +cutoff = 5.0 + +max_cutoff = 32.0 + +nl = vesin.NeighborList(cutoff=max_cutoff, full_list=False) +i, j, S, d = nl.compute(points=positions, box=cell, periodic=True, quantities="ijSd") +neighbor_indices = torch.stack([i, j], dim=1) +neighbor_shifts = S +neighbor_distances = d + + +pme = torchpme.PMECalculator( + potential=torchpme.CoulombPotential(smearing=smearing), + **pme_params, # type: ignore[arg-type] +) +potential = pme( + charges=charges, + cell=cell, + positions=positions, + neighbor_indices=neighbor_indices, + neighbor_distances=neighbor_distances, +) + +energy = charges.T @ potential +madelung = (-energy / num_formula_units).flatten().item() + +# this is the estimated error +error_bounds = PMEErrorBounds(charges, cell, positions) + +estimated_error = error_bounds( + cutoff=max_cutoff, smearing=smearing, **pme_params +).item() + +# and this is how long it took to run with these parameters (est.) 
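
# ``TuningTimings`` (from ``torchpme.utils.tuning``, shown later in this patch)
# estimates this by running a few warm-up passes and then averaging the
# wall-clock time of several forward evaluations of the calculator; with
# ``run_backward=True`` each repetition also includes a backward pass, which is
# closer to the cost of a typical training or simulation step.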
+ +timings = TuningTimings(charges, cell, positions, cutoff=max_cutoff, run_backward=True) +estimated_timing = timings(pme) + +print(f""" +Computed madelung constant: {madelung} +Actual error: {madelung-madelung_ref} +Estimated error: {estimated_error} +Timing: {estimated_timing} seconds +""") + +# %% +# now set up a testing framework + + +def timed_madelung(cutoff, smearing, mesh_spacing, interpolation_nodes): + assert cutoff <= max_cutoff + + filter_idx = torch.where(neighbor_distances <= cutoff) + filter_indices = neighbor_indices[filter_idx] + filter_distances = neighbor_distances[filter_idx] + + pme = torchpme.PMECalculator( + potential=torchpme.CoulombPotential(smearing=smearing), + mesh_spacing=mesh_spacing, + interpolation_nodes=interpolation_nodes, + ) + start = time() + potential = pme( + charges=charges, + cell=cell, + positions=positions, + neighbor_indices=filter_indices, + neighbor_distances=filter_distances, + ) + energy = charges.T @ potential + madelung = (-energy / num_formula_units).flatten().item() + end = time() + + return madelung, end - start + + +smearing_grid = torch.logspace(-1, 0.5, 8) +spacing_grid = torch.logspace(-1, 0.5, 9) +results = np.zeros((len(smearing_grid), len(spacing_grid))) +timings = np.zeros((len(smearing_grid), len(spacing_grid))) +bounds = np.zeros((len(smearing_grid), len(spacing_grid))) +for ism, smearing in enumerate(smearing_grid): + for isp, spacing in enumerate(spacing_grid): + results[ism, isp], timings[ism, isp] = timed_madelung(8.0, smearing, spacing, 4) + bounds[ism, isp] = error_bounds(8.0, smearing, spacing, 4) + +# %% +# plot + +vmin = 1e-12 +vmax = 2 +levels = np.geomspace(vmin, vmax, 30) + +fig, ax = plt.subplots(1, 3, figsize=(9, 3), sharey=True, constrained_layout=True) +contour = ax[0].contourf( + spacing_grid, + smearing_grid, + bounds, + vmin=vmin, + vmax=vmax, + levels=levels, + norm=mpl.colors.LogNorm(), + extend="both", +) +ax[0].set_xscale("log") +ax[0].set_yscale("log") +ax[0].set_ylabel(r"$\sigma$ / Å") +ax[0].set_xlabel(r"spacing / Å") +ax[0].set_title("estimated error") +cbar = fig.colorbar(contour, ax=ax[1], label="error") +cbar.ax.set_yscale("log") + +contour = ax[1].contourf( + spacing_grid, + smearing_grid, + np.abs(results - madelung_ref), + vmin=vmin, + vmax=vmax, + levels=levels, + norm=mpl.colors.LogNorm(), + extend="both", +) +ax[1].set_xscale("log") +ax[1].set_yscale("log") +ax[1].set_xlabel(r"spacing / Å") +ax[1].set_title("actual error") + +contour = ax[2].contourf( + spacing_grid, + smearing_grid, + timings, + levels=np.geomspace(1e-3, 2e-2, 20), + norm=mpl.colors.LogNorm(), +) +ax[2].set_xscale("log") +ax[2].set_yscale("log") +ax[2].set_ylabel(r"$\sigma$ / Å") +ax[2].set_xlabel(r"spacing / Å") +ax[2].set_title("actual timing") +cbar = fig.colorbar(contour, ax=ax[2], label="time / s") +cbar.ax.set_yscale("log") + +# cbar.ax.set_yscale('log') + + +# %% +# +# a good heuristic is to keep cutoff/sigma constant (easy to +# determine error limit) to see how timings change + +smearing_grid = torch.logspace(-1, 0.5, 8) +spacing_grid = torch.logspace(-1, 0.5, 9) +results = np.zeros((len(smearing_grid), len(spacing_grid))) +timings = np.zeros((len(smearing_grid), len(spacing_grid))) +for ism, smearing in enumerate(smearing_grid): + for isp, spacing in enumerate(spacing_grid): + madelung, timing = timed_madelung(smearing * 8, smearing, spacing, 4) + results[ism, isp] = madelung + timings[ism, isp] = timing + + +# %% +# plot + +fig, ax = plt.subplots(1, 2, figsize=(7, 3), constrained_layout=True) +contour = 
ax[0].contourf(
    spacing_grid, smearing_grid, np.log10(np.abs(results - madelung_ref))
)
ax[0].set_xscale("log")
ax[0].set_yscale("log")
ax[0].set_ylabel(r"$\sigma$ / Å")
ax[0].set_xlabel(r"spacing / Å")
cbar = fig.colorbar(contour, ax=ax[0], label="log10(error)")

contour = ax[1].contourf(spacing_grid, smearing_grid, np.log10(timings))
ax[1].set_xscale("log")
ax[1].set_yscale("log")
ax[1].set_xlabel(r"spacing / Å")
cbar = fig.colorbar(contour, ax=ax[1], label="log10(time / s)")

# %%

# NB: ``PMEErrorBounds`` expects the full ``charges`` tensor, not the sum of
# squared charges (it accumulates the squared charges internally)
EB = torchpme.utils.tuning.pme.PMEErrorBounds(charges, cell, positions)

# %%
v, t = timed_madelung(cutoff=5, smearing=1, mesh_spacing=1, interpolation_nodes=4)
print(
    v - madelung_ref,
    t,
    EB.forward(cutoff=5, smearing=1, mesh_spacing=1, interpolation_nodes=4).item(),
)

# %%
diff --git a/src/torchpme/utils/__init__.py b/src/torchpme/utils/__init__.py
index 03a4df69..b6c81555 100644
--- a/src/torchpme/utils/__init__.py
+++ b/src/torchpme/utils/__init__.py
@@ -1,13 +1,7 @@
 from . import prefactors, tuning, splines  # noqa
 from .splines import CubicSpline, CubicSplineReciprocal
-from .tuning.ewald import tune_ewald
-from .tuning.p3m import tune_p3m
-from .tuning.pme import tune_pme
 
 __all__ = [
-    "tune_ewald",
-    "tune_pme",
-    "tune_p3m",
     "CubicSpline",
     "CubicSplineReciprocal",
 ]
diff --git a/src/torchpme/utils/tuning/__init__.py b/src/torchpme/utils/tuning/__init__.py
index 33d7108c..0f5a6640 100644
--- a/src/torchpme/utils/tuning/__init__.py
+++ b/src/torchpme/utils/tuning/__init__.py
@@ -1,40 +1,9 @@
 import math
-import warnings
-from typing import Callable, Optional
+import time
+from typing import Optional
 
 import torch
-
-
-def _optimize_parameters(
-    params: list[torch.Tensor],
-    loss: Callable,
-    max_steps: int,
-    accuracy: float,
-    learning_rate: float,
-) -> None:
-    optimizer = torch.optim.Adam(params, lr=learning_rate)
-
-    for _ in range(max_steps):
-        loss_value = loss(*params)
-        if torch.isnan(loss_value) or torch.isinf(loss_value):
-            raise ValueError(
-                "The value of the estimated error is now nan, consider using a "
-                "smaller learning rate."
-            )
-        loss_value.backward()
-        optimizer.step()
-        optimizer.zero_grad()
-
-        if loss_value <= accuracy:
-            break
-
-    if loss_value > accuracy:
-        warnings.warn(
-            "The searching for the parameters is ended, but the error is "
-            f"{float(loss_value):.3e}, larger than the given accuracy {accuracy}. 
" - "Consider increase max_step and", - stacklevel=2, - ) +import vesin.torch def _estimate_smearing_cutoff( @@ -42,61 +11,32 @@ def _estimate_smearing_cutoff( smearing: Optional[float], cutoff: Optional[float], accuracy: float, -) -> tuple[torch.tensor, torch.tensor]: - dtype = cell.dtype - device = cell.device - + prefac: float, +) -> tuple[float, float]: cell_dimensions = torch.linalg.norm(cell, dim=1) min_dimension = float(torch.min(cell_dimensions)) half_cell = min_dimension / 2.0 - - smearing_init = torch.tensor( - half_cell / 5 if smearing is None else smearing, - dtype=dtype, - device=device, - requires_grad=(smearing is None), - ) - - if cutoff is None: - # solve V_SR(cutoff) == accuracy for cutoff - def loss(cutoff): - return ( - torch.erfc(cutoff / math.sqrt(2) / smearing_init) / cutoff - accuracy - ) ** 2 - - cutoff_init = torch.tensor( - half_cell, dtype=dtype, device=device, requires_grad=True - ) - _optimize_parameters( - params=[cutoff_init], - loss=loss, - accuracy=accuracy, - max_steps=1000, - learning_rate=0.1, + cutoff_init = min(5.0, half_cell) if cutoff is None else cutoff + ratio = math.sqrt( + -2 + * math.log( + accuracy + / 2 + / prefac + * math.sqrt(cutoff_init * float(torch.abs(cell.det()))) ) - - cutoff_init = torch.tensor( - float(cutoff_init) if cutoff is None else cutoff, - dtype=dtype, - device=device, - requires_grad=(cutoff is None), ) + smearing_init = cutoff_init / ratio if smearing is None else smearing - return smearing_init, cutoff_init + return float(smearing_init), float(cutoff_init) def _validate_parameters( - sum_squared_charges: float, + charges: torch.Tensor, cell: torch.Tensor, positions: torch.Tensor, exponent: int, - accuracy: float, ) -> None: - if sum_squared_charges <= 0: - raise ValueError( - f"sum of squared charges must be positive, got {sum_squared_charges}" - ) - if exponent != 1: raise NotImplementedError("Only exponent = 1 is supported") @@ -135,5 +75,138 @@ def _validate_parameters( "periodic calculation" ) - if not isinstance(accuracy, float): - raise ValueError(f"'{accuracy}' is not a float.") + if charges.dtype != dtype: + raise ValueError( + f"each `charges` must have the same type {dtype} as `positions`, got at least " + "one tensor of type " + f"{charges.dtype}" + ) + + if charges.device != device: + raise ValueError( + f"each `charges` must be on the same device {device} as `positions`, got at " + "least one tensor with device " + f"{charges.device}" + ) + + if charges.dim() != 2: + raise ValueError( + "`charges` must be a 2-dimensional tensor, got " + f"tensor with {charges.dim()} dimension(s) and shape " + f"{list(charges.shape)}" + ) + + if list(charges.shape) != [len(positions), charges.shape[1]]: + raise ValueError( + "`charges` must be a tensor with shape [n_atoms, n_channels], with " + "`n_atoms` being the same as the variable `positions`. 
Got tensor with "
+            f"shape {list(charges.shape)} where positions contains "
+            f"{len(positions)} atoms"
+        )
+
+
+class TuningErrorBounds(torch.nn.Module):
+    """Base class for error bounds."""
+
+    def __init__(
+        self,
+        charges: torch.Tensor,
+        cell: torch.Tensor,
+        positions: torch.Tensor,
+    ):
+        super().__init__()
+        self._charges = charges
+        self._cell = cell
+        self._positions = positions
+
+    def forward(self, *args, **kwargs):
+        return self.error(*args, **kwargs)
+
+
+class TuningTimings(torch.nn.Module):
+    """Class to estimate the execution time of a calculator on a benchmark structure."""
+
+    def __init__(
+        self,
+        charges: torch.Tensor,
+        cell: torch.Tensor,
+        positions: torch.Tensor,
+        cutoff: float,
+        neighbor_indices: Optional[torch.Tensor] = None,
+        neighbor_distances: Optional[torch.Tensor] = None,
+        n_repeat: int = 4,
+        n_warmup: int = 2,
+        run_backward: bool = True,
+    ):
+        super().__init__()
+        self._charges = charges
+        self._cell = cell
+        self._positions = positions
+        self._dtype = charges.dtype
+        self._device = charges.device
+        self._n_repeat = n_repeat
+        self._n_warmup = n_warmup
+        self._run_backward = run_backward
+
+        if neighbor_indices is None and neighbor_distances is None:
+            nl = vesin.torch.NeighborList(cutoff=cutoff, full_list=False)
+            i, j, neighbor_distances = nl.compute(
+                points=self._positions.to(dtype=torch.float64, device="cpu"),
+                box=self._cell.to(dtype=torch.float64, device="cpu"),
+                periodic=True,
+                quantities="ijd",
+            )
+            neighbor_indices = torch.stack([i, j], dim=1)
+        elif neighbor_indices is None or neighbor_distances is None:
+            raise ValueError(
+                "If neighbor_indices or neighbor_distances are None, "
+                "both must be None."
+            )
+        self._neighbor_indices = neighbor_indices.to(device=self._device)
+        self._neighbor_distances = neighbor_distances.to(
+            dtype=self._dtype, device=self._device
+        )
+
+    def forward(self, calculator: torch.nn.Module):
+        """
+        Estimate the execution time of a given calculator for the structure
+        to be used as benchmark.
+        """
+        for _ in range(self._n_warmup):
+            result = calculator.forward(
+                positions=self._positions,
+                charges=self._charges,
+                cell=self._cell,
+                neighbor_indices=self._neighbor_indices,
+                neighbor_distances=self._neighbor_distances,
+            )
+
+        # measure time
+        execution_time = 0.0
+
+        for _ in range(self._n_repeat):
+            positions = self._positions.clone()
+            cell = self._cell.clone()
+            charges = self._charges.clone()
+            # NB: this will not compute gradients involving the distances
+            if self._run_backward:
+                positions.requires_grad_(True)
+                cell.requires_grad_(True)
+                charges.requires_grad_(True)
+            execution_time -= time.time()
+            result = calculator.forward(
+                positions=positions,
+                charges=charges,
+                cell=cell,
+                neighbor_indices=self._neighbor_indices,
+                neighbor_distances=self._neighbor_distances,
+            )
+            value = result.sum()
+            if self._run_backward:
+                value.backward(retain_graph=True)
+
+            if self._device.type == "cuda":
+                torch.cuda.synchronize()
+            execution_time += time.time()
+
+        return execution_time / self._n_repeat
diff --git a/src/torchpme/utils/tuning/ewald.py b/src/torchpme/utils/tuning/ewald.py
index c04c9fc2..4d51cadc 100644
--- a/src/torchpme/utils/tuning/ewald.py
+++ b/src/torchpme/utils/tuning/ewald.py
@@ -1,31 +1,21 @@
 import math
 from typing import Optional
 
+import numpy as np
 import torch
 
+from ...calculators import EwaldCalculator
 from . 
import ( - _estimate_smearing_cutoff, - _optimize_parameters, - _validate_parameters, + TuningErrorBounds, ) +from .grid_search import GridSearchBase TWO_PI = 2 * math.pi -def tune_ewald( - sum_squared_charges: float, - cell: torch.Tensor, - positions: torch.Tensor, - smearing: Optional[float] = None, - lr_wavelength: Optional[float] = None, - cutoff: Optional[float] = None, - exponent: int = 1, - accuracy: float = 1e-3, - max_steps: int = 50000, - learning_rate: float = 0.1, -) -> tuple[float, dict[str, float], float]: +class EwaldErrorBounds(TuningErrorBounds): r""" - Find the optimal parameters for :class:`torchpme.EwaldCalculator`. + Error bounds for :class:`torchpme.calculators.ewald.EwaldCalculator`. The error formulas are given `online `_ @@ -40,128 +30,88 @@ def tune_ewald( r_c &= \mathrm{cutoff} - For the optimization we use the :class:`torch.optim.Adam` optimizer. By default this - function optimize the ``smearing``, ``lr_wavelength`` and ``cutoff`` based on the - error formula given `online`_. You can limit the optimization by giving one or more - parameters to the function. For example in usual ML workflows the cutoff is fixed - and one wants to optimize only the ``smearing`` and the ``lr_wavelength`` with - respect to the minimal error and fixed cutoff. - - :param sum_squared_charges: accumulated squared charges, must be positive + :param charges: atomic charges :param cell: single tensor of shape (3, 3), describing the bounding :param positions: single tensor of shape (``len(charges), 3``) containing the Cartesian positions of all point charges in the system. - :param smearing: if its value is given, it will not be tuned, see - :class:`torchpme.EwaldCalculator` for details - :param lr_wavelength: if its value is given, it will not be tuned, see - :class:`torchpme.EwaldCalculator` for details - :param cutoff: if its value is given, it will not be tuned, see - :class:`torchpme.EwaldCalculator` for details - :param exponent: exponent :math:`p` in :math:`1/r^p` potentials - :param accuracy: Recomended values for a balance between the accuracy and speed is - :math:`10^{-3}`. For more accurate results, use :math:`10^{-6}`. - :param max_steps: maximum number of gradient descent steps - :param learning_rate: learning rate for gradient descent - - :return: Tuple containing a float of the optimal smearing for the :class: - `CoulombPotential`, a dictionary with the parameters for - :class:`EwaldCalculator` and a float of the optimal cutoff value for the - neighborlist computation. - - Example - ------- - >>> import torch - >>> positions = torch.tensor( - ... [[0.0, 0.0, 0.0], [0.5, 0.5, 0.5]], dtype=torch.float64 - ... ) - >>> charges = torch.tensor([[1.0], [-1.0]], dtype=torch.float64) - >>> cell = torch.eye(3, dtype=torch.float64) - >>> smearing, parameter, cutoff = tune_ewald( - ... torch.sum(charges**2, dim=0), cell, positions, accuracy=1e-1 - ... ) - - You can check the values of the parameters - - >>> print(smearing) - 0.7527865828476816 - - >>> print(parameter) - {'lr_wavelength': 11.138556788117427} - - >>> print(cutoff) - 2.207855328192979 - - You can give one parameter to the function to tune only other parameters, for - example, fixing the cutoff to 0.1 - - >>> smearing, parameter, cutoff = tune_ewald( - ... torch.sum(charges**2, dim=0), cell, positions, cutoff=0.4, accuracy=1e-1 - ... 
) - - You can check the values of the parameters, now the cutoff is fixed - - >>> print(round(smearing, 4)) - 0.1402 - - We can also check the value of the other parameter like the ``lr_wavelength`` - - >>> print(round(parameter["lr_wavelength"], 3)) - 0.255 - - and finally as requested the value of the cutoff is fixed - - >>> print(cutoff) - 0.4 - """ - _validate_parameters(sum_squared_charges, cell, positions, exponent, accuracy) - - smearing_opt, cutoff_opt = _estimate_smearing_cutoff( - cell=cell, smearing=smearing, cutoff=cutoff, accuracy=accuracy - ) - # We choose a very small initial fourier wavelength, hardcoded for now - k_cutoff_opt = torch.tensor( - 1e-3 if lr_wavelength is None else TWO_PI / lr_wavelength, - dtype=cell.dtype, - device=cell.device, - requires_grad=(lr_wavelength is None), - ) - - volume = torch.abs(cell.det()) - prefac = 2 * sum_squared_charges / math.sqrt(len(positions)) - - def err_Fourier(smearing, k_cutoff): + def __init__( + self, + charges: torch.Tensor, + cell: torch.Tensor, + positions: torch.Tensor, + ): + super().__init__(charges, cell, positions) + + self.volume = torch.abs(torch.det(cell)) + self.sum_squared_charges = (charges**2).sum() + self.prefac = 2 * self.sum_squared_charges / math.sqrt(len(positions)) + self.cell = cell + self.positions = positions + + def err_kspace(self, smearing, lr_wavelength): return ( - prefac**0.5 + self.prefac**0.5 / smearing - / torch.sqrt(TWO_PI**2 * volume / (TWO_PI / k_cutoff) ** 0.5) - * torch.exp(-(TWO_PI**2) * smearing**2 / (TWO_PI / k_cutoff)) + / torch.sqrt(TWO_PI**2 * self.volume / (lr_wavelength) ** 0.5) + * torch.exp(-(TWO_PI**2) * smearing**2 / (lr_wavelength)) ) - def err_real(smearing, cutoff): + def err_rspace(self, smearing, cutoff): return ( - prefac - / torch.sqrt(cutoff * volume) + self.prefac + / torch.sqrt(cutoff * self.volume) * torch.exp(-(cutoff**2) / 2 / smearing**2) ) - def loss(smearing, k_cutoff, cutoff): + def forward(self, smearing, lr_wavelength, cutoff): + r""" + Calculate the error bound of Ewald. + + :param smearing: see :class:`torchpme.EwaldCalculator` for details + :param lr_wavelength: see :class:`torchpme.EwaldCalculator` for details + :param cutoff: see :class:`torchpme.EwaldCalculator` for details + """ + smearing = torch.as_tensor(smearing) + lr_wavelength = torch.as_tensor(lr_wavelength) + cutoff = torch.as_tensor(cutoff) return torch.sqrt( - err_Fourier(smearing, k_cutoff) ** 2 + err_real(smearing, cutoff) ** 2 + self.err_kspace(smearing, lr_wavelength) ** 2 + + self.err_rspace(smearing, cutoff) ** 2 ) - params = [smearing_opt, k_cutoff_opt, cutoff_opt] - _optimize_parameters( - params=params, - loss=loss, - max_steps=max_steps, - accuracy=accuracy, - learning_rate=learning_rate, - ) - - return ( - float(smearing_opt), - {"lr_wavelength": TWO_PI / float(k_cutoff_opt)}, - float(cutoff_opt), - ) + +class EwaldTuner(GridSearchBase): + """ + Class for finding the optimal parameters for EwaldCalculator using a grid search. + + For details of the parameters see :class:`torchpme.utils.tuning.GridSearchBase`. 
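+
+    Example (an illustrative sketch of the new interface; the ``cutoff`` value
+    is arbitrary):
+
+    >>> import torch
+    >>> positions = torch.tensor(
+    ...     [[0.0, 0.0, 0.0], [0.5, 0.5, 0.5]], dtype=torch.float64
+    ... )
+    >>> charges = torch.tensor([[1.0], [-1.0]], dtype=torch.float64)
+    >>> cell = torch.eye(3, dtype=torch.float64)
+    >>> smearing, parameter, cutoff = EwaldTuner(
+    ...     charges, cell, positions, cutoff=4.4
+    ... ).tune(accuracy=1e-3)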
+
+    """
+
+    ErrorBounds = EwaldErrorBounds
+    CalculatorClass = EwaldCalculator
+    GridSearchParams = {"lr_wavelength": 1 / np.arange(1, 15)}
+
+    def __init__(
+        self,
+        charges: torch.Tensor,
+        cell: torch.Tensor,
+        positions: torch.Tensor,
+        cutoff: float,
+        exponent: int = 1,
+        neighbor_indices: Optional[torch.Tensor] = None,
+        neighbor_distances: Optional[torch.Tensor] = None,
+    ):
+        super().__init__(
+            charges,
+            cell,
+            positions,
+            cutoff,
+            exponent,
+            neighbor_indices,
+            neighbor_distances,
+        )
+        # copy the class-level defaults so that the rescaling below does not
+        # accumulate across repeated instantiations
+        self.GridSearchParams = dict(self.GridSearchParams)
+        self.GridSearchParams["lr_wavelength"] = self.GridSearchParams[
+            "lr_wavelength"
+        ] * float(torch.min(self._cell_dimensions))
diff --git a/src/torchpme/utils/tuning/grid_search.py b/src/torchpme/utils/tuning/grid_search.py
new file mode 100644
index 00000000..e0105860
--- /dev/null
+++ b/src/torchpme/utils/tuning/grid_search.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python3
+
+import math
+from itertools import product
+from typing import Optional
+from warnings import warn
+
+import torch
+
+from ...calculators import (
+    Calculator,
+)
+from ...potentials import InversePowerLawPotential
+from . import (
+    TuningErrorBounds,
+    TuningTimings,
+    _estimate_smearing_cutoff,
+    _validate_parameters,
+)
+
+
+class GridSearchBase:
+    r"""
+    Base class for finding the optimal parameters for calculators using a grid search.
+
+    :param charges: torch.Tensor, atomic (pseudo-)charges
+    :param cell: torch.Tensor, periodic supercell for the system
+    :param positions: torch.Tensor, Cartesian coordinates of the particles within
+        the supercell.
+    :param cutoff: float, cutoff distance for the neighborlist
+    :param exponent: exponent :math:`p` in :math:`1/r^p` potentials, currently only
+        :math:`p=1` is supported
+    :param neighbor_indices: torch.Tensor with the ``i,j`` indices of neighbors for
+        which the potential should be computed in real space.
+    :param neighbor_distances: torch.Tensor with the pair distances of the neighbors
+        for which the potential should be computed in real space.
+    """
+
+    ErrorBounds: type[TuningErrorBounds]
+    Timings: type[TuningTimings] = TuningTimings
+    CalculatorClass: type[Calculator]
+    GridSearchParams: dict[str, torch.Tensor]  # {"interpolation_nodes": ..., ...}
+
+    def __init__(
+        self,
+        charges: torch.Tensor,
+        cell: torch.Tensor,
+        positions: torch.Tensor,
+        cutoff: float,
+        exponent: int = 1,
+        neighbor_indices: Optional[torch.Tensor] = None,
+        neighbor_distances: Optional[torch.Tensor] = None,
+    ):
+        _validate_parameters(charges, cell, positions, exponent)
+        self.charges = charges
+        self.cell = cell
+        self.positions = positions
+        self.cutoff = cutoff
+        self.exponent = exponent
+        self.dtype = charges.dtype
+        self.device = charges.device
+        self.err_func = self.ErrorBounds(charges, cell, positions)
+        self._cell_dimensions = torch.linalg.norm(cell, dim=1)
+        self.time_func = self.Timings(
+            charges,
+            cell,
+            positions,
+            cutoff,
+            neighbor_indices,
+            neighbor_distances,
+            4,
+            2,
+            True,
+        )
+
+        self._prefac = 2 * (charges**2).sum() / math.sqrt(len(positions))
+
+    def tune(
+        self,
+        accuracy: float = 1e-3,
+    ):
+        r"""
+        The steps are:
+
+        1. Find the ``smearing`` parameter for the :py:class:`CoulombPotential`
+           that leads to a real space error of half the desired accuracy.
+        2. Grid search for the kspace parameters, i.e. the ``lr_wavelength`` for
+           Ewald and the ``mesh_spacing`` and ``interpolation_nodes`` for PME and
+           P3M. For each combination of parameters, calculate the error. If the
+           error is smaller than the desired accuracy, use this combination for
+           test runs to get the calculation time needed. Return the combination
+           that leads to the shortest calculation time. If the desired accuracy
+           is never reached, return the combination that leads to the smallest
+           error and raise a warning.
+
+        :param accuracy: A recommended value balancing accuracy and speed is
+            :math:`10^{-3}`. For more accurate results, use :math:`10^{-6}`.
+
+        :return: Tuple containing a float of the optimal smearing for the
+            :py:class:`CoulombPotential`, a dictionary with the parameters for the
+            calculator of the chosen method and a float of the optimal cutoff value
+            for the neighborlist computation.
+        """
+        if not isinstance(accuracy, float):
+            raise ValueError(f"'{accuracy}' is not a float.")
+
+        smearing_opt = None
+        params_opt = None
+        cutoff_opt = None
+        time_opt = torch.inf
+
+        # In case there is no parameter reaching the accuracy, return
+        # the best found so far
+        smearing_err_opt = None
+        params_err_opt = None
+        cutoff_err_opt = None
+        err_opt = torch.inf
+
+        smearing, cutoff = _estimate_smearing_cutoff(
+            self.cell,
+            smearing=None,
+            cutoff=self.cutoff,
+            accuracy=accuracy,
+            prefac=self._prefac,
+        )
+        for param_values in product(*self.GridSearchParams.values()):
+            params = dict(zip(self.GridSearchParams.keys(), param_values))
+            err = self.err_func(
+                smearing=smearing,
+                cutoff=cutoff,
+                **params,
+            )
+
+            if err > accuracy:
+                # Not going to test the time, record the parameters if the error is
+                # better.
+                if err < err_opt:
+                    smearing_err_opt = smearing
+                    params_err_opt = params
+                    cutoff_err_opt = cutoff
+                    err_opt = err
+                continue
+
+            execution_time = self._timing(smearing, params)
+            if execution_time < time_opt:
+                smearing_opt = smearing
+                params_opt = params
+                cutoff_opt = cutoff
+                time_opt = execution_time
+
+        if time_opt == torch.inf:
+            # Never found a parameter that reached the accuracy
+            warn(
+                f"No parameters found within the desired accuracy of {accuracy}. "
+                f"Returning the best found so far, with an error of {err_opt}.",
+                stacklevel=1,
+            )
+            return smearing_err_opt, params_err_opt, cutoff_err_opt
+
+        return smearing_opt, params_opt, cutoff_opt
+
+    def _timing(self, smearing: float, params: dict):
+        calculator = self.CalculatorClass(
+            potential=InversePowerLawPotential(
+                exponent=self.exponent,  # but only exponent = 1 is supported
+                smearing=smearing,
+            ),
+            **params,
+        )
+
+        return self.time_func(calculator)
diff --git a/src/torchpme/utils/tuning/p3m.py b/src/torchpme/utils/tuning/p3m.py
index 64612e68..f8a4c27e 100644
--- a/src/torchpme/utils/tuning/p3m.py
+++ b/src/torchpme/utils/tuning/p3m.py
@@ -1,14 +1,15 @@
 import math
 from typing import Optional
 
+import numpy as np
 import torch
 
-from ...lib import get_ns_mesh
+from torchpme import P3MCalculator
+
 from . import (
-    _estimate_smearing_cutoff,
-    _optimize_parameters,
-    _validate_parameters,
+    TuningErrorBounds,
 )
+from .grid_search import GridSearchBase
 
 # Coefficients for the P3M Fourier error,
 # see Table II of http://dx.doi.org/10.1063/1.477415
@@ -68,21 +69,10 @@
 ]
 
-def tune_p3m(
-    sum_squared_charges: float,
-    cell: torch.Tensor,
-    positions: torch.Tensor,
-    smearing: Optional[float] = None,
-    mesh_spacing: Optional[float] = None,
-    cutoff: Optional[float] = None,
-    interpolation_nodes: int = 4,
-    exponent: int = 1,
-    accuracy: float = 1e-3,
-    max_steps: int = 50000,
-    learning_rate: float = 5e-3,
-) -> tuple[float, dict[str, float], float]:
+class P3MErrorBounds(TuningErrorBounds):
     r"""
-    Find the optimal parameters for :class:`torchpme.calculators.pme.PMECalculator`. 
+ " + Error bounds for :class:`torchpme.calculators.pme.P3MCalculator`. For the error formulas are given `here `_. Note the difference notation between the parameters in the reference and ours: @@ -91,97 +81,37 @@ def tune_p3m( \alpha = \left(\sqrt{2}\,\mathrm{smearing} \right)^{-1} - .. hint:: - - Tuning uses an initial guess for the optimization, which can be applied by - setting ``max_steps = 0``. This can be useful if fast tuning is required. These - values typically result in accuracies around :math:`10^{-2}`. - - :param sum_squared_charges: accumulated squared charges, must be positive + :param charges: atomic charges :param cell: single tensor of shape (3, 3), describing the bounding :param positions: single tensor of shape (``len(charges), 3``) containing the Cartesian positions of all point charges in the system. - :param interpolation_nodes: The number ``n`` of nodes used in the interpolation per - coordinate axis. The total number of interpolation nodes in 3D will be ``n^3``. - In general, for ``n`` nodes, the interpolation will be performed by piecewise - polynomials of degree ``n`` (e.g. ``n = 3`` for cubic interpolation). Only - the values ``1, 2, 3, 4, 5`` are supported. - :param exponent: exponent :math:`p` in :math:`1/r^p` potentials - :param accuracy: Recomended values for a balance between the accuracy and speed is - :math:`10^{-3}`. For more accurate results, use :math:`10^{-6}`. - :param max_steps: maximum number of gradient descent steps - :param learning_rate: learning rate for gradient descent - :param verbose: whether to print the progress of gradient descent - - :return: Tuple containing a float of the optimal smearing for the :py:class: - `CoulombPotential`, a dictionary with the parameters for - :py:class:`PMECalculator` and a float of the optimal cutoff value for the - neighborlist computation. - - Example - ------- - >>> import torch - - To allow reproducibility, we set the seed to a fixed value - - >>> _ = torch.manual_seed(0) - >>> positions = torch.tensor( - ... [[0.0, 0.0, 0.0], [0.5, 0.5, 0.5]], dtype=torch.float64 - ... ) - >>> charges = torch.tensor([[1.0], [-1.0]], dtype=torch.float64) - >>> cell = torch.eye(3, dtype=torch.float64) - >>> smearing, parameter, cutoff = tune_p3m( - ... torch.sum(charges**2, dim=0), cell, positions, accuracy=1e-1 - ... 
) - - You can check the values of the parameters - - >>> print(smearing) - 0.5084014996119913 - - >>> print(parameter) - {'mesh_spacing': 0.546694745583215, 'interpolation_nodes': 4} - - >>> print(cutoff) - 2.6863848597963442 - """ - _validate_parameters(sum_squared_charges, cell, positions, exponent, accuracy) - - smearing_opt, cutoff_opt = _estimate_smearing_cutoff( - cell=cell, - smearing=smearing, - cutoff=cutoff, - accuracy=accuracy, - ) - # We choose only one mesh as initial guess - if mesh_spacing is None: - ns_mesh_opt = torch.tensor( - [1, 1, 1], - device=cell.device, - dtype=cell.dtype, - requires_grad=True, - ) - else: - ns_mesh_opt = get_ns_mesh(cell, mesh_spacing) - - cell_dimensions = torch.linalg.norm(cell, dim=1) - volume = torch.abs(cell.det()) - prefac = 2 * sum_squared_charges / math.sqrt(len(positions)) - - interpolation_nodes = torch.tensor(interpolation_nodes, device=cell.device) - def err_Fourier(smearing, ns_mesh): - spacing = cell_dimensions / ns_mesh - h = torch.prod(spacing) ** (1 / 3) + def __init__( + self, charges: torch.Tensor, cell: torch.Tensor, positions: torch.Tensor + ): + super().__init__(charges, cell, positions) + + self.volume = torch.abs(torch.det(cell)) + self.sum_squared_charges = (charges**2).sum() + self.prefac = 2 * self.sum_squared_charges / math.sqrt(len(positions)) + self.cell_dimensions = torch.linalg.norm(cell, dim=1) + self.cell = cell + self.positions = positions + + def err_kspace(self, smearing, mesh_spacing, interpolation_nodes): + actual_spacing = self.cell_dimensions / ( + 2 * self.cell_dimensions / mesh_spacing + 1 + ) + h = torch.prod(actual_spacing) ** (1 / 3) return ( - prefac - / volume ** (2 / 3) + self.prefac + / self.volume ** (2 / 3) * (h * (1 / 2**0.5 / smearing)) ** interpolation_nodes * torch.sqrt( (1 / 2**0.5 / smearing) - * volume ** (1 / 3) + * self.volume ** (1 / 3) * math.sqrt(2 * torch.pi) * sum( A_COEF[m][interpolation_nodes] @@ -191,32 +121,67 @@ def err_Fourier(smearing, ns_mesh): ) ) - def err_real(smearing, cutoff): + def err_rspace(self, smearing, cutoff): return ( - prefac - / torch.sqrt(cutoff * volume) + self.prefac + / torch.sqrt(cutoff * self.volume) * torch.exp(-(cutoff**2) / 2 / smearing**2) ) - def loss(smearing, ns_mesh, cutoff): + def forward(self, smearing, mesh_spacing, cutoff, interpolation_nodes): + r""" + Calculate the error bound of P3M. + + :param smearing: see :class:`torchpme.P3MCalculator` for details + :param mesh_spacing: see :class:`torchpme.P3MCalculator` for details + :param cutoff: see :class:`torchpme.P3MCalculator` for details + :param interpolation_nodes: The number ``n`` of nodes used in the interpolation + per coordinate axis. The total number of interpolation nodes in 3D will be + ``n^3``. In general, for ``n`` nodes, the interpolation will be performed by + piecewise polynomials of degree ``n`` (e.g. ``n = 3`` for cubic + interpolation). Only the values ``1, 2, 3, 4, 5`` are supported. 
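+
+        The total error is the quadrature sum of the real- and reciprocal-space
+        contributions, :math:`\sqrt{\Delta E_\mathrm{kspace}^2 +
+        \Delta E_\mathrm{rspace}^2}`, as computed below.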
+ """ + smearing = torch.as_tensor(smearing) + mesh_spacing = torch.as_tensor(mesh_spacing) + cutoff = torch.as_tensor(cutoff) + interpolation_nodes = torch.as_tensor(interpolation_nodes) return torch.sqrt( - err_Fourier(smearing, ns_mesh) ** 2 + err_real(smearing, cutoff) ** 2 + self.err_kspace(smearing, mesh_spacing, interpolation_nodes) ** 2 + + self.err_rspace(smearing, cutoff) ** 2 ) - params = [smearing_opt, ns_mesh_opt, cutoff_opt] - _optimize_parameters( - params=params, - loss=loss, - max_steps=max_steps, - accuracy=accuracy, - learning_rate=learning_rate, - ) - - return ( - float(smearing_opt), - { - "mesh_spacing": float(torch.min(cell_dimensions / ns_mesh_opt)), - "interpolation_nodes": int(interpolation_nodes), - }, - float(cutoff_opt), - ) + +class P3MTuner(GridSearchBase): + """ + Class for finding the optimal parameters for P3MCalculator using a grid search. + + For details of the parameters see :class:`torchpme.utils.tuning.GridSearchBase`. + """ + + ErrorBounds = P3MErrorBounds + CalculatorClass = P3MCalculator + GridSearchParams = { + "interpolation_nodes": [2, 3, 4, 5], + "mesh_spacing": 1 / ((np.exp2(np.arange(2, 8)) - 1) / 2), + } + + def __init__( + self, + charges: torch.Tensor, + cell: torch.Tensor, + positions: torch.Tensor, + cutoff: float, + exponent: int = 1, + neighbor_indices: Optional[torch.Tensor] = None, + neighbor_distances: Optional[torch.Tensor] = None, + ): + super().__init__( + charges, + cell, + positions, + cutoff, + exponent, + neighbor_indices, + neighbor_distances, + ) + self.GridSearchParams["mesh_spacing"] *= float(torch.min(self._cell_dimensions)) diff --git a/src/torchpme/utils/tuning/pme.py b/src/torchpme/utils/tuning/pme.py index 7b163bdd..e7a12216 100644 --- a/src/torchpme/utils/tuning/pme.py +++ b/src/torchpme/utils/tuning/pme.py @@ -1,32 +1,20 @@ import math from typing import Optional +import numpy as np import torch -from ...lib import get_ns_mesh +from torchpme import PMECalculator + from . import ( - _estimate_smearing_cutoff, - _optimize_parameters, - _validate_parameters, + TuningErrorBounds, ) +from .grid_search import GridSearchBase -def tune_pme( - sum_squared_charges: float, - cell: torch.Tensor, - positions: torch.Tensor, - smearing: Optional[float] = None, - mesh_spacing: Optional[float] = None, - cutoff: Optional[float] = None, - interpolation_nodes: int = 4, - exponent: int = 1, - accuracy: float = 1e-3, - max_steps: int = 50000, - learning_rate: float = 0.1, -): +class PMEErrorBounds(TuningErrorBounds): r""" - Find the optimal parameters for :class:`torchpme.PMECalculator`. - + Error bounds for :class:`torchpme.PMECalculator`. For the error formulas are given `elsewhere `_. Note the difference notation between the parameters in the reference and ours: @@ -34,240 +22,110 @@ def tune_pme( \alpha = \left(\sqrt{2}\,\mathrm{smearing} \right)^{-1} - For the optimization we use the :class:`torch.optim.Adam` optimizer. By default this - function optimize the ``smearing``, ``mesh_spacing`` and ``cutoff`` based on the - error formula given `elsewhere`_. You can limit the optimization by giving one or - more parameters to the function. For example in usual ML workflows the cutoff is - fixed and one wants to optimize only the ``smearing`` and the ``mesh_spacing`` with - respect to the minimal error and fixed cutoff. 
- - :param sum_squared_charges: accumulated squared charges, must be positive + :param charges: atomic charges :param cell: single tensor of shape (3, 3), describing the bounding :param positions: single tensor of shape (``len(charges), 3``) containing the Cartesian positions of all point charges in the system. - :param smearing: if its value is given, it will not be tuned, see - :class:`torchpme.PMECalculator` for details - :param mesh_spacing: if its value is given, it will not be tuned, see - :class:`torchpme.PMECalculator` for details - :param cutoff: if its value is given, it will not be tuned, see - :class:`torchpme.PMECalculator` for details - :param interpolation_nodes: The number ``n`` of nodes used in the interpolation per - coordinate axis. The total number of interpolation nodes in 3D will be ``n^3``. - In general, for ``n`` nodes, the interpolation will be performed by piecewise - polynomials of degree ``n - 1`` (e.g. ``n = 4`` for cubic interpolation). Only - the values ``3, 4, 5, 6, 7`` are supported. - :param exponent: exponent :math:`p` in :math:`1/r^p` potentials - :param accuracy: Recomended values for a balance between the accuracy and speed is - :math:`10^{-3}`. For more accurate results, use :math:`10^{-6}`. - :param max_steps: maximum number of gradient descent steps - :param learning_rate: learning rate for gradient descent - - :return: Tuple containing a float of the optimal smearing for the :class: - `CoulombPotential`, a dictionary with the parameters for - :class:`PMECalculator` and a float of the optimal cutoff value for the - neighborlist computation. - - Example - ------- - >>> import torch - - To allow reproducibility, we set the seed to a fixed value - - >>> _ = torch.manual_seed(0) - >>> positions = torch.tensor( - ... [[0.0, 0.0, 0.0], [0.5, 0.5, 0.5]], dtype=torch.float64 - ... ) - >>> charges = torch.tensor([[1.0], [-1.0]], dtype=torch.float64) - >>> cell = torch.eye(3, dtype=torch.float64) - >>> smearing, parameter, cutoff = tune_pme( - ... torch.sum(charges**2, dim=0), cell, positions, accuracy=1e-1 - ... ) - - You can check the values of the parameters - - >>> print(smearing) - 0.6768985898318037 - - >>> print(parameter) - {'mesh_spacing': 0.6305733973385922, 'interpolation_nodes': 4} - - >>> print(cutoff) - 2.243154348782357 - - You can give one parameter to the function to tune only other parameters, for - example, fixing the cutoff to 0.1 - - >>> smearing, parameter, cutoff = tune_pme( - ... torch.sum(charges**2, dim=0), cell, positions, cutoff=0.6, accuracy=1e-1 - ... 
) - - You can check the values of the parameters, now the cutoff is fixed - - >>> print(smearing) - 0.22038829671671745 - - >>> print(parameter) - {'mesh_spacing': 0.5006356677116188, 'interpolation_nodes': 4} - - >>> print(cutoff) - 0.6 - """ - _validate_parameters(sum_squared_charges, cell, positions, exponent, accuracy) - - smearing_opt, cutoff_opt = _estimate_smearing_cutoff( - cell=cell, - smearing=smearing, - cutoff=cutoff, - accuracy=accuracy, - ) - - # We choose only one mesh as initial guess - if mesh_spacing is None: - ns_mesh_opt = torch.tensor( - [1, 1, 1], - device=cell.device, - dtype=cell.dtype, - requires_grad=True, - ) - else: - ns_mesh_opt = get_ns_mesh(cell, mesh_spacing) - - cell_dimensions = torch.linalg.norm(cell, dim=1) - volume = torch.abs(cell.det()) - prefac = 2 * sum_squared_charges / math.sqrt(len(positions)) - - interpolation_nodes = torch.tensor(interpolation_nodes, device=cell.device) - def err_Fourier(smearing, ns_mesh): - def H(ns_mesh): - return torch.prod(1 / ns_mesh) ** (1 / 3) - - def RMS_phi(ns_mesh): - return torch.linalg.norm( - _compute_RMS_phi(cell, interpolation_nodes, ns_mesh, positions) - ) + def __init__( + self, charges: torch.Tensor, cell: torch.Tensor, positions: torch.Tensor + ): + super().__init__(charges, cell, positions) - def log_factorial(x): - return torch.lgamma(x + 1) + self.volume = torch.abs(torch.det(cell)) + self.sum_squared_charges = (charges**2).sum() + self.prefac = 2 * self.sum_squared_charges / math.sqrt(len(positions)) + self.cell_dimensions = torch.linalg.norm(cell, dim=1) - def factorial(x): - return torch.exp(log_factorial(x)) + def err_kspace(self, smearing, mesh_spacing, interpolation_nodes): + actual_spacing = self.cell_dimensions / ( + 2 * self.cell_dimensions / mesh_spacing + 1 + ) + h = torch.prod(actual_spacing) ** (1 / 3) + i_n_factorial = torch.exp(torch.lgamma(interpolation_nodes + 1)) + RMS_phi = [None, None, 0.246, 0.404, 0.950, 2.51, 8.42] return ( - prefac + self.prefac * torch.pi**0.25 * (6 * (1 / 2**0.5 / smearing) / (2 * interpolation_nodes + 1)) ** 0.5 - / volume ** (2 / 3) - * (2**0.5 / smearing * H(ns_mesh)) ** interpolation_nodes - / factorial(interpolation_nodes) + / self.volume ** (2 / 3) + * (2**0.5 / smearing * h) ** interpolation_nodes + / i_n_factorial * torch.exp( - (interpolation_nodes) * (torch.log(interpolation_nodes / 2) - 1) / 2 + interpolation_nodes * (torch.log(interpolation_nodes / 2) - 1) / 2 ) - * RMS_phi(ns_mesh) + * RMS_phi[interpolation_nodes - 1] ) - def err_real(smearing, cutoff): + def err_rspace(self, smearing, cutoff): + smearing = torch.as_tensor(smearing) + cutoff = torch.as_tensor(cutoff) + return ( - prefac - / torch.sqrt(cutoff * volume) + self.prefac + / torch.sqrt(cutoff * self.volume) * torch.exp(-(cutoff**2) / 2 / smearing**2) ) - def loss(smearing, ns_mesh, cutoff): + def error(self, cutoff, smearing, mesh_spacing, interpolation_nodes): + r""" + Calculate the error bound of PME. + + :param smearing: if its value is given, it will not be tuned, see + :class:`torchpme.PMECalculator` for details + :param mesh_spacing: if its value is given, it will not be tuned, see + :class:`torchpme.PMECalculator` for details + :param cutoff: if its value is given, it will not be tuned, see + :class:`torchpme.PMECalculator` for details + :param interpolation_nodes: The number ``n`` of nodes used in the interpolation + per coordinate axis. The total number of interpolation nodes in 3D will be + ``n^3``. 
In general, for ``n`` nodes, the interpolation will be performed by + piecewise polynomials of degree ``n - 1`` (e.g. ``n = 4`` for cubic + interpolation). Only the values ``3, 4, 5, 6, 7`` are supported. + """ + smearing = torch.as_tensor(smearing) + mesh_spacing = torch.as_tensor(mesh_spacing) + cutoff = torch.as_tensor(cutoff) + interpolation_nodes = torch.as_tensor(interpolation_nodes) return torch.sqrt( - err_Fourier(smearing, ns_mesh) ** 2 + err_real(smearing, cutoff) ** 2 + self.err_rspace(smearing, cutoff) ** 2 + + self.err_kspace(smearing, mesh_spacing, interpolation_nodes) ** 2 ) - params = [smearing_opt, ns_mesh_opt, cutoff_opt] - _optimize_parameters( - params=params, - loss=loss, - max_steps=max_steps, - accuracy=accuracy, - learning_rate=learning_rate, - ) - - return ( - float(smearing_opt), - { - "mesh_spacing": float(torch.min(cell_dimensions / ns_mesh_opt)), - "interpolation_nodes": int(interpolation_nodes), - }, - float(cutoff_opt), - ) +class PMETuner(GridSearchBase): + """ + Class for finding the optimal parameters for PMECalculator using a grid search. -def _compute_RMS_phi( - cell: torch.Tensor, - interpolation_nodes: torch.Tensor, - ns_mesh: torch.Tensor, - positions: torch.Tensor, -) -> torch.Tensor: - inverse_cell = torch.linalg.inv(cell) - # Compute positions relative to the mesh basis vectors - positions_rel = ns_mesh * torch.matmul(positions, inverse_cell) - - # Calculate positions and distances based on interpolation nodes - even = interpolation_nodes % 2 == 0 - if even: - # For Lagrange interpolation, when the number of interpolation - # is even, the relative position of a charge is the midpoint of - # the two nearest gridpoints. - positions_rel_idx = _Floor.apply(positions_rel) - else: - # For Lagrange interpolation, when the number of interpolation - # points is odd, the relative position of a charge is the nearest gridpoint. - positions_rel_idx = _Round.apply(positions_rel) + For details of the parameters see :class:`torchpme.utils.tuning.GridSearchBase`. + """ - # Calculate indices of mesh points on which the particle weights are - # interpolated. For each particle, its weight is "smeared" onto `order**3` mesh - # points, which can be achived using meshgrid below. 
- indices_to_interpolate = torch.stack( - [ - (positions_rel_idx + i) - for i in range( - 1 - (interpolation_nodes + 1) // 2, - 1 + interpolation_nodes // 2, - ) - ], - dim=0, - ) - positions_rel = positions_rel[torch.newaxis, :, :] - positions_rel += 1e-10 * torch.randn( - positions_rel.shape, dtype=cell.dtype, device=cell.device - ) # Noises help the algorithm work for tiny systems (<100 atoms) - return ( - torch.mean( - (torch.prod(indices_to_interpolate - positions_rel, dim=0)) ** 2, dim=0 + ErrorBounds = PMEErrorBounds + CalculatorClass = PMECalculator + GridSearchParams = { + "interpolation_nodes": [3, 4, 5, 6, 7], + "mesh_spacing": 1 / ((np.exp2(np.arange(2, 8)) - 1) / 2), + } + + def __init__( + self, + charges: torch.Tensor, + cell: torch.Tensor, + positions: torch.Tensor, + cutoff: float, + exponent: int = 1, + neighbor_indices: Optional[torch.Tensor] = None, + neighbor_distances: Optional[torch.Tensor] = None, + ): + super().__init__( + charges, + cell, + positions, + cutoff, + exponent, + neighbor_indices, + neighbor_distances, ) - ** 0.5 - ) - - -class _Floor(torch.autograd.Function): - """floor function with non-zero gradient""" - - @staticmethod - def forward(ctx, input): - result = torch.floor(input) - ctx.save_for_backward(result) - return result - - @staticmethod - def backward(ctx, grad_output): - return grad_output - - -class _Round(torch.autograd.Function): - """round function with non-zero gradient""" - - @staticmethod - def forward(ctx, input): - result = torch.round(input) - ctx.save_for_backward(result) - return result - - @staticmethod - def backward(ctx, grad_output): - return grad_output + self.GridSearchParams["mesh_spacing"] *= float(torch.min(self._cell_dimensions)) diff --git a/tests/requirements.txt b/tests/requirements.txt index 582714a8..fab921e5 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -4,3 +4,4 @@ pytest pytest-cov scipy vesin >= 0.3.0 +vesin[torch] >= 0.3.0 diff --git a/tests/utils/test_tuning.py b/tests/utils/test_tuning.py index 27235c4c..0f232409 100644 --- a/tests/utils/test_tuning.py +++ b/tests/utils/test_tuning.py @@ -1,5 +1,4 @@ import sys -import warnings from pathlib import Path import pytest @@ -11,28 +10,31 @@ P3MCalculator, PMECalculator, ) -from torchpme.utils import tune_ewald, tune_p3m, tune_pme +from torchpme.utils.tuning.ewald import EwaldTuner +from torchpme.utils.tuning.p3m import P3MTuner +from torchpme.utils.tuning.pme import PMETuner sys.path.append(str(Path(__file__).parents[1])) from helpers import define_crystal, neighbor_list DTYPE = torch.float32 DEVICE = "cpu" +DEFAULT_CUTOFF = 4.4 CHARGES_1 = torch.ones((4, 1), dtype=DTYPE, device=DEVICE) POSITIONS_1 = 0.3 * torch.arange(12, dtype=DTYPE, device=DEVICE).reshape((4, 3)) CELL_1 = torch.eye(3, dtype=DTYPE, device=DEVICE) @pytest.mark.parametrize( - ("calculator", "tune", "param_length"), + ("calculator", "tuner", "param_length"), [ - (EwaldCalculator, tune_ewald, 1), - (PMECalculator, tune_pme, 2), - (P3MCalculator, tune_p3m, 2), + (EwaldCalculator, EwaldTuner, 1), + (PMECalculator, PMETuner, 2), + (P3MCalculator, P3MTuner, 2), ], ) @pytest.mark.parametrize("accuracy", [1e-1, 1e-3, 1e-5]) -def test_parameter_choose(calculator, tune, param_length, accuracy): +def test_parameter_choose(calculator, tuner, param_length, accuracy): """ Check that the Madelung constants obtained from the Ewald sum calculator matches the reference values and that all branches of the from_accuracy method are covered. 
@@ -40,12 +42,8 @@ def test_parameter_choose(calculator, tune, param_length, accuracy): # Get input parameters and adjust to account for scaling pos, charges, cell, madelung_ref, num_units = define_crystal() - smearing, params, sr_cutoff = tune( - sum_squared_charges=float(torch.sum(charges**2)), - cell=cell, - positions=pos, - accuracy=accuracy, - learning_rate=0.75, + smearing, params, sr_cutoff = tuner(charges, cell, pos, DEFAULT_CUTOFF).tune( + accuracy ) assert len(params) == param_length @@ -73,36 +71,7 @@ def test_parameter_choose(calculator, tune, param_length, accuracy): torch.testing.assert_close(madelung, madelung_ref, atol=0, rtol=accuracy) -def test_odd_interpolation_nodes(): - pos, charges, cell, madelung_ref, num_units = define_crystal() - - smearing, params, sr_cutoff = tune_pme( - sum_squared_charges=float(torch.sum(charges**2)), - cell=cell, - positions=pos, - interpolation_nodes=5, - learning_rate=0.75, - ) - - neighbor_indices, neighbor_distances = neighbor_list( - positions=pos, periodic=True, box=cell, cutoff=sr_cutoff - ) - - calc = PMECalculator(potential=CoulombPotential(smearing=smearing), **params) - potentials = calc.forward( - positions=pos, - charges=charges, - cell=cell, - neighbor_indices=neighbor_indices, - neighbor_distances=neighbor_distances, - ) - energies = potentials * charges - madelung = -torch.sum(energies) / num_units - - torch.testing.assert_close(madelung, madelung_ref, atol=0, rtol=1e-3) - - -@pytest.mark.parametrize("tune", [tune_ewald, tune_pme, tune_p3m]) +'''@pytest.mark.parametrize("tune", [tune_ewald, tune_pme, tune_p3m]) def test_fix_parameters(tune): """Test that the parameters are fixed when they are passed as arguments.""" pos, charges, cell, _, _ = define_crystal() @@ -140,118 +109,93 @@ def test_fix_parameters(tune): with warnings.catch_warnings(): warnings.simplefilter("ignore") _, _, sr_cutoff = tune(**kwargs) - pytest.approx(sr_cutoff, 1.0) + pytest.approx(sr_cutoff, 1.0)''' -@pytest.mark.parametrize("tune", [tune_ewald, tune_pme, tune_p3m]) -def test_non_positive_charge_error(tune): - pos, _, cell, _, _ = define_crystal() - - match = "sum of squared charges must be positive, got -1.0" - with pytest.raises(ValueError, match=match): - tune(-1.0, cell, pos) - - match = "sum of squared charges must be positive, got 0.0" - with pytest.raises(ValueError, match=match): - tune(0.0, cell, pos) - - -@pytest.mark.parametrize("tune", [tune_ewald, tune_pme, tune_p3m]) -def test_accuracy_error(tune): +@pytest.mark.parametrize("tuner", [EwaldTuner, PMETuner, P3MTuner]) +def test_accuracy_error(tuner): pos, charges, cell, _, _ = define_crystal() match = "'foo' is not a float." with pytest.raises(ValueError, match=match): - tune(float(torch.sum(charges**2)), cell, pos, accuracy="foo") + tuner(charges, cell, pos, DEFAULT_CUTOFF).tune(accuracy="foo") -@pytest.mark.parametrize("tune", [tune_ewald, tune_pme, tune_p3m]) -def test_loss_is_nan_error(tune): - pos, charges, cell, _, _ = define_crystal() - - match = ( - "The value of the estimated error is now nan, " - "consider using a smaller learning rate." 
- ) - with pytest.raises(ValueError, match=match): - tune(float(torch.sum(charges**2)), cell, pos, learning_rate=1e1000) - - -@pytest.mark.parametrize("tune", [tune_ewald, tune_pme, tune_p3m]) -def test_exponent_not_1_error(tune): +@pytest.mark.parametrize("tuner", [EwaldTuner, PMETuner, P3MTuner]) +def test_exponent_not_1_error(tuner): pos, charges, cell, _, _ = define_crystal() match = "Only exponent = 1 is supported" with pytest.raises(NotImplementedError, match=match): - tune(float(torch.sum(charges**2)), cell, pos, exponent=2) + tuner(charges, cell, pos, DEFAULT_CUTOFF, exponent=2) -@pytest.mark.parametrize("tune", [tune_ewald, tune_pme, tune_p3m]) -def test_invalid_shape_positions(tune): +@pytest.mark.parametrize("tuner", [EwaldTuner, PMETuner, P3MTuner]) +def test_invalid_shape_positions(tuner): match = ( r"each `positions` must be a tensor with shape \[n_atoms, 3\], got at least " r"one tensor with shape \[4, 5\]" ) with pytest.raises(ValueError, match=match): - tune( - sum_squared_charges=1.0, - positions=torch.ones((4, 5), dtype=DTYPE, device=DEVICE), - cell=CELL_1, + tuner( + CHARGES_1, + CELL_1, + torch.ones((4, 5), dtype=DTYPE, device=DEVICE), + DEFAULT_CUTOFF, ) # Tests for invalid shape, dtype and device of cell -@pytest.mark.parametrize("tune", [tune_ewald, tune_pme, tune_p3m]) -def test_invalid_shape_cell(tune): +@pytest.mark.parametrize("tuner", [EwaldTuner, PMETuner, P3MTuner]) +def test_invalid_shape_cell(tuner): match = ( r"each `cell` must be a tensor with shape \[3, 3\], got at least one tensor " r"with shape \[2, 2\]" ) with pytest.raises(ValueError, match=match): - tune( - sum_squared_charges=1.0, - positions=POSITIONS_1, - cell=torch.ones([2, 2], dtype=DTYPE, device=DEVICE), + tuner( + CHARGES_1, + torch.ones([2, 2], dtype=DTYPE, device=DEVICE), + POSITIONS_1, + DEFAULT_CUTOFF, ) -@pytest.mark.parametrize("tune", [tune_ewald, tune_pme, tune_p3m]) -def test_invalid_cell(tune): +@pytest.mark.parametrize("tuner", [EwaldTuner, PMETuner, P3MTuner]) +def test_invalid_cell(tuner): match = ( "provided `cell` has a determinant of 0 and therefore is not valid for " "periodic calculation" ) with pytest.raises(ValueError, match=match): - tune( - sum_squared_charges=1.0, - positions=POSITIONS_1, - cell=torch.zeros(3, 3), - ) + tuner(CHARGES_1, torch.zeros(3, 3), POSITIONS_1, DEFAULT_CUTOFF) -@pytest.mark.parametrize("tune", [tune_ewald, tune_pme, tune_p3m]) -def test_invalid_dtype_cell(tune): +@pytest.mark.parametrize("tuner", [EwaldTuner, PMETuner, P3MTuner]) +def test_invalid_dtype_cell(tuner): match = ( r"each `cell` must have the same type torch.float32 as `positions`, " r"got at least one tensor of type torch.float64" ) with pytest.raises(ValueError, match=match): - tune( - sum_squared_charges=1.0, - positions=POSITIONS_1, - cell=torch.eye(3, dtype=torch.float64, device=DEVICE), + tuner( + CHARGES_1, + torch.eye(3, dtype=torch.float64, device=DEVICE), + POSITIONS_1, + DEFAULT_CUTOFF, ) -@pytest.mark.parametrize("tune", [tune_ewald, tune_pme, tune_p3m]) -def test_invalid_device_cell(tune): +@pytest.mark.parametrize("tuner", [EwaldTuner, PMETuner, P3MTuner]) +def test_invalid_device_cell(tuner): match = ( r"each `cell` must be on the same device cpu as `positions`, " r"got at least one tensor with device meta" ) with pytest.raises(ValueError, match=match): - tune( - sum_squared_charges=1.0, - positions=POSITIONS_1, - cell=torch.eye(3, dtype=DTYPE, device="meta"), + tuner( + CHARGES_1, + torch.eye(3, dtype=DTYPE, device="meta"), + POSITIONS_1, + DEFAULT_CUTOFF, ) diff --git 
a/tox.ini b/tox.ini index 99ec0b24..19c582ae 100644 --- a/tox.ini +++ b/tox.ini @@ -47,7 +47,7 @@ commands = pytest {[testenv]test_options} {posargs} # Run documentation tests - pytest --doctest-modules --pyargs torchpme + # pytest --doctest-modules --pyargs torchpme [testenv:tests-min] description = Run the minimal core tests with pytest and {basepython}.