diff --git a/ivmodels/tests/lagrange_multiplier.py b/ivmodels/tests/lagrange_multiplier.py
index 0fa0e6b..5d0ec0f 100644
--- a/ivmodels/tests/lagrange_multiplier.py
+++ b/ivmodels/tests/lagrange_multiplier.py
@@ -1,66 +1,218 @@
 import numpy as np
 import scipy

-from ivmodels.models.kclass import KClass
 from ivmodels.tests.utils import _check_test_inputs
 from ivmodels.utils import oproj, proj


-def _LM(X, X_proj, Y, Y_proj, W, W_proj, beta):
+class _LM:
     """
-    Compute the Lagrange multiplier test statistic and its derivative at ``beta``.
+    Helper class to compute the Lagrange multiplier test statistic and its derivative.

     Parameters
     ----------
     X: np.ndarray of dimension (n, mx)
         Regressors.
-    X_proj: np.ndarray of dimension (n, mx)
-        Projection of ``X`` onto the column space of ``Z``.
-    Y: np.ndarray of dimension (n,)
+    y: np.ndarray of dimension (n,) or (n, 1)
         Outcomes.
-    Y_proj: np.ndarray of dimension (n,)
-        Projection of ``Y`` onto the column space of ``Z``.
     W: np.ndarray of dimension (n, mw)
-        Regressors.
-    W_proj: np.ndarray of dimension (n, mw)
+        Regressors to minimize over.
+    dof: int
+        Degrees of freedom for variance computation. Equal to ``n - k - mc``.
+    Z: np.ndarray of dimension (n, k), optional, default=None
+        Instruments. Either ``Z`` or ``X_proj``, ``y_proj``, and ``W_proj`` must be
+        provided.
+    X_proj: np.ndarray of dimension (n, mx), optional, default=None
+        Projection of ``X`` onto the column space of ``Z``.
+    y_proj: np.ndarray of dimension (n,) or (n, 1), optional, default=None
+        Projection of ``y`` onto the column space of ``Z``.
+    W_proj: np.ndarray of dimension (n, mw), optional, default=None
         Projection of ``W`` onto the column space of ``Z``.
-    beta: np.ndarray of dimension (mx,)
-        Coefficients to test.
-
-    Returns
-    -------
-    lm: float
-        The Lagrange multiplier test statistic.
-    d_lm: float
-        The derivative of the Lagrange multiplier test statistic at ``beta``.
     """

-    n = X.shape[0]
-    residuals = Y - X @ beta
-    residuals_proj = Y_proj - X_proj @ beta
-    residuals_orth = residuals - residuals_proj
+    def __init__(self, X, y, W, dof, Z=None, X_proj=None, y_proj=None, W_proj=None):
+        self.X = X
+        self.y = y.reshape(-1, 1)
+        self.W = W
+
+        if X_proj is None or y_proj is None or W_proj is None:
+            if Z is None:
+                raise ValueError("Z must be provided to compute the projection.")
+            else:
+                X_proj, y_proj, W_proj = proj(Z, self.X, self.y, self.W)
+
+        self.X_proj = X_proj
+        self.X_orth = X - X_proj
+        self.y_proj = y_proj.reshape(-1, 1)
+        self.y_orth = self.y - self.y_proj
+        self.W_proj = W_proj
+        self.W_orth = W - W_proj
+        self.yS = np.hstack((self.y, X, W))
+        self.yS_proj = np.hstack((self.y_proj, X_proj, W_proj))
+        self.yS_orth = np.hstack((self.y_orth, self.X_orth, self.W_orth))
+
+        self.n, self.mx = X.shape
+        self.mw = W.shape[1]
+        self.dof = dof
+
+        # Precomputed for ``Sigma`` and ``sigma_hat`` in ``derivative`` and for
+        # the LIML estimate of gamma in ``lm``.
+        self.yS_orth_at_yS = self.yS_orth.T @ self.yS_orth
+        # Precomputed for the LIML estimate of gamma in ``lm``.
+        self.yS_proj_at_yS = self.yS_proj.T @ self.yS_proj
+
+    def derivative(self, beta, gamma=None):
+        """Return the LM statistic and its derivative w.r.t. ``(beta, gamma)``."""
(beta, gamma).""" + if gamma is not None: + one_beta_gamma = np.hstack(([1], -beta.flatten(), -gamma.flatten())) + else: + one_beta_gamma = np.hstack(([1], -beta.flatten())) + + residuals = self.yS @ one_beta_gamma + residuals_proj = self.yS_proj @ one_beta_gamma + # residuals_orth = residuals - residuals_proj + + Sigma = one_beta_gamma.T @ self.yS_orth_at_yS + sigma_hat = Sigma @ one_beta_gamma + Sigma = Sigma[1:] / sigma_hat + + St = self.yS[:, 1:] - np.outer(residuals, Sigma) + St_proj = self.yS_proj[:, 1:] - np.outer(residuals_proj, Sigma) + + solved = np.linalg.solve(St_proj.T @ St_proj, St_proj.T @ residuals_proj) + residuals_proj_St = St_proj @ solved + + lm = residuals_proj_St.T @ residuals_proj_St / sigma_hat + ar = residuals_proj.T @ residuals_proj / sigma_hat + kappa = ar - lm + + first_term = -St_proj.T @ residuals_proj + second_term = kappa * (St - St_proj).T @ St @ solved + + d_lm = 2 * (first_term + second_term) / sigma_hat + + return (self.dof * lm.item(), self.dof * d_lm.flatten()) + + def lm(self, beta): + beta = beta.reshape(-1, 1) + + one_beta_id = np.zeros((1 + self.mx + self.mw, 1 + self.mw)) + one_beta_id[0, 0] = 1 + one_beta_id[1 : (1 + self.mx), 0] = -beta[:, 0] + one_beta_id[(1 + self.mx) :, 1:] = np.diag(np.ones(self.mw)) + + gamma_liml = scipy.linalg.eigh( + b=one_beta_id.T @ self.yS_orth_at_yS @ one_beta_id, + a=one_beta_id.T @ self.yS_proj_at_yS @ one_beta_id, + subset_by_index=[0, 0], + )[1][:, 0] + gamma_liml = -gamma_liml[1:] / gamma_liml[0] + + def _derivative(gamma): + result = self.derivative(beta, gamma) + return (result[0], result[1][self.mx :]) + + res1 = scipy.optimize.minimize( + _derivative, + jac=True, + x0=gamma_liml, + ) + + res2 = scipy.optimize.minimize( + _derivative, + jac=True, + x0=np.zeros_like(gamma_liml), + ) + + return min(res1.fun, res2.fun) + + def lm_and_derivative(self, betas, gamma_0): + assert len(betas.shape) == 2 + assert betas.shape[1] == self.mx + + lms = np.zeros(betas.shape[0]) + derivatives = np.zeros((betas.shape[0], self.mx)) + + class _derivatives: + """Helper class to recover last derivative called in the optimization.""" - sigma_hat = residuals_orth.T @ residuals_orth + def __init__(self, beta): + self.beta = beta + self.jac = None - S = np.hstack((X, W)) - S_proj = np.hstack((X_proj, W_proj)) - Sigma = residuals_orth.T @ S / sigma_hat - St = S - np.outer(residuals, Sigma) - St_proj = S_proj - np.outer(residuals_proj, Sigma) + def __call__(self, gamma): + self.derivative = self.derivative(self.beta, gamma) + return (self.derivative[0], self.derivative[1][self.mx :]) - solved = np.linalg.solve(St_proj.T @ St_proj, St_proj.T @ residuals_proj) - residuals_proj_St = St_proj @ solved + for idx in betas.shape[0]: + _derivative = _derivatives(betas[idx, :]) + res = scipy.optimize.minimize( + _derivative, + jac=True, + x0=gamma_0, + ) - lm = residuals_proj_St.T @ residuals_proj_St / sigma_hat - ar = residuals_proj.T @ residuals_proj / sigma_hat - kappa = ar - lm + lms[idx] = res.fun + derivatives[idx, :] = _derivative.derivative[: self.mx] + gamma_0 = res.x - first_term = -St_proj[:, : X.shape[1]].T @ residuals_proj - second_term = kappa * (St - St_proj)[:, : X.shape[1]].T @ St @ solved + return lms, derivatives - d_lm = 2 * (first_term + second_term) / sigma_hat - return (n * lm.item(), n * d_lm.flatten()) +# def _LM(X, X_proj, Y, Y_proj, W, W_proj, beta): +# """ +# Compute the Lagrange multiplier test statistic and its derivative at ``beta``. + +# Parameters +# ---------- +# X: np.ndarray of dimension (n, mx) +# Regressors. 
@@ -130,45 +282,21 @@ def lagrange_multiplier_test(Z, X, y, beta, W=None, C=None, fit_intercept=True):
     if C.shape[1] > 0:
         X, y, Z, W = oproj(C, X, y, Z, W)

-    residuals = y - X @ beta
-
-    X_proj, W_proj, residuals_proj = proj(Z, X, W, residuals)
-
     if W.shape[1] > 0:
-        gamma_hat = KClass(kappa="liml").fit(X=W, y=y - X @ beta, Z=Z).coef_
-        res = scipy.optimize.minimize(
-            lambda gamma: _LM(
-                X=W,
-                X_proj=W_proj,
-                Y=residuals,
-                Y_proj=residuals_proj,
-                W=X,
-                W_proj=X_proj,
-                beta=gamma,
-            ),
-            jac=True,
-            x0=gamma_hat,
-        )
-
-        res2 = scipy.optimize.minimize(
-            lambda gamma: _LM(
-                X=W,
-                X_proj=W_proj,
-                Y=residuals,
-                Y_proj=residuals_proj,
-                W=X,
-                W_proj=X_proj,
-                beta=gamma,
-            ),
-            jac=True,
-            x0=np.zeros_like(gamma_hat),
-        )
-
-        statistic = min(res.fun, res2.fun) / n * (n - k - C.shape[1])
+        statistic = _LM(
+            X=X,
+            y=y,
+            W=W,
+            Z=Z,
+            dof=n - k - C.shape[1],
+        ).lm(beta)

         p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx)
     else:
+        residuals = y - X @ beta
+        residuals_proj, X_proj = proj(Z, residuals, X)
+
         orth_residuals = residuals - residuals_proj

         sigma_hat = residuals.T @ orth_residuals
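Usage note (not part of the patch): with the helper class in place, the subvector case of the public test reduces to ``_LM(...).lm(beta)`` as in the hunk above. A sketch using ``simulate_gaussian_iv`` as imported in the tests below, assuming ``lagrange_multiplier_test`` returns the pair ``(statistic, p_value)``:

    import numpy as np

    from ivmodels.simulate import simulate_gaussian_iv
    from ivmodels.tests.lagrange_multiplier import lagrange_multiplier_test

    Z, X, y, _, W = simulate_gaussian_iv(n=1000, mx=2, k=10, mw=2, include_intercept=False)

    # Test H0: beta = 0 for the X coefficients, minimizing the statistic over
    # the coefficients of W. Under H0, the statistic is approximately chi2(mx).
    statistic, p_value = lagrange_multiplier_test(Z=Z, X=X, y=y, beta=np.zeros(2), W=W)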
diff --git a/tests/tests/test_lagrange_multiplier.py b/tests/tests/test_lagrange_multiplier.py
index 6b41013..bc0c891 100644
--- a/tests/tests/test_lagrange_multiplier.py
+++ b/tests/tests/test_lagrange_multiplier.py
@@ -2,46 +2,88 @@
 import pytest
 import scipy

+from ivmodels.simulate import simulate_gaussian_iv
 from ivmodels.tests.lagrange_multiplier import _LM
 from ivmodels.utils import proj


 @pytest.mark.parametrize(
-    "n, mx, mc, k, u",
-    [(100, 1, 1, 2, 1), (100, 1, 2, 5, 2), (1000, 2, 5, 10, 2), (1000, 5, 2, 10, 2)],
+    "n, mx, mw",
+    [(100, 1, 1)],
 )
-def test_lm_gradient(n, mx, mc, k, u):
+def test__LM_raises(n, mx, mw):
     rng = np.random.RandomState(0)
+    X, W, y = rng.randn(n, mx), rng.randn(n, mw), rng.randn(n)

-    delta_X = rng.normal(0, 1, (u, mx))
-    delta_W = rng.normal(0, 1, (u, mc))
-    delta_y = rng.normal(0, 1, (u, 1))
+    with pytest.raises(ValueError, match="Z must"):
+        _LM(X=X, y=y, W=W, dof=7)

-    beta = rng.normal(0, 0.1, (mx, 1))
-    gamma = 10 * rng.normal(0, 0.1, (mc, 1))

-    Pi_X = rng.normal(0, 1, (k, mx))
-    Pi_W = rng.normal(0, 1, (k, mc))
+@pytest.mark.parametrize(
+    "n, mx, mw, k",
+    [(100, 1, 1, 2), (100, 1, 2, 5), (1000, 2, 5, 10), (1000, 5, 2, 10)],
+)
+def test__LM__init__(n, mx, mw, k):
+    rng = np.random.RandomState(0)
+    X, W, y, Z = rng.randn(n, mx), rng.randn(n, mw), rng.randn(n), rng.randn(n, k)
+
+    X_proj, W_proj, y_proj = proj(Z, X, W, y)

-    U = rng.normal(0, 1, (n, u))
+    lm1 = _LM(X=X, y=y, W=W, dof=7, X_proj=X_proj, y_proj=y_proj, W_proj=W_proj)
+    lm2 = _LM(X=X, y=y, W=W, dof=7, Z=Z)
+    assert lm1.__dict__.keys() == lm2.__dict__.keys()
+    assert all(
+        [
+            np.all(np.isclose(lm1.__dict__[k], lm2.__dict__[k]))
+            for k in lm1.__dict__.keys()
+        ]
+    )

-    Z = rng.normal(0, 1, (n, k))
-    X = Z @ Pi_X + U @ delta_X + rng.normal(0, 1, (n, mx))
-    W = Z @ Pi_W + U @ delta_W + rng.normal(0, 1, (n, mc))
-    y = X @ beta + W @ gamma + U @ delta_y + rng.normal(0, 1, (n, 1))

-    X_proj = proj(Z, X)
-    W_proj = proj(Z, W)
-    y_proj = proj(Z, y)
+@pytest.mark.parametrize(
+    "n, mx, mw, k",
+    [(100, 1, 1, 2), (100, 1, 2, 5), (1000, 2, 5, 10), (1000, 5, 2, 10)],
+)
+def test_lm_gradient(n, mx, mw, k):
+    Z, X, y, _, W = simulate_gaussian_iv(
+        n=n, mx=mx, k=k, mw=mw, include_intercept=False
+    )
+    lm = _LM(X=X, y=y, W=W, dof=7, Z=Z)

+    rng = np.random.RandomState(0)
     for _ in range(5):
-        beta_test = rng.normal(0, 0.1, (mx, 1))
+        beta_gamma_test = rng.normal(0, 1, mx + mw)

         grad_approx = scipy.optimize.approx_fprime(
-            beta_test.flatten(),
-            lambda b: _LM(X, X_proj, y, y_proj, W, W_proj, b.reshape(-1, 1))[0],
+            beta_gamma_test,
+            lambda b: lm.derivative(b)[0],
             1e-6,
         )
-        grad = _LM(X, X_proj, y, y_proj, W, W_proj, beta_test.reshape(-1, 1))[1]
+        grad = lm.derivative(beta_gamma_test)[1]
+
+        assert np.allclose(grad, grad_approx, rtol=5e-5, atol=5e-5)
+
+
+@pytest.mark.parametrize(
+    "n, mx, mw, k",
+    [(100, 2, 5, 10), (100, 5, 2, 10)],
+)
+def test_lm_gradient_beta_gamma(n, mx, mw, k):
+    Z, X, y, _, W = simulate_gaussian_iv(
+        n=n, mx=mx, k=k, mw=mw, include_intercept=False
+    )
+    lm = _LM(X=X, y=y, W=W, dof=7, Z=Z)
+
+    rng = np.random.RandomState(0)
+    beta = rng.normal(0, 1, mx)
+    gamma = rng.normal(0, 1, mw)
+
+    assert np.allclose(
+        lm.derivative(np.concatenate([beta, gamma]))[0],
+        lm.derivative(beta, gamma)[0],
+    )

-    assert np.allclose(grad.flatten(), grad_approx, rtol=1e-4, atol=1e-4)
+    assert np.allclose(
+        lm.derivative(np.concatenate([beta, gamma]))[1],
+        lm.derivative(beta, gamma)[1],
+    )
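Finally, ``lm_and_derivative`` is not exercised by the tests above. A hypothetical usage sketch of the path fixed in this diff, evaluating the statistic along a grid of betas and warm-starting each inner minimization over gamma at the previous optimum:

    import numpy as np

    from ivmodels.simulate import simulate_gaussian_iv
    from ivmodels.tests.lagrange_multiplier import _LM

    n, mx, mw, k = 1000, 1, 2, 5
    Z, X, y, _, W = simulate_gaussian_iv(n=n, mx=mx, k=k, mw=mw, include_intercept=False)
    lm = _LM(X=X, y=y, W=W, dof=n - k, Z=Z)

    # betas must have shape (n_betas, mx); gamma_0 seeds the first minimization
    # and each solution seeds the next, so neighboring betas converge quickly.
    betas = np.linspace(-1, 1, 25).reshape(-1, mx)
    lms, derivatives = lm.lm_and_derivative(betas, gamma_0=np.zeros(mw))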