Commit

New class: _LM

mlondschien committed Jul 3, 2024
1 parent 5a0390c commit dc4d60a

Showing 2 changed files with 265 additions and 95 deletions.
ivmodels/tests/lagrange_multiplier.py (272 changes: 200 additions & 72 deletions)
@@ -1,66 +1,218 @@
import numpy as np
import scipy

from ivmodels.models.kclass import KClass
from ivmodels.tests.utils import _check_test_inputs
from ivmodels.utils import oproj, proj


class _LM:
    """
    Helper class to compute the Lagrange multiplier test statistic and its derivative.

    Parameters
    ----------
    X: np.ndarray of dimension (n, mx)
        Regressors.
    y: np.ndarray of dimension (n,) or (n, 1)
        Outcomes.
    W: np.ndarray of dimension (n, mw)
        Regressors to minimize over.
    dof: int
        Degrees of freedom for variance computation. Equal to n - k - mc.
    Z: np.ndarray of dimension (n, k), optional, default=None
        Instruments. Either ``Z`` or ``X_proj``, ``y_proj``, and ``W_proj`` must be
        provided.
    X_proj: np.ndarray of dimension (n, mx), optional, default=None
        Projection of ``X`` onto the column space of ``Z``.
    y_proj: np.ndarray of dimension (n,) or (n, 1), optional, default=None
        Projection of ``y`` onto the column space of ``Z``.
    W_proj: np.ndarray of dimension (n, mw), optional, default=None
        Projection of ``W`` onto the column space of ``Z``.
    """

    def __init__(self, X, y, W, dof, Z=None, X_proj=None, y_proj=None, W_proj=None):
        self.X = X
        self.y = y.reshape(-1, 1)
        self.W = W

        if X_proj is None or y_proj is None or W_proj is None:
            if Z is None:
                raise ValueError("Z must be provided to compute the projection.")
            else:
                X_proj, y_proj, W_proj = proj(Z, self.X, self.y, self.W)

        self.X_proj = X_proj
        self.X_orth = X - X_proj
        self.y_proj = y_proj.reshape(-1, 1)
        self.y_orth = self.y - self.y_proj
        self.W_proj = W_proj
        self.W_orth = W - W_proj
        self.yS = np.hstack((self.y, X, W))
        self.yS_proj = np.hstack((self.y_proj, X_proj, W_proj))
        self.yS_orth = np.hstack((self.y_orth, self.X_orth, self.W_orth))

        self.n, self.mx = X.shape
        self.mw = W.shape[1]
        self.dof = dof

        # Precompute cross products used for Sigma, sigma_hat, and the LIML warm start.
        self.yS_orth_at_yS = self.yS_orth.T @ self.yS_orth
        self.yS_proj_at_yS = self.yS_proj.T @ self.yS_proj
    def derivative(self, beta, gamma=None):
        """Return LM and derivative of LM at (beta, gamma) w.r.t. (beta, gamma)."""
        if gamma is not None:
            one_beta_gamma = np.hstack(([1], -beta.flatten(), -gamma.flatten()))
        else:
            one_beta_gamma = np.hstack(([1], -beta.flatten()))

        residuals = self.yS @ one_beta_gamma
        residuals_proj = self.yS_proj @ one_beta_gamma
        # residuals_orth = residuals - residuals_proj

        Sigma = one_beta_gamma.T @ self.yS_orth_at_yS
        sigma_hat = Sigma @ one_beta_gamma
        Sigma = Sigma[1:] / sigma_hat

        St = self.yS[:, 1:] - np.outer(residuals, Sigma)
        St_proj = self.yS_proj[:, 1:] - np.outer(residuals_proj, Sigma)

        solved = np.linalg.solve(St_proj.T @ St_proj, St_proj.T @ residuals_proj)
        residuals_proj_St = St_proj @ solved

        lm = residuals_proj_St.T @ residuals_proj_St / sigma_hat
        ar = residuals_proj.T @ residuals_proj / sigma_hat
        kappa = ar - lm

        first_term = -St_proj.T @ residuals_proj
        second_term = kappa * (St - St_proj).T @ St @ solved

        d_lm = 2 * (first_term + second_term) / sigma_hat

        return (self.dof * lm.item(), self.dof * d_lm.flatten())

    def lm(self, beta):
        """Compute the LM test statistic at ``beta``, minimizing over ``gamma``."""
        beta = beta.reshape(-1, 1)

        one_beta_id = np.zeros((1 + self.mx + self.mw, 1 + self.mw))
        one_beta_id[0, 0] = 1
        one_beta_id[1 : (1 + self.mx), 0] = -beta[:, 0]
        one_beta_id[(1 + self.mx) :, 1:] = np.diag(np.ones(self.mw))

        # LIML estimate of the nuisance parameter gamma, used as a warm start.
        gamma_liml = scipy.linalg.eigh(
            b=one_beta_id.T @ self.yS_orth_at_yS @ one_beta_id,
            a=one_beta_id.T @ self.yS_proj_at_yS @ one_beta_id,
            subset_by_index=[0, 0],
        )[1][:, 0]
        gamma_liml = -gamma_liml[1:] / gamma_liml[0]

        def _derivative(gamma):
            result = self.derivative(beta, gamma)
            return (result[0], result[1][self.mx :])

        res1 = scipy.optimize.minimize(
            _derivative,
            jac=True,
            x0=gamma_liml,
        )

        res2 = scipy.optimize.minimize(
            _derivative,
            jac=True,
            x0=np.zeros_like(gamma_liml),
        )

        return min(res1.fun, res2.fun)

    def lm_and_derivative(self, betas, gamma_0):
        """Compute LM statistics and their derivatives for each row of ``betas``."""
        assert len(betas.shape) == 2
        assert betas.shape[1] == self.mx

        lms = np.zeros(betas.shape[0])
        derivatives = np.zeros((betas.shape[0], self.mx))

        class _derivatives:
            """Helper class to recover the last derivative called in the optimization."""

            def __init__(inner, beta):
                inner.beta = beta
                inner.derivative = None

            def __call__(inner, gamma):
                # ``self`` is the enclosing _LM instance, captured via closure.
                inner.derivative = self.derivative(inner.beta, gamma)
                return (inner.derivative[0], inner.derivative[1][self.mx :])

        for idx in range(betas.shape[0]):
            _derivative = _derivatives(betas[idx, :])
            res = scipy.optimize.minimize(
                _derivative,
                jac=True,
                x0=gamma_0,
            )

            lms[idx] = res.fun
            # The first mx entries of the gradient are the derivative w.r.t. beta.
            derivatives[idx, :] = _derivative.derivative[1][: self.mx]
            # Warm-start the next optimization at the previous minimizer.
            gamma_0 = res.x

        return lms, derivatives
# def _LM(X, X_proj, Y, Y_proj, W, W_proj, beta):
#     """
#     Compute the Lagrange multiplier test statistic and its derivative at ``beta``.
#
#     Parameters
#     ----------
#     X: np.ndarray of dimension (n, mx)
#         Regressors.
#     X_proj: np.ndarray of dimension (n, mx)
#         Projection of ``X`` onto the column space of ``Z``.
#     Y: np.ndarray of dimension (n,)
#         Outcomes.
#     Y_proj: np.ndarray of dimension (n,)
#         Projection of ``Y`` onto the column space of ``Z``.
#     W: np.ndarray of dimension (n, mw)
#         Regressors.
#     W_proj: np.ndarray of dimension (n, mw)
#         Projection of ``W`` onto the column space of ``Z``.
#     beta: np.ndarray of dimension (mx,)
#         Coefficients to test.
#
#     Returns
#     -------
#     lm: float
#         The Lagrange multiplier test statistic.
#     d_lm: float
#         The derivative of the Lagrange multiplier test statistic at ``beta``.
#     """
#     n = X.shape[0]
#
#     residuals = Y - X @ beta
#     residuals_proj = Y_proj - X_proj @ beta
#     residuals_orth = residuals - residuals_proj
#
#     sigma_hat = residuals_orth.T @ residuals_orth
#
#     S = np.hstack((X, W))
#     S_proj = np.hstack((X_proj, W_proj))
#     Sigma = residuals_orth.T @ S / sigma_hat
#     St = S - np.outer(residuals, Sigma)
#     St_proj = S_proj - np.outer(residuals_proj, Sigma)
#
#     solved = np.linalg.solve(St_proj.T @ St_proj, St_proj.T @ residuals_proj)
#     residuals_proj_St = St_proj @ solved
#
#     lm = residuals_proj_St.T @ residuals_proj_St / sigma_hat
#     ar = residuals_proj.T @ residuals_proj / sigma_hat
#     kappa = ar - lm
#
#     first_term = -St_proj[:, : X.shape[1]].T @ residuals_proj
#     second_term = kappa * (St - St_proj)[:, : X.shape[1]].T @ St @ solved
#
#     d_lm = 2 * (first_term + second_term) / sigma_hat
#
#     return (n * lm.item(), n * d_lm.flatten())


@@ -130,45 +282,21 @@ def lagrange_multiplier_test(Z, X, y, beta, W=None, C=None, fit_intercept=True):
     if C.shape[1] > 0:
         X, y, Z, W = oproj(C, X, y, Z, W)
 
-    residuals = y - X @ beta
-
-    X_proj, W_proj, residuals_proj = proj(Z, X, W, residuals)
-
     if W.shape[1] > 0:
-        gamma_hat = KClass(kappa="liml").fit(X=W, y=y - X @ beta, Z=Z).coef_
-        res = scipy.optimize.minimize(
-            lambda gamma: _LM(
-                X=W,
-                X_proj=W_proj,
-                Y=residuals,
-                Y_proj=residuals_proj,
-                W=X,
-                W_proj=X_proj,
-                beta=gamma,
-            ),
-            jac=True,
-            x0=gamma_hat,
-        )
-
-        res2 = scipy.optimize.minimize(
-            lambda gamma: _LM(
-                X=W,
-                X_proj=W_proj,
-                Y=residuals,
-                Y_proj=residuals_proj,
-                W=X,
-                W_proj=X_proj,
-                beta=gamma,
-            ),
-            jac=True,
-            x0=np.zeros_like(gamma_hat),
-        )
-
-        statistic = min(res.fun, res2.fun) / n * (n - k - C.shape[1])
+        statistic = _LM(
+            X=X,
+            y=y,
+            W=W,
+            Z=Z,
+            dof=n - k - C.shape[1],
+        ).lm(beta)
 
         p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx)
 
     else:
+        residuals = y - X @ beta
+        residuals_proj, X_proj = proj(Z, residuals, X)
+
         orth_residuals = residuals - residuals_proj
 
         sigma_hat = residuals.T @ orth_residuals
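For reference, reading off the new ``derivative`` method (this is a sketch of the quantity being computed, not a formula stated in the commit): with residual $\varepsilon(\beta, \gamma) = y - X\beta - W\gamma$, regressor matrix $S = (X \ W)$, and $P_Z$, $M_Z$ the projections onto the column space of ``Z`` and its orthogonal complement, the method evaluates

$$ \hat\sigma^2 = \varepsilon^\top M_Z\, \varepsilon, \qquad \tilde S = S - \varepsilon \cdot \frac{\varepsilon^\top M_Z S}{\hat\sigma^2}, \qquad \mathrm{LM}(\beta, \gamma) = \mathrm{dof} \cdot \frac{\varepsilon^\top P_{P_Z \tilde S}\, \varepsilon}{\hat\sigma^2}, $$

together with the gradient of $\mathrm{LM}$ with respect to $(\beta, \gamma)$. The method ``lm`` then returns $\min_\gamma \mathrm{LM}(\beta, \gamma)$.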

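The warm start in ``lm`` replaces the previous ``KClass(kappa="liml")`` fit. Assuming the standard characterization of LIML as a minimum-eigenvalue problem, the call to ``scipy.linalg.eigh(..., subset_by_index=[0, 0])`` on the precomputed cross products picks the eigenvector of the smallest generalized eigenvalue and thus solves

$$ \hat\gamma_{\mathrm{LIML}} \in \arg\min_\gamma \frac{(y - X\beta - W\gamma)^\top P_Z\, (y - X\beta - W\gamma)}{(y - X\beta - W\gamma)^\top M_Z\, (y - X\beta - W\gamma)}. $$

Running the minimization from both $\hat\gamma_{\mathrm{LIML}}$ and $\gamma = 0$ and keeping the smaller of the two results appears intended to guard against local minima of the LM statistic in $\gamma$.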
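A minimal usage sketch of the new class (illustrative only; the synthetic data, dimensions, and seed are not from the commit):

import numpy as np

from ivmodels.tests.lagrange_multiplier import _LM

rng = np.random.default_rng(0)
n, k, mx, mw = 100, 3, 1, 1

# Simulate instruments Z and endogenous regressors X, W.
Z = rng.normal(size=(n, k))
X = Z @ rng.normal(size=(k, mx)) + rng.normal(size=(n, mx))
W = Z @ rng.normal(size=(k, mw)) + rng.normal(size=(n, mw))
y = (X + W).sum(axis=1) + rng.normal(size=n)

# No included exogenous covariates, so mc = 0 and dof = n - k.
lm = _LM(X=X, y=y, W=W, Z=Z, dof=n - k)

# Statistic and gradient at a fixed (beta, gamma).
statistic, gradient = lm.derivative(np.array([1.0]), np.array([1.0]))

# Statistic minimized over the nuisance parameter gamma.
statistic = lm.lm(np.array([1.0]))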
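The nested ``_derivatives`` class in ``lm_and_derivative`` exists because ``scipy.optimize.minimize`` does not expose the jacobian of its last evaluation. The same pattern in isolation (a sketch with a stand-in objective; it relies, as the committed code does, on the optimizer's final evaluation happening at the returned iterate):

import numpy as np
import scipy.optimize


class _recorder:
    """Callable that records the last (value, jacobian) pair seen by the optimizer."""

    def __init__(self):
        self.last = None

    def __call__(self, x):
        value, jac = float((x**2).sum()), 2 * x  # stand-in objective
        self.last = (value, jac)
        return value, jac


fun = _recorder()
res = scipy.optimize.minimize(fun, jac=True, x0=np.ones(2))
value, jac = fun.last  # derivative information at the final iterate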
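At the public entry point only the statistic computation changes, so usage stays as before. A sketch, assuming ``lagrange_multiplier_test`` is importable from ``ivmodels.tests`` and returns the pair ``(statistic, p_value)``, with ``Z``, ``X``, ``W``, ``y``, and ``np`` as in the sketch above:

from ivmodels.tests import lagrange_multiplier_test

statistic, p_value = lagrange_multiplier_test(
    Z=Z, X=X, y=y, beta=np.array([1.0]), W=W
)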