diff --git a/ivmodels/tests/lagrange_multiplier.py b/ivmodels/tests/lagrange_multiplier.py
index 0fa0e6b..5d0ec0f 100644
--- a/ivmodels/tests/lagrange_multiplier.py
+++ b/ivmodels/tests/lagrange_multiplier.py
@@ -1,66 +1,218 @@
 import numpy as np
 import scipy

-from ivmodels.models.kclass import KClass
 from ivmodels.tests.utils import _check_test_inputs
 from ivmodels.utils import oproj, proj


-def _LM(X, X_proj, Y, Y_proj, W, W_proj, beta):
+class _LM:
     """
-    Compute the Lagrange multiplier test statistic and its derivative at ``beta``.
+    Helper class to compute the Lagrange multiplier test statistic and its derivative.

     Parameters
     ----------
     X: np.ndarray of dimension (n, mx)
         Regressors.
-    X_proj: np.ndarray of dimension (n, mx)
-        Projection of ``X`` onto the column space of ``Z``.
-    Y: np.ndarray of dimension (n,)
+    y: np.ndarray of dimension (n,) or (n, 1)
         Outcomes.
-    Y_proj: np.ndarray of dimension (n,)
-        Projection of ``Y`` onto the column space of ``Z``.
     W: np.ndarray of dimension (n, mw)
-        Regressors.
-    W_proj: np.ndarray of dimension (n, mw)
+        Regressors to minimize over.
+    dof: int
+        Degrees of freedom for variance computation. Equal to ``n - k - mc``.
+    Z: np.ndarray of dimension (n, k), optional, default=None
+        Instruments. Either ``Z`` or ``X_proj``, ``y_proj``, and ``W_proj`` must be
+        provided.
+    X_proj: np.ndarray of dimension (n, mx), optional, default=None
+        Projection of ``X`` onto the column space of ``Z``.
+    y_proj: np.ndarray of dimension (n,) or (n, 1), optional, default=None
+        Projection of ``y`` onto the column space of ``Z``.
+    W_proj: np.ndarray of dimension (n, mw), optional, default=None
         Projection of ``W`` onto the column space of ``Z``.
-    beta: np.ndarray of dimension (mx,)
-        Coefficients to test.
-
-    Returns
-    -------
-    lm: float
-        The Lagrange multiplier test statistic.
-    d_lm: float
-        The derivative of the Lagrange multiplier test statistic at ``beta``.
     """

-    n = X.shape[0]
-    residuals = Y - X @ beta
-    residuals_proj = Y_proj - X_proj @ beta
-    residuals_orth = residuals - residuals_proj
+    def __init__(self, X, y, W, dof, Z=None, X_proj=None, y_proj=None, W_proj=None):
+        self.X = X
+        self.y = y.reshape(-1, 1)
+        self.W = W
+
+        if X_proj is None or y_proj is None or W_proj is None:
+            if Z is None:
+                raise ValueError("Z must be provided to compute the projection.")
+            else:
+                X_proj, y_proj, W_proj = proj(Z, self.X, self.y, self.W)
+
+        self.X_proj = X_proj
+        self.X_orth = X - X_proj
+        self.y_proj = y_proj.reshape(-1, 1)
+        self.y_orth = self.y - self.y_proj
+        self.W_proj = W_proj
+        self.W_orth = W - W_proj
+        self.yS = np.hstack((self.y, X, W))
+        self.yS_proj = np.hstack((self.y_proj, X_proj, W_proj))
+        self.yS_orth = np.hstack((self.y_orth, self.X_orth, self.W_orth))
+
+        self.n, self.mx = X.shape
+        self.mw = W.shape[1]
+        self.dof = dof
+
+        # Precomputed for ``Sigma`` and ``sigma_hat`` in ``derivative`` and for
+        # the LIML estimate of gamma in ``lm``.
+        self.yS_orth_at_yS = self.yS_orth.T @ self.yS_orth
+        # Precomputed for the LIML estimate of gamma in ``lm``.
+        self.yS_proj_at_yS = self.yS_proj.T @ self.yS_proj
+
+    def derivative(self, beta, gamma=None):
+        """Return the LM statistic and its derivative w.r.t. ``(beta, gamma)``."""
(beta, gamma).""" + if gamma is not None: + one_beta_gamma = np.hstack(([1], -beta.flatten(), -gamma.flatten())) + else: + one_beta_gamma = np.hstack(([1], -beta.flatten())) + + residuals = self.yS @ one_beta_gamma + residuals_proj = self.yS_proj @ one_beta_gamma + # residuals_orth = residuals - residuals_proj + + Sigma = one_beta_gamma.T @ self.yS_orth_at_yS + sigma_hat = Sigma @ one_beta_gamma + Sigma = Sigma[1:] / sigma_hat + + St = self.yS[:, 1:] - np.outer(residuals, Sigma) + St_proj = self.yS_proj[:, 1:] - np.outer(residuals_proj, Sigma) + + solved = np.linalg.solve(St_proj.T @ St_proj, St_proj.T @ residuals_proj) + residuals_proj_St = St_proj @ solved + + lm = residuals_proj_St.T @ residuals_proj_St / sigma_hat + ar = residuals_proj.T @ residuals_proj / sigma_hat + kappa = ar - lm + + first_term = -St_proj.T @ residuals_proj + second_term = kappa * (St - St_proj).T @ St @ solved + + d_lm = 2 * (first_term + second_term) / sigma_hat + + return (self.dof * lm.item(), self.dof * d_lm.flatten()) + + def lm(self, beta): + beta = beta.reshape(-1, 1) + + one_beta_id = np.zeros((1 + self.mx + self.mw, 1 + self.mw)) + one_beta_id[0, 0] = 1 + one_beta_id[1 : (1 + self.mx), 0] = -beta[:, 0] + one_beta_id[(1 + self.mx) :, 1:] = np.diag(np.ones(self.mw)) + + gamma_liml = scipy.linalg.eigh( + b=one_beta_id.T @ self.yS_orth_at_yS @ one_beta_id, + a=one_beta_id.T @ self.yS_proj_at_yS @ one_beta_id, + subset_by_index=[0, 0], + )[1][:, 0] + gamma_liml = -gamma_liml[1:] / gamma_liml[0] + + def _derivative(gamma): + result = self.derivative(beta, gamma) + return (result[0], result[1][self.mx :]) + + res1 = scipy.optimize.minimize( + _derivative, + jac=True, + x0=gamma_liml, + ) + + res2 = scipy.optimize.minimize( + _derivative, + jac=True, + x0=np.zeros_like(gamma_liml), + ) + + return min(res1.fun, res2.fun) + + def lm_and_derivative(self, betas, gamma_0): + assert len(betas.shape) == 2 + assert betas.shape[1] == self.mx + + lms = np.zeros(betas.shape[0]) + derivatives = np.zeros((betas.shape[0], self.mx)) + + class _derivatives: + """Helper class to recover last derivative called in the optimization.""" - sigma_hat = residuals_orth.T @ residuals_orth + def __init__(self, beta): + self.beta = beta + self.jac = None - S = np.hstack((X, W)) - S_proj = np.hstack((X_proj, W_proj)) - Sigma = residuals_orth.T @ S / sigma_hat - St = S - np.outer(residuals, Sigma) - St_proj = S_proj - np.outer(residuals_proj, Sigma) + def __call__(self, gamma): + self.derivative = self.derivative(self.beta, gamma) + return (self.derivative[0], self.derivative[1][self.mx :]) - solved = np.linalg.solve(St_proj.T @ St_proj, St_proj.T @ residuals_proj) - residuals_proj_St = St_proj @ solved + for idx in betas.shape[0]: + _derivative = _derivatives(betas[idx, :]) + res = scipy.optimize.minimize( + _derivative, + jac=True, + x0=gamma_0, + ) - lm = residuals_proj_St.T @ residuals_proj_St / sigma_hat - ar = residuals_proj.T @ residuals_proj / sigma_hat - kappa = ar - lm + lms[idx] = res.fun + derivatives[idx, :] = _derivative.derivative[: self.mx] + gamma_0 = res.x - first_term = -St_proj[:, : X.shape[1]].T @ residuals_proj - second_term = kappa * (St - St_proj)[:, : X.shape[1]].T @ St @ solved + return lms, derivatives - d_lm = 2 * (first_term + second_term) / sigma_hat - return (n * lm.item(), n * d_lm.flatten()) +# def _LM(X, X_proj, Y, Y_proj, W, W_proj, beta): +# """ +# Compute the Lagrange multiplier test statistic and its derivative at ``beta``. + +# Parameters +# ---------- +# X: np.ndarray of dimension (n, mx) +# Regressors. 
@@ -130,45 +282,21 @@ def lagrange_multiplier_test(Z, X, y, beta, W=None, C=None, fit_intercept=True):
     if C.shape[1] > 0:
         X, y, Z, W = oproj(C, X, y, Z, W)

-    residuals = y - X @ beta
-
-    X_proj, W_proj, residuals_proj = proj(Z, X, W, residuals)
-
     if W.shape[1] > 0:
-        gamma_hat = KClass(kappa="liml").fit(X=W, y=y - X @ beta, Z=Z).coef_
-        res = scipy.optimize.minimize(
-            lambda gamma: _LM(
-                X=W,
-                X_proj=W_proj,
-                Y=residuals,
-                Y_proj=residuals_proj,
-                W=X,
-                W_proj=X_proj,
-                beta=gamma,
-            ),
-            jac=True,
-            x0=gamma_hat,
-        )
-
-        res2 = scipy.optimize.minimize(
-            lambda gamma: _LM(
-                X=W,
-                X_proj=W_proj,
-                Y=residuals,
-                Y_proj=residuals_proj,
-                W=X,
-                W_proj=X_proj,
-                beta=gamma,
-            ),
-            jac=True,
-            x0=np.zeros_like(gamma_hat),
-        )
-
-        statistic = min(res.fun, res2.fun) / n * (n - k - C.shape[1])
+        statistic = _LM(
+            X=X,
+            y=y,
+            W=W,
+            Z=Z,
+            dof=n - k - C.shape[1],
+        ).lm(beta)

         p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx)
     else:
+        residuals = y - X @ beta
+        residuals_proj, X_proj = proj(Z, residuals, X)
+
         orth_residuals = residuals - residuals_proj

         sigma_hat = residuals.T @ orth_residuals
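Usage note (not part of the patch): with the helper class in place, the subvector case of the public test reduces to ``_LM(...).lm(beta)`` as in the hunk above. A sketch using ``simulate_gaussian_iv`` as imported in the tests below, assuming ``lagrange_multiplier_test`` returns the pair ``(statistic, p_value)``:

    import numpy as np

    from ivmodels.simulate import simulate_gaussian_iv
    from ivmodels.tests.lagrange_multiplier import lagrange_multiplier_test

    Z, X, y, _, W = simulate_gaussian_iv(n=1000, mx=2, k=10, mw=2, include_intercept=False)

    # Test H0: beta = 0 for the X coefficients, minimizing the statistic over
    # the coefficients of W. Under H0, the statistic is approximately chi2(mx).
    statistic, p_value = lagrange_multiplier_test(Z=Z, X=X, y=y, beta=np.zeros(2), W=W)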
diff --git a/tests/tests/test_lagrange_multiplier.py b/tests/tests/test_lagrange_multiplier.py
index 6b41013..bc0c891 100644
--- a/tests/tests/test_lagrange_multiplier.py
+++ b/tests/tests/test_lagrange_multiplier.py
@@ -2,46 +2,88 @@
 import pytest
 import scipy

+from ivmodels.simulate import simulate_gaussian_iv
 from ivmodels.tests.lagrange_multiplier import _LM
 from ivmodels.utils import proj


 @pytest.mark.parametrize(
-    "n, mx, mc, k, u",
-    [(100, 1, 1, 2, 1), (100, 1, 2, 5, 2), (1000, 2, 5, 10, 2), (1000, 5, 2, 10, 2)],
+    "n, mx, mw",
+    [(100, 1, 1)],
 )
-def test_lm_gradient(n, mx, mc, k, u):
+def test__LM_raises(n, mx, mw):
     rng = np.random.RandomState(0)
+    X, W, y = rng.randn(n, mx), rng.randn(n, mw), rng.randn(n)

-    delta_X = rng.normal(0, 1, (u, mx))
-    delta_W = rng.normal(0, 1, (u, mc))
-    delta_y = rng.normal(0, 1, (u, 1))
+    with pytest.raises(ValueError, match="Z must"):
+        _LM(X=X, y=y, W=W, dof=7)

-    beta = rng.normal(0, 0.1, (mx, 1))
-    gamma = 10 * rng.normal(0, 0.1, (mc, 1))

-    Pi_X = rng.normal(0, 1, (k, mx))
-    Pi_W = rng.normal(0, 1, (k, mc))
+@pytest.mark.parametrize(
+    "n, mx, mw, k",
+    [(100, 1, 1, 2), (100, 1, 2, 5), (1000, 2, 5, 10), (1000, 5, 2, 10)],
+)
+def test__LM__init__(n, mx, mw, k):
+    rng = np.random.RandomState(0)
+    X, W, y, Z = rng.randn(n, mx), rng.randn(n, mw), rng.randn(n), rng.randn(n, k)
+
+    X_proj, W_proj, y_proj = proj(Z, X, W, y)

-    U = rng.normal(0, 1, (n, u))
+    lm1 = _LM(X=X, y=y, W=W, dof=7, X_proj=X_proj, y_proj=y_proj, W_proj=W_proj)
+    lm2 = _LM(X=X, y=y, W=W, dof=7, Z=Z)
+    assert lm1.__dict__.keys() == lm2.__dict__.keys()
+    assert all(
+        [
+            np.all(np.isclose(lm1.__dict__[k], lm2.__dict__[k]))
+            for k in lm1.__dict__.keys()
+        ]
+    )

-    Z = rng.normal(0, 1, (n, k))
-    X = Z @ Pi_X + U @ delta_X + rng.normal(0, 1, (n, mx))
-    W = Z @ Pi_W + U @ delta_W + rng.normal(0, 1, (n, mc))
-    y = X @ beta + W @ gamma + U @ delta_y + rng.normal(0, 1, (n, 1))

-    X_proj = proj(Z, X)
-    W_proj = proj(Z, W)
-    y_proj = proj(Z, y)
+@pytest.mark.parametrize(
+    "n, mx, mw, k",
+    [(100, 1, 1, 2), (100, 1, 2, 5), (1000, 2, 5, 10), (1000, 5, 2, 10)],
+)
+def test_lm_gradient(n, mx, mw, k):
+    Z, X, y, _, W = simulate_gaussian_iv(
+        n=n, mx=mx, k=k, mw=mw, include_intercept=False
+    )
+    lm = _LM(X=X, y=y, W=W, dof=7, Z=Z)

+    rng = np.random.RandomState(0)
     for _ in range(5):
-        beta_test = rng.normal(0, 0.1, (mx, 1))
+        beta_gamma_test = rng.normal(0, 1, mx + mw)

         grad_approx = scipy.optimize.approx_fprime(
-            beta_test.flatten(),
-            lambda b: _LM(X, X_proj, y, y_proj, W, W_proj, b.reshape(-1, 1))[0],
+            beta_gamma_test,
+            lambda b: lm.derivative(b)[0],
             1e-6,
         )
-        grad = _LM(X, X_proj, y, y_proj, W, W_proj, beta_test.reshape(-1, 1))[1]
+        grad = lm.derivative(beta_gamma_test)[1]
+
+        assert np.allclose(grad, grad_approx, rtol=5e-5, atol=5e-5)
+
+
+@pytest.mark.parametrize(
+    "n, mx, mw, k",
+    [(100, 2, 5, 10), (100, 5, 2, 10)],
+)
+def test_lm_gradient_beta_gamma(n, mx, mw, k):
+    Z, X, y, _, W = simulate_gaussian_iv(
+        n=n, mx=mx, k=k, mw=mw, include_intercept=False
+    )
+    lm = _LM(X=X, y=y, W=W, dof=7, Z=Z)
+
+    rng = np.random.RandomState(0)
+    beta = rng.normal(0, 1, mx)
+    gamma = rng.normal(0, 1, mw)
+
+    assert np.allclose(
+        lm.derivative(np.concatenate([beta, gamma]))[0],
+        lm.derivative(beta, gamma)[0],
+    )

-    assert np.allclose(grad.flatten(), grad_approx, rtol=1e-4, atol=1e-4)
+    assert np.allclose(
+        lm.derivative(np.concatenate([beta, gamma]))[1],
+        lm.derivative(beta, gamma)[1],
+    )
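Finally, ``lm_and_derivative`` is not exercised by the tests above. A hypothetical usage sketch of the path fixed in this diff, evaluating the statistic along a grid of betas and warm-starting each inner minimization over gamma at the previous optimum:

    import numpy as np

    from ivmodels.simulate import simulate_gaussian_iv
    from ivmodels.tests.lagrange_multiplier import _LM

    n, mx, mw, k = 1000, 1, 2, 5
    Z, X, y, _, W = simulate_gaussian_iv(n=n, mx=mx, k=k, mw=mw, include_intercept=False)
    lm = _LM(X=X, y=y, W=W, dof=n - k, Z=Z)

    # betas must have shape (n_betas, mx); gamma_0 seeds the first minimization
    # and each solution seeds the next, so neighboring betas converge quickly.
    betas = np.linspace(-1, 1, 25).reshape(-1, mx)
    lms, derivatives = lm.lm_and_derivative(betas, gamma_0=np.zeros(mw))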