diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4e06d3c..031558d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,8 +1,21 @@ Changelog ========= -0.2.0 - 2024-06-7 ------------------ +0.3.0 - 2024-07-xx +------------------ + +**New features:** + +- New function :func:`~ivmodels.tests.inverse_lagrange_multiplier_test`. + +- New class :class:`~ivmodels.confidence_sets.ConfidenceSet`. + +**Other changes:** + +- The function :func:`~ivmodels.tests.lagrange_multiplier_test` is now slightly faster. + +0.2.0 - 2024-06-07 +------------------ **New features:** diff --git a/benchmarks/benchmarks/tests.py b/benchmarks/benchmarks/tests.py index a4f213a..c154cb5 100644 --- a/benchmarks/benchmarks/tests.py +++ b/benchmarks/benchmarks/tests.py @@ -4,6 +4,10 @@ from ivmodels.tests import ( anderson_rubin_test, conditional_likelihood_ratio_test, + inverse_anderson_rubin_test, + inverse_lagrange_multiplier_test, + inverse_likelihood_ratio_test, + inverse_wald_test, lagrange_multiplier_test, likelihood_ratio_test, rank_test, @@ -36,17 +40,15 @@ def setup(self, n, data): def time_anderson_rubin_test(self, n, data): _, _ = anderson_rubin_test(**self.data) + def time_inverse_anderson_rubin_test(self, n, data): + data = {k: v for k, v in self.data.items() if k != "beta"} + _ = inverse_anderson_rubin_test(**data) + @skip_for_params( [ (100, (1, 1, 0, 0)), - (100, (4, 2, 0, 0)), - (100, (4, 2, 0, 2)), (1000, (1, 1, 0, 0)), - (1000, (4, 2, 0, 0)), - (1000, (4, 2, 0, 2)), (10000, (1, 1, 0, 0)), - (10000, (4, 2, 0, 0)), - (10000, (4, 2, 0, 2)), ] ) def time_anderson_rubin_test_guggenberger19(self, n, data): @@ -55,9 +57,24 @@ def time_anderson_rubin_test_guggenberger19(self, n, data): def time_lagrange_multiplier_test(self, n, data): _, _ = lagrange_multiplier_test(**self.data) + @skip_for_params( + [ + (100, (4, 2, 2, 2)), + (1000, (4, 2, 2, 2)), + (10000, (4, 2, 0, 2)), + ] + ) + def time_inverse_lagrange_multiplier_test(self, n, data): + data = {k: v for k, v in self.data.items() if k != "beta"} + _ = inverse_lagrange_multiplier_test(**data) + def time_likelihood_ratio_test(self, n, data): _, _ = likelihood_ratio_test(**self.data) + def time_inverse_likelihood_ratio_test(self, n, data): + data = {k: v for k, v in self.data.items() if k != "beta"} + _ = inverse_likelihood_ratio_test(**data) + def time_conditional_likelihood_ratio_test_numerical_integration(self, n, data): _, _ = conditional_likelihood_ratio_test( **self.data, method="numerical_integration" @@ -69,8 +86,16 @@ def time_conditional_likelihood_ratio_test_power_series(self, n, data): def time_wald_test_tsls(self, n, data): _, _ = wald_test(**self.data, estimator="tsls") + def time_inverse_wald_test_tsls(self, n, data): + data = {k: v for k, v in self.data.items() if k != "beta"} + _ = inverse_wald_test(**data, estimator="tsls") + def time_wald_test_liml(self, n, data): _, _ = wald_test(**self.data, estimator="liml") + def time_inverse_wald_test_liml(self, n, data): + data = {k: v for k, v in self.data.items() if k != "beta"} + _ = inverse_wald_test(**data, estimator="liml") + def test_rank_test(self, n, data): _, _ = rank_test(Z=self.data["Z"], X=self.data["X"]) diff --git a/ivmodels/confidence_set.py b/ivmodels/confidence_set.py new file mode 100644 index 0000000..d21fb9b --- /dev/null +++ b/ivmodels/confidence_set.py @@ -0,0 +1,38 @@ +import numpy as np + + +class ConfidenceSet: + """A class to represent a confidence set.""" + + def __init__(self, left, right, convex, empty=False, message=None): + self.left = left + self.right = right + 
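+        # left and right delimit the set: the interval [left, right] if it is
+        # convex, its complement (the union of two unbounded rays) otherwise.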
self.convex = convex + self.empty = empty + self.message = message + + def __call__(self, x): # noqa D + if self.empty: + return 1 + + between = self.left <= x <= self.right + if self.convex and between: + return -1 + elif not self.convex and not between: + return -1 + else: + return 1 + + def __str__(self): # noqa D + if self.empty: + return "[]" + elif self.convex: + return f"[{self.left}, {self.right}]" + else: + return f"(-infty, {self.left}] U [{self.right}, infty)" + + def _boundary(self, error=True): + if self.empty: + return np.zeros(shape=(0, 1)) + else: + return np.array([[self.left], [self.right]]) diff --git a/ivmodels/quadric.py b/ivmodels/quadric.py index f7c115a..10942a6 100644 --- a/ivmodels/quadric.py +++ b/ivmodels/quadric.py @@ -2,8 +2,10 @@ import numpy as np import scipy +from ivmodels.confidence_set import ConfidenceSet -class Quadric: + +class Quadric(ConfidenceSet): """ A class to represent a quadric :math:`x^T A x + b^T x + c \\leq 0`. @@ -57,6 +59,21 @@ def __init__(self, A, b, c): self.D = eigenvalues self.V = eigenvectors + if A.shape[0] == 1: + if (self.D[0] < 0) and (self.c_standardized <= 0): + left, right = -np.inf, np.inf + elif self.D[0] * self.c_standardized > 0: + left, right = np.nan, np.nan + else: + boundary = self._boundary() + left, right = boundary[0], boundary[1] + + volume = self.volume() + super().__init__(left, right, convex=np.isfinite(volume), empty=volume == 0) + else: + volume = self.volume() + super().__init__(None, None, np.isfinite(volume), empty=volume == 0) + def __call__(self, x): """Evaluate the quadric at :math:`x`. @@ -116,7 +133,10 @@ def _boundary(self, num=200, error=True): if len(self.D) == 1: if self.D[0] * self.c_standardized > 0: - raise ValueError("Quadric is empty.") + if error: + raise ValueError("Quadric is empty.") + else: + return np.zeros(shape=(0, 1)) else: return np.array( [ diff --git a/ivmodels/tests/__init__.py b/ivmodels/tests/__init__.py index 9c651ca..fc77d7c 100644 --- a/ivmodels/tests/__init__.py +++ b/ivmodels/tests/__init__.py @@ -1,6 +1,9 @@ from .anderson_rubin import anderson_rubin_test, inverse_anderson_rubin_test from .conditional_likelihood_ratio import conditional_likelihood_ratio_test -from .lagrange_multiplier import lagrange_multiplier_test +from .lagrange_multiplier import ( + inverse_lagrange_multiplier_test, + lagrange_multiplier_test, +) from .likelihood_ratio import inverse_likelihood_ratio_test, likelihood_ratio_test from .pulse import inverse_pulse_test, pulse_test from .rank import rank_test @@ -11,6 +14,7 @@ "inverse_anderson_rubin_test", "conditional_likelihood_ratio_test", "lagrange_multiplier_test", + "inverse_lagrange_multiplier_test", "likelihood_ratio_test", "inverse_likelihood_ratio_test", "pulse_test", diff --git a/ivmodels/tests/lagrange_multiplier.py b/ivmodels/tests/lagrange_multiplier.py index 0fa0e6b..8a4bda6 100644 --- a/ivmodels/tests/lagrange_multiplier.py +++ b/ivmodels/tests/lagrange_multiplier.py @@ -1,66 +1,234 @@ import numpy as np import scipy +import scipy.interpolate +import scipy.optimize +from scipy.optimize._optimize import MemoizeJac -from ivmodels.models.kclass import KClass +from ivmodels.confidence_set import ConfidenceSet from ivmodels.tests.utils import _check_test_inputs +from ivmodels.tests.wald import inverse_wald_test from ivmodels.utils import oproj, proj -def _LM(X, X_proj, Y, Y_proj, W, W_proj, beta): +# https://stackoverflow.com/a/68608349/10586763 +class MemoizeJacHess(MemoizeJac): + """Cache the return values of a function returning (fun, grad, 
hess).""" + + def __init__(self, fun): + super().__init__(fun) + self.hess = None + + def _compute_if_needed(self, x, *args): + if ( + not np.all(x == self.x) + or self._value is None + or self.jac is None + or self.hess is None + ): + self.x = np.asarray(x).copy() + self._value, self.jac, self.hess = self.fun(x, *args) + + def hessian(self, x, *args): # noqa D + self._compute_if_needed(x, *args) + return self.hess + + +class _LM: """ - Compute the Lagrange multiplier test statistic and its derivative at ``beta``. + Helper class to compute the Lagrange multiplier test statistic and its derivative. Parameters ---------- X: np.ndarray of dimension (n, mx) Regressors. - X_proj: np.ndarray of dimension (n, mx) - Projection of ``X`` onto the column space of ``Z``. - Y: np.ndarray of dimension (n,) + Y: np.ndarray of dimension (n,) or (n, 1) Outcomes. - Y_proj: np.ndarray of dimension (n,) - Projection of ``Y`` onto the column space of ``Z``. W: np.ndarray of dimension (n, mw) - Regressors. - W_proj: np.ndarray of dimension (n, mw) + Regressors to minimize over. + dof: int + Degrees of freedom for variance computation. Equal to n - k - mc. + Z: np.ndarray of dimension (n, k), optional, default=None + Instruments. Either ``Z`` or ``X_proj``, ``Y_proj``, and ``W_proj`` must be + provided. + X_proj: np.ndarray of dimension (n, mx), optional, default=None + Projection of ``X`` onto the column space of ``Z``. + Y_proj: np.ndarray of dimension (n,) or (n, 1), optional, default=None + Projection of ``Y`` onto the column space of ``Z``. + W_proj: np.ndarray of dimension (n, mw), optional, default=None Projection of ``W`` onto the column space of ``Z``. - beta: np.ndarray of dimension (mx,) - Coefficients to test. - - Returns - ------- - lm: float - The Lagrange multiplier test statistic. - d_lm: float - The derivative of the Lagrange multiplier test statistic at ``beta``. """ - n = X.shape[0] - residuals = Y - X @ beta - residuals_proj = Y_proj - X_proj @ beta - residuals_orth = residuals - residuals_proj + def __init__(self, X, y, W, dof, Z=None, X_proj=None, y_proj=None, W_proj=None): + + self.X = X + self.y = y.reshape(-1, 1) + self.W = W + if X_proj is None or y_proj is None or W_proj is None: + if Z is None: + raise ValueError("Z must be provided to compute the projection.") + else: + X_proj, y_proj, W_proj = proj(Z, self.X, self.y, self.W) + + self.X_proj = X_proj + self.X_orth = X - X_proj + self.y_proj = y_proj.reshape(-1, 1) + self.y_orth = self.y - self.y_proj + self.W_proj = W_proj + self.W_orth = W - W_proj + self.yS = np.hstack((self.y, X, W)) + self.yS_proj = np.hstack((self.y_proj, X_proj, W_proj)) + self.yS_orth = np.hstack((self.y_orth, self.X_orth, self.W_orth)) + + self.n, self.mx = X.shape + self.mw = W.shape[1] + self.dof = dof + + # precomputation for Sigma, sigma_hat, and Omega_hat for liml + self.yS_orth_at_yS = self.yS_orth.T @ self.yS_orth + # for liml + self.yS_proj_at_yS = self.yS_proj.T @ self.yS_proj + + def liml(self, beta=None): + """ + Efficiently compute the LIML. + + This is the LIML using outcomes y (or y - X beta if beta is not none), + covariates S (or W), and instruments Z. 
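+
+        The LIML coefficient is recovered from the eigenvector ``v`` belonging
+        to the smallest generalized eigenvalue of the pair of Gram matrices of
+        the projected and the residualized design as ``-v[1:] / v[0]`` (see
+        the ``scipy.linalg.eigh`` call below).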
+ """ + if beta is not None: + beta = beta.reshape(-1, 1) + + one_beta_id = np.zeros((1 + self.mx + self.mw, 1 + self.mw)) + one_beta_id[0, 0] = 1 + one_beta_id[1 : (1 + self.mx), 0] = -beta[:, 0] + one_beta_id[(1 + self.mx) :, 1:] = np.diag(np.ones(self.mw)) + + X_proj_X = one_beta_id.T @ self.yS_proj_at_yS @ one_beta_id + X_orth_X = one_beta_id.T @ self.yS_orth_at_yS @ one_beta_id + else: + X_proj_X = self.yS_proj_at_yS + X_orth_X = self.yS_orth_at_yS + + eigval = scipy.linalg.eigh( + a=X_proj_X, + b=X_orth_X, + subset_by_index=[0, 0], + )[1] + + return -eigval[1:, 0] / eigval[0, 0] + + def derivative(self, beta, gamma=None, jac_and_hess=True): + """Return LM and derivative of LM at beta, gamma w.r.t. (beta, gamma).""" + if gamma is not None: + one_beta_gamma = np.hstack(([1], -beta.flatten(), -gamma.flatten())) + else: + one_beta_gamma = np.hstack(([1], -beta.flatten())) + + residuals_proj = self.yS_proj @ one_beta_gamma + + Sigma = one_beta_gamma.T @ self.yS_orth_at_yS + sigma_hat = Sigma @ one_beta_gamma + Sigma = Sigma[1:] / sigma_hat + + St_proj = self.yS_proj[:, 1:] - np.outer(residuals_proj, Sigma) + + if not jac_and_hess: + residuals_proj_St = proj(St_proj, residuals_proj) + return self.dof * residuals_proj_St.T @ residuals_proj_St / sigma_hat + + residuals = self.yS @ one_beta_gamma + St = self.yS[:, 1:] - np.outer(residuals, Sigma) + St_orth = St - St_proj + + mat = St_proj.T @ St_proj + cond = np.linalg.cond(mat) + if cond > 1e12: + mat += 1e-6 * np.eye(mat.shape[0]) + + solved = np.linalg.solve( + mat, + np.hstack( + [ + St_proj.T @ residuals_proj.reshape(-1, 1), + St_orth.T @ St[:, self.mx :], + ] + ), + ) + residuals_proj_St = St_proj @ solved[:, 0] + + ar = residuals_proj.T @ residuals_proj / sigma_hat + lm = residuals_proj_St.T @ residuals_proj_St / sigma_hat + kappa = ar - lm + + first_term = -St_proj[:, self.mx :].T @ residuals_proj + second_term = St_orth[:, self.mx :].T @ St @ solved[:, 0] + S = self.yS[:, 1:] + S_proj = self.yS_proj[:, 1:] + S_orth = S - S_proj + + d_lm = 2 * (first_term + kappa * second_term) / sigma_hat + + dd_lm = ( + 2 + * ( + -3 * kappa * np.outer(second_term, second_term) / sigma_hat + + kappa**2 * St_orth[:, self.mx :].T @ St_orth @ solved[:, 1:] + - kappa * St_orth[:, self.mx :].T @ St_orth[:, self.mx :] + - kappa + * St_orth[:, self.mx :].T + @ St_orth + @ np.outer(solved[:, 0], Sigma[self.mx :]) + + St[:, self.mx :].T + @ (S_proj[:, self.mx :] - ar * S_orth[:, self.mx :]) + - np.outer( + Sigma[self.mx :], + (St_proj - kappa * St_orth)[:, self.mx :].T @ St @ solved[:, 0], + ) + + 2 + * kappa + * np.outer(S_orth[:, self.mx :].T @ St @ solved[:, 0], Sigma[self.mx :]) + - 2 * np.outer(St_proj[:, self.mx :].T @ residuals, Sigma[self.mx :]) + ) + / sigma_hat + ) + + return (self.dof * lm.item(), self.dof * d_lm.flatten(), self.dof * dd_lm) - sigma_hat = residuals_orth.T @ residuals_orth + def lm(self, beta): + """ + Compute the Lagrange multiplier test statistic at ``beta``. - S = np.hstack((X, W)) - S_proj = np.hstack((X_proj, W_proj)) - Sigma = residuals_orth.T @ S / sigma_hat - St = S - np.outer(residuals, Sigma) - St_proj = S_proj - np.outer(residuals_proj, Sigma) + Computed by minimization over ``gamma``. 
+ """ + if isinstance(beta, float): + beta = np.array([[beta]]) - solved = np.linalg.solve(St_proj.T @ St_proj, St_proj.T @ residuals_proj) - residuals_proj_St = St_proj @ solved + if self.mw == 0: + return self.derivative(beta, jac_and_hess=False) - lm = residuals_proj_St.T @ residuals_proj_St / sigma_hat - ar = residuals_proj.T @ residuals_proj / sigma_hat - kappa = ar - lm + gamma_0 = self.liml(beta=beta) - first_term = -St_proj[:, : X.shape[1]].T @ residuals_proj - second_term = kappa * (St - St_proj)[:, : X.shape[1]].T @ St @ solved + def _derivative(gamma): + result = self.derivative(beta, gamma, jac_and_hess=True) + return (result[0], result[1], result[2]) - d_lm = 2 * (first_term + second_term) / sigma_hat + objective = MemoizeJacHess(_derivative) + jac = objective.derivative + hess = objective.hessian - return (n * lm.item(), n * d_lm.flatten()) + res1 = scipy.optimize.minimize( + objective, jac=jac, hess=hess, x0=gamma_0, method="newton-cg" + ) + + res2 = scipy.optimize.minimize( + objective, + jac=jac, + hess=hess, + method="newton-cg", + x0=np.zeros_like(gamma_0), + ) + + return np.min([res1.fun, res2.fun]) def lagrange_multiplier_test(Z, X, y, beta, W=None, C=None, fit_intercept=True): @@ -130,45 +298,15 @@ def lagrange_multiplier_test(Z, X, y, beta, W=None, C=None, fit_intercept=True): if C.shape[1] > 0: X, y, Z, W = oproj(C, X, y, Z, W) - residuals = y - X @ beta - - X_proj, W_proj, residuals_proj = proj(Z, X, W, residuals) - if W.shape[1] > 0: - gamma_hat = KClass(kappa="liml").fit(X=W, y=y - X @ beta, Z=Z).coef_ - res = scipy.optimize.minimize( - lambda gamma: _LM( - X=W, - X_proj=W_proj, - Y=residuals, - Y_proj=residuals_proj, - W=X, - W_proj=X_proj, - beta=gamma, - ), - jac=True, - x0=gamma_hat, - ) - - res2 = scipy.optimize.minimize( - lambda gamma: _LM( - X=W, - X_proj=W_proj, - Y=residuals, - Y_proj=residuals_proj, - W=X, - W_proj=X_proj, - beta=gamma, - ), - jac=True, - x0=np.zeros_like(gamma_hat), - ) - - statistic = min(res.fun, res2.fun) / n * (n - k - C.shape[1]) + statistic = _LM(X=X, y=y, W=W, Z=Z, dof=n - k - C.shape[1]).lm(beta) p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx) else: + residuals = y - X @ beta + residuals_proj, X_proj = proj(Z, residuals, X) + orth_residuals = residuals - residuals_proj sigma_hat = residuals.T @ orth_residuals @@ -186,3 +324,108 @@ def lagrange_multiplier_test(Z, X, y, beta, W=None, C=None, fit_intercept=True): p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx) return statistic, p_value + + +def inverse_lagrange_multiplier_test( + Z, X, y, alpha=0.05, W=None, C=None, fit_intercept=True +): + """ + Return an approximation of the confidence set by inversion of the LM test. + + This is only implemented if ``X.shape[1] == 1``. The confidence set is essentially + computed by a grid search plus a root finding algorithm to improve the precision. + Due to the numerical nature, this is in no way guaranteed to return the true + confidence set, or contain it. 
+ """ + if not 0 < alpha < 1: + raise ValueError("alpha must be in (0, 1).") + + Z, X, y, W, C, _ = _check_test_inputs(Z, X, y, W=W, C=C) + + n, mx = X.shape + if not mx == 1: + raise ValueError("mx must be 1.") + + k = Z.shape[1] + + if fit_intercept: + C_ = np.hstack([np.ones((n, 1)), C]) + else: + C_ = C + + if C_.shape[1] > 0: + X_, y_, Z_, W_ = oproj(C_, X, y, Z, W) + else: + X_, y_, Z_, W_ = X, y, Z, W + + dof = n - k - C_.shape[1] + lm = _LM(X=X_, W=W_, y=y_, Z=Z_, dof=dof) + critical_value = scipy.stats.chi2(df=mx).ppf(1 - alpha) + + # Use a test with closed-form solution to get an idea of the "scale". + approx = inverse_wald_test( + Z=Z, X=X, y=y, alpha=alpha, W=W, C=C, fit_intercept=fit_intercept + ) + + left, right = approx.left, approx.right + step = right - left + + while lm.lm(right) < critical_value: + right += step + step *= 2 + + if right > 1e6: + return ConfidenceSet( + left=-np.inf, right=np.inf, convex=True, message="no bounds found" + ) + + right += 4 * step + + while lm.lm(left) < critical_value: + left -= step + step *= 2 + + if left < -1e6: + return ConfidenceSet( + left=-np.inf, right=np.inf, convex=True, message="no bounds found" + ) + + left -= 4 * step + + n_points = 200 + x_ = np.linspace(left, right, n_points) + y__ = np.zeros(n_points) + + for idx in range(n_points): + y__[idx] = lm.lm(x_[idx]) + + where = np.where(y__ < critical_value)[0] + left_bracket = x_[where[0] - 1], x_[where[where > where[0] + 1][0]] + right_bracket = x_[where[-1] + 1], x_[where[where < where[-1] - 1][-1]] + + def f(x): + arr = np.ones(1) + arr[0] = x + return lm.lm(arr) - critical_value + + new_left = scipy.optimize.root_scalar( + f, bracket=left_bracket, maxiter=1000, xtol=1e-5 + ).root + + new_right = scipy.optimize.root_scalar( + f, + bracket=right_bracket, + maxiter=1000, + xtol=1e-5, + ).root + + if not max(np.abs([f(new_left), f(new_right)])) < 1e-3: + return ConfidenceSet( + left=new_left, + right=new_right, + convex=True, + empty=False, + message="No roots found.", + ) + else: + return ConfidenceSet(left=new_left, right=new_right, convex=True, empty=False) diff --git a/tests/tests/test_lagrange_multiplier.py b/tests/tests/test_lagrange_multiplier.py index 6b41013..adc54ac 100644 --- a/tests/tests/test_lagrange_multiplier.py +++ b/tests/tests/test_lagrange_multiplier.py @@ -2,46 +2,98 @@ import pytest import scipy +from ivmodels.simulate import simulate_gaussian_iv from ivmodels.tests.lagrange_multiplier import _LM from ivmodels.utils import proj @pytest.mark.parametrize( - "n, mx, mc, k, u", - [(100, 1, 1, 2, 1), (100, 1, 2, 5, 2), (1000, 2, 5, 10, 2), (1000, 5, 2, 10, 2)], + "n, mx, mw", + [(100, 1, 1)], ) -def test_lm_gradient(n, mx, mc, k, u): +def test__LM_raises(n, mx, mw): rng = np.random.RandomState(0) + X, W, y = rng.randn(n, mx), rng.randn(n, mw), rng.randn(n) - delta_X = rng.normal(0, 1, (u, mx)) - delta_W = rng.normal(0, 1, (u, mc)) - delta_y = rng.normal(0, 1, (u, 1)) + with pytest.raises(ValueError, match="Z must"): + _LM(X=X, y=y, W=W, dof=7) - beta = rng.normal(0, 0.1, (mx, 1)) - gamma = 10 * rng.normal(0, 0.1, (mc, 1)) - Pi_X = rng.normal(0, 1, (k, mx)) - Pi_W = rng.normal(0, 1, (k, mc)) +@pytest.mark.parametrize( + "n, mx, mw, k", + [(100, 1, 1, 2), (100, 1, 2, 5), (1000, 2, 5, 10), (1000, 5, 2, 10)], +) +def test__LM__init__(n, mx, mw, k): + rng = np.random.RandomState(0) + X, W, y, Z = rng.randn(n, mx), rng.randn(n, mw), rng.randn(n), rng.randn(n, k) - U = rng.normal(0, 1, (n, u)) + X_proj, W_proj, y_proj = proj(Z, X, W, y) - Z = rng.normal(0, 1, (n, k)) - 
X = Z @ Pi_X + U @ delta_X + rng.normal(0, 1, (n, mx)) - W = Z @ Pi_W + U @ delta_W + rng.normal(0, 1, (n, mc)) - y = X @ beta + W @ gamma + U @ delta_y + rng.normal(0, 1, (n, 1)) + lm1 = _LM(X=X, y=y, W=W, dof=7, X_proj=X_proj, y_proj=y_proj, W_proj=W_proj) + lm2 = _LM(X=X, y=y, W=W, dof=7, Z=Z) + assert lm1.__dict__.keys() == lm2.__dict__.keys() + assert all( + [ + np.all(np.isclose(lm1.__dict__[k], lm2.__dict__[k])) + for k in lm1.__dict__.keys() + ] + ) - X_proj = proj(Z, X) - W_proj = proj(Z, W) - y_proj = proj(Z, y) +@pytest.mark.parametrize( + "n, mx, mw, k", + [(100, 1, 1, 2), (100, 1, 2, 5), (1000, 2, 5, 10), (1000, 5, 2, 10)], +) +def test_lm_gradient(n, mx, mw, k): + Z, X, y, _, W = simulate_gaussian_iv( + n=n, mx=mx, k=k, mw=mw, include_intercept=False + ) + lm = _LM(X=X, y=y, W=W, dof=7, Z=Z) + + rng = np.random.RandomState(0) for _ in range(5): - beta_test = rng.normal(0, 0.1, (mx, 1)) + beta = rng.normal(0, 1, mx) + gamma = rng.normal(0, 1, mw) grad_approx = scipy.optimize.approx_fprime( - beta_test.flatten(), - lambda b: _LM(X, X_proj, y, y_proj, W, W_proj, b.reshape(-1, 1))[0], + gamma, + lambda g: lm.derivative(beta=beta, gamma=g)[0], 1e-6, ) - grad = _LM(X, X_proj, y, y_proj, W, W_proj, beta_test.reshape(-1, 1))[1] + grad = lm.derivative(beta, gamma)[1] + + assert np.allclose(grad, grad_approx, rtol=5e-5, atol=5e-5) + + hess_approx = scipy.optimize.approx_fprime( + gamma, + lambda g: lm.derivative(beta=beta, gamma=g)[1], + 1e-6, + ) + hess = lm.derivative(beta, gamma)[2] + + assert np.allclose(hess, hess_approx, rtol=5e-5, atol=5e-5) + + +@pytest.mark.parametrize( + "n, mx, mw, k", + [(100, 2, 5, 10), (100, 5, 2, 10)], +) +def test_lm_gradient_beta_gamma(n, mx, mw, k): + Z, X, y, _, W = simulate_gaussian_iv( + n=n, mx=mx, k=k, mw=mw, include_intercept=False + ) + lm = _LM(X=X, y=y, W=W, dof=7, Z=Z) + + rng = np.random.RandomState(0) + beta = rng.normal(0, 1, mx) + gamma = rng.normal(0, 1, mw) + + assert np.allclose( + lm.derivative(np.concatenate([beta, gamma]))[0], + lm.derivative(beta, gamma)[0], + ) - assert np.allclose(grad.flatten(), grad_approx, rtol=1e-4, atol=1e-4) + assert np.allclose( + lm.derivative(np.concatenate([beta, gamma]))[1], + lm.derivative(beta, gamma)[1], + ) diff --git a/tests/tests/test_tests.py b/tests/tests/test_tests.py index 4152181..a3cd910 100644 --- a/tests/tests/test_tests.py +++ b/tests/tests/test_tests.py @@ -4,11 +4,13 @@ import pytest from ivmodels.models.kclass import KClass +from ivmodels.quadric import Quadric from ivmodels.simulate import simulate_gaussian_iv, simulate_guggenberger12 from ivmodels.tests import ( anderson_rubin_test, conditional_likelihood_ratio_test, inverse_anderson_rubin_test, + inverse_lagrange_multiplier_test, inverse_likelihood_ratio_test, inverse_pulse_test, inverse_wald_test, @@ -31,7 +33,7 @@ TEST_PAIRS = [ (conditional_likelihood_ratio_test, None), (pulse_test, inverse_pulse_test), - (lagrange_multiplier_test, None), + (lagrange_multiplier_test, inverse_lagrange_multiplier_test), (anderson_rubin_test, inverse_anderson_rubin_test), (f_anderson_rubin_test, inverse_f_anderson_rubin_test), (wald_test, inverse_wald_test), @@ -253,29 +255,41 @@ def test_test_size_weak_ivs(test, n, mx, k, u, mc): (anderson_rubin_test, inverse_anderson_rubin_test), (f_anderson_rubin_test, inverse_f_anderson_rubin_test), (wald_test, inverse_wald_test), + (lagrange_multiplier_test, inverse_lagrange_multiplier_test), (liml_wald_test, liml_inverse_wald_test), (likelihood_ratio_test, inverse_likelihood_ratio_test), ], ) 
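+# "data" is either an (n, mx, k, u, mc) tuple passed to simulate_gaussian_iv
+# or the string "guggenberger12", selecting simulate_guggenberger12.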
-@pytest.mark.parametrize("n, mx, k, u, mc", [(100, 2, 2, 1, 3), (100, 2, 5, 2, 0)]) +@pytest.mark.parametrize( + "data", [(100, 1, 2, 1, 3), (100, 2, 5, 2, 0), "guggenberger12"] +) @pytest.mark.parametrize("p_value", [0.1, 0.01]) @pytest.mark.parametrize("fit_intercept", [True, False]) -def test_test_round_trip(test, inverse_test, n, mx, k, u, mc, p_value, fit_intercept): +def test_test_round_trip(test, inverse_test, data, p_value, fit_intercept): """A test's p-value at the confidence set's boundary equals the nominal level.""" - Z, X, y, C, _ = simulate_gaussian_iv( - n=n, mx=mx, k=k, u=u, mc=mc, seed=0, include_intercept=fit_intercept - ) + if data == "guggenberger12": + Z, X, y, C, _ = simulate_guggenberger12(n=100, k=5, seed=0) + else: + n, mx, k, u, mc = data + + if test == lagrange_multiplier_test and mx > 1: + pytest.skip("LM test inverse not implemented for mx > 1") + + Z, X, y, C, _ = simulate_gaussian_iv( + n=n, mx=mx, k=k, u=u, mc=mc, seed=0, include_intercept=fit_intercept + ) quadric = inverse_test(Z, X, y, C=C, alpha=p_value, fit_intercept=fit_intercept) - boundary = quadric._boundary() + boundary = quadric._boundary(error=False) - assert np.allclose(quadric(boundary), 0, atol=1e-7) + if isinstance(quadric, Quadric): + assert np.allclose(quadric(boundary), 0, atol=1e-7) p_values = np.zeros(boundary.shape[0]) for idx, row in enumerate(boundary): p_values[idx] = test(Z, X, y, C=C, beta=row, fit_intercept=fit_intercept)[1] - assert np.allclose(p_values, p_value, atol=1e-8) + assert np.allclose(p_values, p_value, atol=1e-4) @pytest.mark.parametrize( @@ -284,38 +298,68 @@ def test_test_round_trip(test, inverse_test, n, mx, k, u, mc, p_value, fit_inter (wald_test, inverse_wald_test), (liml_wald_test, liml_inverse_wald_test), (anderson_rubin_test, inverse_anderson_rubin_test), + (lagrange_multiplier_test, inverse_lagrange_multiplier_test), (f_anderson_rubin_test, inverse_f_anderson_rubin_test), (likelihood_ratio_test, inverse_likelihood_ratio_test), ], ) @pytest.mark.parametrize( - "n, mx, k, mw, u, mc", - [(100, 2, 3, 1, 2, 3), (100, 2, 5, 2, 3, 0), (100, 1, 3, 1, None, 0)], + "data", + [ + (100, 1, 3, 1, 2, 3), + (100, 2, 5, 2, 3, 0), + (100, 1, 10, 5, None, 0), + "guggenberger12", + ], ) @pytest.mark.parametrize("p_value", [0.1]) @pytest.mark.parametrize("fit_intercept", [True, False]) -def test_subvector_round_trip( - test, inverse_test, n, mx, k, u, mw, mc, p_value, fit_intercept -): +def test_subvector_round_trip(test, inverse_test, data, p_value, fit_intercept): """ A test's p-value at the confidence set's boundary equals the nominal level. This time for subvector tests. 
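+
+    If the inverse test only returns an approximate boundary (signalled by a
+    non-None ``message``), the p-value is checked just inside and just outside
+    the reported endpoints instead of exactly on the boundary.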
""" - Z, X, y, C, W = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mw=mw, mc=mc, seed=0) + if data == "guggenberger12": + Z, X, y, C, W = simulate_guggenberger12(n=100, k=5, seed=0) + else: + n, mx, k, mw, u, mc = data + + if test == lagrange_multiplier_test and mx > 1: + pytest.skip("LM test inverse not implemented for mx > 1") + + Z, X, y, C, W = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mw=mw, mc=mc, seed=0) + + kwargs = {"Z": Z, "X": X, "y": y, "W": W, "C": C, "fit_intercept": fit_intercept} quadric = inverse_test(Z, X, y, p_value, W=W, C=C, fit_intercept=fit_intercept) - boundary = quadric._boundary() - assert np.allclose(quadric(boundary), 0, atol=1e-7) + boundary = quadric._boundary(error=False) - p_values = np.zeros(boundary.shape[0]) - for idx, row in enumerate(boundary): - p_values[idx] = test(Z, X, y, beta=row, W=W, C=C, fit_intercept=fit_intercept)[ - 1 - ] + if quadric.message is not None: + if quadric.empty or not all(np.isfinite([quadric.left, quadric.right])): + return + + eps = 1e-6 * (quadric.right - quadric.left) + tol = 1e-3 + + left_m = test(**kwargs, beta=np.array([quadric.left - eps])) + left_p = test(Z, X, y, beta=np.array([quadric.left + eps])) + assert left_m[1] + tol >= p_value >= left_p[1] - tol + + right_p = test(**kwargs, beta=np.array([quadric.right + eps])) + right_m = test(Z, X, y, beta=np.array([quadric.right - eps])) + assert right_p[1] + tol >= p_value >= right_m[1] - tol + + else: + if isinstance(quadric, Quadric): + assert np.allclose(quadric(boundary), 0, atol=1e-7) + + p_values = np.zeros(boundary.shape[0]) + for idx, row in enumerate(boundary): + p_values[idx] = test(beta=row, **kwargs)[1] - assert np.allclose(p_values, p_value, atol=1e-8) + assert np.allclose(p_values, p_value, atol=1e-4) @pytest.mark.parametrize(