diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f1e7308..cca06be 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,5 +6,9 @@ Unreleased **New features:** -- New method func:`ivmodels.simulate.simulate_guggenberger12` to draw from the data - generating process of Guggenberger (2012). \ No newline at end of file +- New method :func:`ivmodels.simulate.simulate_guggenberger12` to draw from the data + generating process of Guggenberger (2012). + +- The utility functions :func:`ivmodels.utils.proj` and :func:`ivmodels.utils.oproj` + now accept multiple args to be projected. Usage of this results in performance + improvements. \ No newline at end of file diff --git a/ivmodels/models/kclass.py b/ivmodels/models/kclass.py index a446103..89e1a22 100644 --- a/ivmodels/models/kclass.py +++ b/ivmodels/models/kclass.py @@ -5,7 +5,7 @@ import scipy from glum import GeneralizedLinearRegressor -from ivmodels.utils import proj, to_numpy +from ivmodels.utils import oproj, proj, to_numpy try: import pandas as pd @@ -246,7 +246,9 @@ def _fuller_alpha(self, kappa): def _spectrum(X, y, Z=None, X_proj=None, y_proj=None, subset_by_index=None): if (y_proj is None or X_proj is None) and Z is None: raise ValueError("Either Z or both X_proj and y_proj must be specified.") - if X_proj is None: + if X_proj is None and y_proj is None: + X_proj, y_proj = proj(Z, X, y) + elif X_proj is None: X_proj = proj(Z, X) if y_proj is None: y_proj = proj(Z, y) @@ -379,14 +381,13 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs): if C.shape[1] > 0: Z = np.hstack([Z, C]) # save some compute here, as proj([Z, C], C) = C - X_proj = np.hstack([proj(Z, X), C]) + X_proj, y_proj = proj(Z, X, y) + X_proj = np.hstack([X_proj, C]) X = np.hstack([X, C]) x_mean = np.hstack([x_mean, c_mean]) else: - X_proj = proj(Z, X) - - y_proj = proj(Z, y) + X_proj, y_proj = proj(Z, X, y) if isinstance(self.kappa, str): if self.kappa.lower() in {"tsls", "2sls"}: @@ -398,9 +399,10 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs): # If C!=None, we compute the ar_min as if we removed C from all design # matrices. I.e., we replace Z <- M_C Z, X <- M_C X, y <- M_C y. # We also exclude the columns in X coming from C. + X_orth_C, y_orth_C = oproj(C, X[:, :mx], y) self.ar_min_ = self.ar_min( - X[:, :mx] - proj(C, X[:, :mx]), - y - proj(C, y), + X_orth_C, + y_orth_C, X_proj=X_proj[:, :mx], y_proj=y_proj, ) diff --git a/ivmodels/tests/anderson_rubin.py b/ivmodels/tests/anderson_rubin.py index 4c0c8f2..0056d74 100644 --- a/ivmodels/tests/anderson_rubin.py +++ b/ivmodels/tests/anderson_rubin.py @@ -175,10 +175,7 @@ def anderson_rubin_test( C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X = oproj(C, X) - y = oproj(C, y) - Z = oproj(C, Z) - W = oproj(C, W) + X, y, Z, W = oproj(C, X, y, Z, W) if W.shape[1] == 0: residuals = y - X @ beta @@ -287,10 +284,7 @@ def inverse_anderson_rubin_test( C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X = oproj(C, X) - y = oproj(C, y) - Z = oproj(C, Z) - W = oproj(C, W) + X, y, Z, W = oproj(C, X, y, Z, W) X = np.concatenate([X, W], axis=1) dfn = k - W.shape[1] @@ -308,9 +302,8 @@ def inverse_anderson_rubin_test( "critical_values must be one of 'chi2', 'f'. Got " f"{critical_values}." ) - X_proj = proj(Z, X) + X_proj, y_proj = proj(Z, X, y) X_orth = X - X_proj - y_proj = proj(Z, y) y_orth = y - y_proj A = X.T @ (X_proj - quantile * X_orth) diff --git a/ivmodels/tests/conditional_likelihood_ratio.py b/ivmodels/tests/conditional_likelihood_ratio.py index c004211..c0f7e33 100644 --- a/ivmodels/tests/conditional_likelihood_ratio.py +++ b/ivmodels/tests/conditional_likelihood_ratio.py @@ -308,14 +308,10 @@ def conditional_likelihood_ratio_test( C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X = oproj(C, X) - y = oproj(C, y) - Z = oproj(C, Z) - W = oproj(C, W) + X, y, Z, W = oproj(C, X, y, Z, W) # Z = scipy.linalg.qr(Z, mode="economic")[0] - X_proj = proj(Z, X) # , orthogonal=True) - y_proj = proj(Z, y) # , orthogonal=True) + X_proj, y_proj, W_proj = proj(Z, X, y, W) # , orthogonal=True) if mw == 0: residuals = y - X @ beta @@ -339,7 +335,6 @@ def conditional_likelihood_ratio_test( statistic = (n - k - C.shape[1]) * (ar - ar_min) elif mw > 0: - W_proj = proj(Z, W) XWy = np.concatenate([X, W, y.reshape(-1, 1)], axis=1) XWy_proj = np.concatenate([X_proj, W_proj, y_proj.reshape(-1, 1)], axis=1) @@ -366,7 +361,7 @@ def conditional_likelihood_ratio_test( XW_proj = np.concatenate([X_proj, W_proj], axis=1) residuals = y - X @ beta - kclass.predict(X=W) - residuals_proj = proj(Z, residuals) + residuals_proj = proj(Z, y_proj - X_proj @ beta - kclass.predict(X=W_proj)) residuals_orth = residuals - residuals_proj Sigma = (residuals_orth.T @ XW) / (residuals_orth.T @ residuals_orth) diff --git a/ivmodels/tests/lagrange_multiplier.py b/ivmodels/tests/lagrange_multiplier.py index ea5e7bf..0fa0e6b 100644 --- a/ivmodels/tests/lagrange_multiplier.py +++ b/ivmodels/tests/lagrange_multiplier.py @@ -128,16 +128,11 @@ def lagrange_multiplier_test(Z, X, y, beta, W=None, C=None, fit_intercept=True): C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X = oproj(C, X) - y = oproj(C, y) - Z = oproj(C, Z) - W = oproj(C, W) - - X_proj = proj(Z, X) - W_proj = proj(Z, W) + X, y, Z, W = oproj(C, X, y, Z, W) residuals = y - X @ beta - residuals_proj = proj(Z, residuals) + + X_proj, W_proj, residuals_proj = proj(Z, X, W, residuals) if W.shape[1] > 0: gamma_hat = KClass(kappa="liml").fit(X=W, y=y - X @ beta, Z=Z).coef_ @@ -174,21 +169,17 @@ def lagrange_multiplier_test(Z, X, y, beta, W=None, C=None, fit_intercept=True): p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx) else: - residuals = (y - X @ beta).reshape(-1, 1) - proj_residuals = proj(Z, residuals) - orth_residuals = residuals - proj_residuals + orth_residuals = residuals - residuals_proj + + sigma_hat = residuals.T @ orth_residuals + Sigma = orth_residuals.T @ X / sigma_hat # X - (y - X beta) * (y - X beta)^T M_Z X / (y - X beta)^T M_Z (y - X beta) - X_tilde = X - residuals @ (orth_residuals.T @ X) / ( - residuals.T @ orth_residuals - ) - proj_X_tilde = proj(Z, X_tilde) - X_tilde_proj_residuals = proj(proj_X_tilde, residuals) + X_tilde_proj = X_proj - np.outer(residuals_proj, Sigma) + + X_tilde_proj_residuals = proj(X_tilde_proj, residuals) # (y - X beta) P_{P_Z X_tilde} (y - X beta) / (y - X_beta) M_Z (y - X beta) - statistic = ( - np.square(X_tilde_proj_residuals).sum() - / (residuals.T @ orth_residuals).item() - ) + statistic = np.square(X_tilde_proj_residuals).sum() / sigma_hat statistic *= n - k - C.shape[1] diff --git a/ivmodels/tests/likelihood_ratio.py b/ivmodels/tests/likelihood_ratio.py index 686bade..97fab28 100644 --- a/ivmodels/tests/likelihood_ratio.py +++ b/ivmodels/tests/likelihood_ratio.py @@ -79,14 +79,9 @@ def likelihood_ratio_test(Z, X, y, beta, W=None, C=None, fit_intercept=True): C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X = oproj(C, X) - y = oproj(C, y) - Z = oproj(C, Z) - W = oproj(C, W) + X, y, Z, W = oproj(C, X, y, Z, W) - X_proj = proj(Z, X) - y_proj = proj(Z, y) - W_proj = proj(Z, W) + X_proj, y_proj, W_proj = proj(Z, X, y, W) XWy = np.concatenate([X, W, y.reshape(-1, 1)], axis=1) XWy_proj = np.concatenate([X_proj, W_proj, y_proj.reshape(-1, 1)], axis=1) @@ -159,16 +154,12 @@ def inverse_likelihood_ratio_test( C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X = oproj(C, X) - y = oproj(C, y) - Z = oproj(C, Z) - W = oproj(C, W) + X, y, Z, W = oproj(C, X, y, Z, W) X = np.concatenate([X, W], axis=1) - X_proj = proj(Z, X) + X_proj, y_proj = proj(Z, X, y) X_orth = X - X_proj - y_proj = proj(Z, y) y_orth = y - y_proj Xy_proj = np.concatenate([X_proj, y_proj.reshape(-1, 1)], axis=1) diff --git a/ivmodels/tests/pulse.py b/ivmodels/tests/pulse.py index 17e15b8..56ffaac 100644 --- a/ivmodels/tests/pulse.py +++ b/ivmodels/tests/pulse.py @@ -66,9 +66,7 @@ def pulse_test(Z, X, y, beta, C=None, W=None, fit_intercept=True): C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X = oproj(C, X) - y = oproj(C, y) - Z = oproj(C, Z) + X, y, Z = oproj(C, X, y, Z) residuals = y - X @ beta proj_residuals = proj(Z, residuals) @@ -91,12 +89,9 @@ def inverse_pulse_test(Z, X, y, C=None, alpha=0.05, fit_intercept=True): C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X = oproj(C, X) - y = oproj(C, y) - Z = oproj(C, Z) + X, y, Z = oproj(C, X, y, Z) - X_proj = proj(Z, X) - y_proj = proj(Z, y) + X_proj, y_proj = proj(Z, X, y) A = X.T @ (X_proj - 1 / (n - k - C.shape[1]) * quantile * X) b = -2 * (X_proj - 1 / (n - k - C.shape[1]) * quantile * X).T @ y diff --git a/ivmodels/tests/rank.py b/ivmodels/tests/rank.py index 415b947..20dde6a 100644 --- a/ivmodels/tests/rank.py +++ b/ivmodels/tests/rank.py @@ -56,8 +56,7 @@ def rank_test(Z, X, C=None, fit_intercept=True): C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X = oproj(C, X) - Z = oproj(C, Z) + X, Z = oproj(C, X, Z) X_proj = proj(Z, X) diff --git a/ivmodels/tests/wald.py b/ivmodels/tests/wald.py index b571bda..4d7ff70 100644 --- a/ivmodels/tests/wald.py +++ b/ivmodels/tests/wald.py @@ -79,10 +79,7 @@ def wald_test(Z, X, y, beta, W=None, C=None, estimator="tsls", fit_intercept=Tru C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X = oproj(C, X) - y = oproj(C, y) - Z = oproj(C, Z) - W = oproj(C, W) + X, y, Z, W = oproj(C, X, y, Z, W) XW = np.concatenate([X, W], axis=1) @@ -177,10 +174,7 @@ def inverse_wald_test( C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X = oproj(C, X) - y = oproj(C, y) - Z = oproj(C, Z) - W = oproj(C, W) + X, y, Z, W = oproj(C, X, y, Z, W) XW = np.concatenate([X, W], axis=1) @@ -190,11 +184,13 @@ def inverse_wald_test( residuals = y - kclass.predict(XW) hat_sigma_sq = np.sum(residuals**2) / (n - XW.shape[1] - C.shape[1]) - Xkappa = kclass.kappa_ * proj(Z, X) + (1 - kclass.kappa_) * X + X_proj, W_proj = proj(Z, X, W) + + Xkappa = kclass.kappa_ * X_proj + (1 - kclass.kappa_) * X A = X.T @ Xkappa if W.shape[1] > 0: - Wkappa = kclass.kappa_ * proj(Z, W) + (1 - kclass.kappa_) * W + Wkappa = kclass.kappa_ * W_proj + (1 - kclass.kappa_) * W A = A - Xkappa.T @ W @ np.linalg.solve(Wkappa.T @ W, W.T @ Xkappa) b = -2 * A @ beta[: X.shape[1]] diff --git a/ivmodels/utils.py b/ivmodels/utils.py index 130575f..70aa349 100644 --- a/ivmodels/utils.py +++ b/ivmodels/utils.py @@ -8,46 +8,86 @@ _PANDAS_INSTALLED = False -def proj(Z, f): +def proj(Z, *args): """Project f onto the subspace spanned by Z. Parameters ---------- Z: np.ndarray of dimension (n, d_Z) The Z matrix. If None, returns np.zeros_like(f). - f: np.ndarray of dimension (n, d_f) or (n,) - The vector to project. + *args: np.ndarrays of dimension (n, d_f) or (n,) + vector or matrices to project. Returns ------- np.ndarray of dimension (n, d_f) or (n,) - Projection of f onto the subspace spanned by Z. Same dimension as f. + Projection of args onto the subspace spanned by Z. Same number of + outputs as args. Same dimension as args """ if Z is None: - return np.zeros_like(f) + return (*(np.zeros_like(f) for f in args),) - return np.dot(Z, np.linalg.lstsq(Z, f, rcond=None)[0]) + for f in args: + if len(f.shape) > 2: + raise ValueError( + f"*args should have shapes (n, d_f) or (n,). Got {f.shape}." + ) + if f.shape[0] != Z.shape[0]: + raise ValueError(f"Shape mismatch: Z.shape={Z.shape}, f.shape={f.shape}.") + if len(args) == 1: + return np.dot(Z, np.linalg.lstsq(Z, args[0], rcond=None)[0]) -def oproj(Z, f): - """Project f onto the subspace orthogonal to the subspace spanned by Z. + csum = np.cumsum([f.shape[1] if len(f.shape) == 2 else 1 for f in args]) + csum = [0] + csum.tolist() + + fs = np.hstack([f.reshape(Z.shape[0], -1) for f in args]) + fs = np.dot(Z, np.linalg.lstsq(Z, fs, rcond=None)[0]) + + return ( + *(fs[:, i:j].reshape(f.shape) for i, j, f in zip(csum[:-1], csum[1:], args)), + ) + + +def oproj(Z, *args): + """Project f onto the subspace orthogonal to Z. Parameters ---------- - Z: np.ndarray of dimension (n, d_Z) or None + Z: np.ndarray of dimension (n, d_Z) The Z matrix. If None, returns f. - f: np.ndarray of dimension (n, d_f) or (n,) - The vector to project. + *args: np.ndarrays of dimension (n, d_f) or (n,) + vector or matrices to project. Returns ------- np.ndarray of dimension (n, d_f) or (n,) - Projection of f onto the subspace spanned by Z. Same dimension as f. + Projection of args onto the subspace spanned by Z. Same number of + outputs as args. Same dimension as args """ if Z is None: - return f + return (*args,) + + for f in args: + if len(f.shape) > 2: + raise ValueError( + f"*args should have shapes (n, d_f) or (n,). Got {f.shape}." + ) + if f.shape[0] != Z.shape[0]: + raise ValueError(f"Shape mismatch: Z.shape={Z.shape}, f.shape={f.shape}.") + + if len(args) == 1: + return args[0] - np.dot(Z, np.linalg.lstsq(Z, args[0], rcond=None)[0]) + + csum = np.cumsum([f.shape[1] if len(f.shape) == 2 else 1 for f in args]) + csum = [0] + csum.tolist() + + fs = np.hstack([f.reshape(Z.shape[0], -1) for f in args]) + fs = fs - np.dot(Z, np.linalg.lstsq(Z, fs, rcond=None)[0]) - return f - np.dot(Z, np.linalg.lstsq(Z, f, rcond=None)[0]) + return ( + *(fs[:, i:j].reshape(f.shape) for i, j, f in zip(csum[:-1], csum[1:], args)), + ) def to_numpy(x): diff --git a/tests/test_utils.py b/tests/test_utils.py index 96d5d3e..6adef43 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from ivmodels.utils import oproj, proj @@ -17,3 +18,73 @@ def test_oproj(): Z = rng.normal(0, 1, (100, 5)) assert np.allclose(X - proj(Z, X), oproj(Z, X)) + + +def test_proj_multiple_args(): + rng = np.random.RandomState(0) + + X = rng.normal(0, 1, (100, 2)) + z1 = rng.normal(0, 1, (100, 5)) + z2 = rng.normal(0, 1, (100, 0)) + z3 = rng.normal(0, 1, (100,)) + + z1_proj, z2_proj, z3_proj = proj(X, z1, z2, z3) + assert (z1_proj.shape, z2_proj.shape, z3_proj.shape) == ( + z1.shape, + z2.shape, + z3.shape, + ) + assert np.allclose(proj(X, z1), X @ np.linalg.inv(X.T @ X) @ X.T @ z1) + assert np.allclose(proj(X, z2), X @ np.linalg.inv(X.T @ X) @ X.T @ z2) + assert np.allclose(proj(X, z3), X @ np.linalg.inv(X.T @ X) @ X.T @ z3) + + +def test_oproj_multiple_args(): + rng = np.random.RandomState(0) + + X = rng.normal(0, 1, (100, 2)) + z1 = rng.normal(0, 1, (100, 5)) + z2 = rng.normal(0, 1, (100, 0)) + z3 = rng.normal(0, 1, (100,)) + + z1_proj, z2_proj, z3_proj = proj(X, z1, z2, z3) + assert (z1_proj.shape, z2_proj.shape, z3_proj.shape) == ( + z1.shape, + z2.shape, + z3.shape, + ) + assert np.allclose(oproj(X, z1), z1 - X @ np.linalg.inv(X.T @ X) @ X.T @ z1) + assert np.allclose(oproj(X, z2), z2 - X @ np.linalg.inv(X.T @ X) @ X.T @ z2) + assert np.allclose(oproj(X, z3), z3 - X @ np.linalg.inv(X.T @ X) @ X.T @ z3) + + +def test_proj_raises(): + rng = np.random.RandomState(0) + + X = rng.normal(0, 1, (100, 2)) + z = rng.normal(0, 1, (100, 5)) + + with pytest.raises(ValueError, match="Shape mismatch:"): + proj(X, z[1:, :], z) + + with pytest.raises(ValueError, match="Shape mismatch:"): + proj(X, z[1:, 0], z) + + with pytest.raises(ValueError, match="args should have shapes"): + proj(X, z.reshape(10, 10, 5), z) + + +def test_oproj_raises(): + rng = np.random.RandomState(0) + + X = rng.normal(0, 1, (100, 2)) + z = rng.normal(0, 1, (100, 5)) + + with pytest.raises(ValueError, match="Shape mismatch:"): + oproj(X, z[1:, :], z) + + with pytest.raises(ValueError, match="Shape mismatch:"): + oproj(X, z[1:, 0], z) + + with pytest.raises(ValueError, match="args should have shapes"): + oproj(X, z.reshape(10, 10, 5), z)