Make proj and oproj accept multiple args (#78)

* Make proj and oproj accept multiple args. * Fix test and add tests. * Changelog * Fix regex
mlondschien · Jun 6, 2024 · f5bbf5b · f5bbf5b
1 parent 40bafc9
commit f5bbf5b
Show file tree

Hide file tree

Showing 11 changed files with 172 additions and 95 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -6,5 +6,9 @@ Unreleased
 
 **New features:**
 
-- New method func:`ivmodels.simulate.simulate_guggenberger12` to draw from the data
-  generating process of Guggenberger (2012).
+- New method :func:`ivmodels.simulate.simulate_guggenberger12` to draw from the data
+  generating process of Guggenberger (2012).
+
+- The utility functions :func:`ivmodels.utils.proj` and :func:`ivmodels.utils.oproj`
+  now accept multiple args to be projected. Usage of this results in performance
+  improvements.
diff --git a/ivmodels/models/kclass.py b/ivmodels/models/kclass.py
@@ -5,7 +5,7 @@
 import scipy
 from glum import GeneralizedLinearRegressor
 
-from ivmodels.utils import proj, to_numpy
+from ivmodels.utils import oproj, proj, to_numpy
 
 try:
     import pandas as pd
@@ -246,7 +246,9 @@ def _fuller_alpha(self, kappa):
     def _spectrum(X, y, Z=None, X_proj=None, y_proj=None, subset_by_index=None):
         if (y_proj is None or X_proj is None) and Z is None:
             raise ValueError("Either Z or both X_proj and y_proj must be specified.")
-        if X_proj is None:
+        if X_proj is None and y_proj is None:
+            X_proj, y_proj = proj(Z, X, y)
+        elif X_proj is None:
             X_proj = proj(Z, X)
         if y_proj is None:
             y_proj = proj(Z, y)
@@ -379,14 +381,13 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs):
         if C.shape[1] > 0:
             Z = np.hstack([Z, C])
             # save some compute here, as proj([Z, C], C) = C
-            X_proj = np.hstack([proj(Z, X), C])
+            X_proj, y_proj = proj(Z, X, y)
+            X_proj = np.hstack([X_proj, C])
             X = np.hstack([X, C])
             x_mean = np.hstack([x_mean, c_mean])
 
         else:
-            X_proj = proj(Z, X)
-
-        y_proj = proj(Z, y)
+            X_proj, y_proj = proj(Z, X, y)
 
         if isinstance(self.kappa, str):
             if self.kappa.lower() in {"tsls", "2sls"}:
@@ -398,9 +399,10 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs):
                 # If C!=None, we compute the ar_min as if we removed C from all design
                 # matrices. I.e., we replace Z <- M_C Z, X <- M_C X, y <- M_C y.
                 # We also exclude the columns in X coming from C.
+                X_orth_C, y_orth_C = oproj(C, X[:, :mx], y)
                 self.ar_min_ = self.ar_min(
-                    X[:, :mx] - proj(C, X[:, :mx]),
-                    y - proj(C, y),
+                    X_orth_C,
+                    y_orth_C,
                     X_proj=X_proj[:, :mx],
                     y_proj=y_proj,
                 )

diff --git a/ivmodels/tests/anderson_rubin.py b/ivmodels/tests/anderson_rubin.py
@@ -175,10 +175,7 @@ def anderson_rubin_test(
         C = np.hstack([np.ones((n, 1)), C])
 
     if C.shape[1] > 0:
-        X = oproj(C, X)
-        y = oproj(C, y)
-        Z = oproj(C, Z)
-        W = oproj(C, W)
+        X, y, Z, W = oproj(C, X, y, Z, W)
 
     if W.shape[1] == 0:
         residuals = y - X @ beta
@@ -287,10 +284,7 @@ def inverse_anderson_rubin_test(
         C = np.hstack([np.ones((n, 1)), C])
 
     if C.shape[1] > 0:
-        X = oproj(C, X)
-        y = oproj(C, y)
-        Z = oproj(C, Z)
-        W = oproj(C, W)
+        X, y, Z, W = oproj(C, X, y, Z, W)
 
     X = np.concatenate([X, W], axis=1)
     dfn = k - W.shape[1]
@@ -308,9 +302,8 @@ def inverse_anderson_rubin_test(
             "critical_values must be one of 'chi2', 'f'. Got " f"{critical_values}."
         )
 
-    X_proj = proj(Z, X)
+    X_proj, y_proj = proj(Z, X, y)
     X_orth = X - X_proj
-    y_proj = proj(Z, y)
     y_orth = y - y_proj
 
     A = X.T @ (X_proj - quantile * X_orth)

diff --git a/ivmodels/tests/conditional_likelihood_ratio.py b/ivmodels/tests/conditional_likelihood_ratio.py
@@ -308,14 +308,10 @@ def conditional_likelihood_ratio_test(
         C = np.hstack([np.ones((n, 1)), C])
 
     if C.shape[1] > 0:
-        X = oproj(C, X)
-        y = oproj(C, y)
-        Z = oproj(C, Z)
-        W = oproj(C, W)
+        X, y, Z, W = oproj(C, X, y, Z, W)
 
     # Z = scipy.linalg.qr(Z, mode="economic")[0]
-    X_proj = proj(Z, X)  # , orthogonal=True)
-    y_proj = proj(Z, y)  # , orthogonal=True)
+    X_proj, y_proj, W_proj = proj(Z, X, y, W)  # , orthogonal=True)
 
     if mw == 0:
         residuals = y - X @ beta
@@ -339,7 +335,6 @@ def conditional_likelihood_ratio_test(
         statistic = (n - k - C.shape[1]) * (ar - ar_min)
 
     elif mw > 0:
-        W_proj = proj(Z, W)
         XWy = np.concatenate([X, W, y.reshape(-1, 1)], axis=1)
         XWy_proj = np.concatenate([X_proj, W_proj, y_proj.reshape(-1, 1)], axis=1)
 
@@ -366,7 +361,7 @@ def conditional_likelihood_ratio_test(
             XW_proj = np.concatenate([X_proj, W_proj], axis=1)
 
             residuals = y - X @ beta - kclass.predict(X=W)
-            residuals_proj = proj(Z, residuals)
+            residuals_proj = proj(Z, y_proj - X_proj @ beta - kclass.predict(X=W_proj))
             residuals_orth = residuals - residuals_proj
 
             Sigma = (residuals_orth.T @ XW) / (residuals_orth.T @ residuals_orth)

diff --git a/ivmodels/tests/lagrange_multiplier.py b/ivmodels/tests/lagrange_multiplier.py
@@ -128,16 +128,11 @@ def lagrange_multiplier_test(Z, X, y, beta, W=None, C=None, fit_intercept=True):
         C = np.hstack([np.ones((n, 1)), C])
 
     if C.shape[1] > 0:
-        X = oproj(C, X)
-        y = oproj(C, y)
-        Z = oproj(C, Z)
-        W = oproj(C, W)
-
-    X_proj = proj(Z, X)
-    W_proj = proj(Z, W)
+        X, y, Z, W = oproj(C, X, y, Z, W)
 
     residuals = y - X @ beta
-    residuals_proj = proj(Z, residuals)
+
+    X_proj, W_proj, residuals_proj = proj(Z, X, W, residuals)
 
     if W.shape[1] > 0:
         gamma_hat = KClass(kappa="liml").fit(X=W, y=y - X @ beta, Z=Z).coef_
@@ -174,21 +169,17 @@ def lagrange_multiplier_test(Z, X, y, beta, W=None, C=None, fit_intercept=True):
         p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx)
 
     else:
-        residuals = (y - X @ beta).reshape(-1, 1)
-        proj_residuals = proj(Z, residuals)
-        orth_residuals = residuals - proj_residuals
+        orth_residuals = residuals - residuals_proj
+
+        sigma_hat = residuals.T @ orth_residuals
+        Sigma = orth_residuals.T @ X / sigma_hat
 
         # X - (y - X beta) * (y - X beta)^T M_Z X / (y - X beta)^T M_Z (y - X beta)
-        X_tilde = X - residuals @ (orth_residuals.T @ X) / (
-            residuals.T @ orth_residuals
-        )
-        proj_X_tilde = proj(Z, X_tilde)
-        X_tilde_proj_residuals = proj(proj_X_tilde, residuals)
+        X_tilde_proj = X_proj - np.outer(residuals_proj, Sigma)
+
+        X_tilde_proj_residuals = proj(X_tilde_proj, residuals)
         # (y - X beta) P_{P_Z X_tilde} (y - X beta) / (y - X_beta) M_Z (y - X beta)
-        statistic = (
-            np.square(X_tilde_proj_residuals).sum()
-            / (residuals.T @ orth_residuals).item()
-        )
+        statistic = np.square(X_tilde_proj_residuals).sum() / sigma_hat
 
         statistic *= n - k - C.shape[1]
 

diff --git a/ivmodels/tests/likelihood_ratio.py b/ivmodels/tests/likelihood_ratio.py
@@ -79,14 +79,9 @@ def likelihood_ratio_test(Z, X, y, beta, W=None, C=None, fit_intercept=True):
         C = np.hstack([np.ones((n, 1)), C])
 
     if C.shape[1] > 0:
-        X = oproj(C, X)
-        y = oproj(C, y)
-        Z = oproj(C, Z)
-        W = oproj(C, W)
+        X, y, Z, W = oproj(C, X, y, Z, W)
 
-    X_proj = proj(Z, X)
-    y_proj = proj(Z, y)
-    W_proj = proj(Z, W)
+    X_proj, y_proj, W_proj = proj(Z, X, y, W)
 
     XWy = np.concatenate([X, W, y.reshape(-1, 1)], axis=1)
     XWy_proj = np.concatenate([X_proj, W_proj, y_proj.reshape(-1, 1)], axis=1)
@@ -159,16 +154,12 @@ def inverse_likelihood_ratio_test(
         C = np.hstack([np.ones((n, 1)), C])
 
     if C.shape[1] > 0:
-        X = oproj(C, X)
-        y = oproj(C, y)
-        Z = oproj(C, Z)
-        W = oproj(C, W)
+        X, y, Z, W = oproj(C, X, y, Z, W)
 
     X = np.concatenate([X, W], axis=1)
 
-    X_proj = proj(Z, X)
+    X_proj, y_proj = proj(Z, X, y)
     X_orth = X - X_proj
-    y_proj = proj(Z, y)
     y_orth = y - y_proj
 
     Xy_proj = np.concatenate([X_proj, y_proj.reshape(-1, 1)], axis=1)

diff --git a/ivmodels/tests/pulse.py b/ivmodels/tests/pulse.py
@@ -66,9 +66,7 @@ def pulse_test(Z, X, y, beta, C=None, W=None, fit_intercept=True):
         C = np.hstack([np.ones((n, 1)), C])
 
     if C.shape[1] > 0:
-        X = oproj(C, X)
-        y = oproj(C, y)
-        Z = oproj(C, Z)
+        X, y, Z = oproj(C, X, y, Z)
 
     residuals = y - X @ beta
     proj_residuals = proj(Z, residuals)
@@ -91,12 +89,9 @@ def inverse_pulse_test(Z, X, y, C=None, alpha=0.05, fit_intercept=True):
         C = np.hstack([np.ones((n, 1)), C])
 
     if C.shape[1] > 0:
-        X = oproj(C, X)
-        y = oproj(C, y)
-        Z = oproj(C, Z)
+        X, y, Z = oproj(C, X, y, Z)
 
-    X_proj = proj(Z, X)
-    y_proj = proj(Z, y)
+    X_proj, y_proj = proj(Z, X, y)
 
     A = X.T @ (X_proj - 1 / (n - k - C.shape[1]) * quantile * X)
     b = -2 * (X_proj - 1 / (n - k - C.shape[1]) * quantile * X).T @ y

diff --git a/ivmodels/tests/rank.py b/ivmodels/tests/rank.py
@@ -56,8 +56,7 @@ def rank_test(Z, X, C=None, fit_intercept=True):
         C = np.hstack([np.ones((n, 1)), C])
 
     if C.shape[1] > 0:
-        X = oproj(C, X)
-        Z = oproj(C, Z)
+        X, Z = oproj(C, X, Z)
 
     X_proj = proj(Z, X)
 

diff --git a/ivmodels/tests/wald.py b/ivmodels/tests/wald.py
@@ -79,10 +79,7 @@ def wald_test(Z, X, y, beta, W=None, C=None, estimator="tsls", fit_intercept=Tru
         C = np.hstack([np.ones((n, 1)), C])
 
     if C.shape[1] > 0:
-        X = oproj(C, X)
-        y = oproj(C, y)
-        Z = oproj(C, Z)
-        W = oproj(C, W)
+        X, y, Z, W = oproj(C, X, y, Z, W)
 
     XW = np.concatenate([X, W], axis=1)
 
@@ -177,10 +174,7 @@ def inverse_wald_test(
         C = np.hstack([np.ones((n, 1)), C])
 
     if C.shape[1] > 0:
-        X = oproj(C, X)
-        y = oproj(C, y)
-        Z = oproj(C, Z)
-        W = oproj(C, W)
+        X, y, Z, W = oproj(C, X, y, Z, W)
 
     XW = np.concatenate([X, W], axis=1)
 
@@ -190,11 +184,13 @@ def inverse_wald_test(
     residuals = y - kclass.predict(XW)
     hat_sigma_sq = np.sum(residuals**2) / (n - XW.shape[1] - C.shape[1])
 
-    Xkappa = kclass.kappa_ * proj(Z, X) + (1 - kclass.kappa_) * X
+    X_proj, W_proj = proj(Z, X, W)
+
+    Xkappa = kclass.kappa_ * X_proj + (1 - kclass.kappa_) * X
 
     A = X.T @ Xkappa
     if W.shape[1] > 0:
-        Wkappa = kclass.kappa_ * proj(Z, W) + (1 - kclass.kappa_) * W
+        Wkappa = kclass.kappa_ * W_proj + (1 - kclass.kappa_) * W
         A = A - Xkappa.T @ W @ np.linalg.solve(Wkappa.T @ W, W.T @ Xkappa)
 
     b = -2 * A @ beta[: X.shape[1]]

diff --git a/ivmodels/utils.py b/ivmodels/utils.py
@@ -8,46 +8,86 @@
     _PANDAS_INSTALLED = False
 
 
-def proj(Z, f):
+def proj(Z, *args):
     """Project f onto the subspace spanned by Z.
 
     Parameters
     ----------
     Z: np.ndarray of dimension (n, d_Z)
         The Z matrix. If None, returns np.zeros_like(f).
-    f: np.ndarray of dimension (n, d_f) or (n,)
-        The vector to project.
+    *args: np.ndarrays of dimension (n, d_f) or (n,)
+        vector or matrices to project.
 
     Returns
     -------
     np.ndarray of dimension (n, d_f) or (n,)
-        Projection of f onto the subspace spanned by Z. Same dimension as f.
+        Projection of args onto the subspace spanned by Z. Same number of
+        outputs as args. Same dimension as args
     """
     if Z is None:
-        return np.zeros_like(f)
+        return (*(np.zeros_like(f) for f in args),)
 
-    return np.dot(Z, np.linalg.lstsq(Z, f, rcond=None)[0])
+    for f in args:
+        if len(f.shape) > 2:
+            raise ValueError(
+                f"*args should have shapes (n, d_f) or (n,). Got {f.shape}."
+            )
+        if f.shape[0] != Z.shape[0]:
+            raise ValueError(f"Shape mismatch: Z.shape={Z.shape}, f.shape={f.shape}.")
 
+    if len(args) == 1:
+        return np.dot(Z, np.linalg.lstsq(Z, args[0], rcond=None)[0])
 
-def oproj(Z, f):
-    """Project f onto the subspace orthogonal to the subspace spanned by Z.
+    csum = np.cumsum([f.shape[1] if len(f.shape) == 2 else 1 for f in args])
+    csum = [0] + csum.tolist()
+
+    fs = np.hstack([f.reshape(Z.shape[0], -1) for f in args])
+    fs = np.dot(Z, np.linalg.lstsq(Z, fs, rcond=None)[0])
+
+    return (
+        *(fs[:, i:j].reshape(f.shape) for i, j, f in zip(csum[:-1], csum[1:], args)),
+    )
+
+
+def oproj(Z, *args):
+    """Project f onto the subspace orthogonal to Z.
 
     Parameters
     ----------
-    Z: np.ndarray of dimension (n, d_Z) or None
+    Z: np.ndarray of dimension (n, d_Z)
         The Z matrix. If None, returns f.
-    f: np.ndarray of dimension (n, d_f) or (n,)
-        The vector to project.
+    *args: np.ndarrays of dimension (n, d_f) or (n,)
+        vector or matrices to project.
 
     Returns
     -------
     np.ndarray of dimension (n, d_f) or (n,)
-        Projection of f onto the subspace spanned by Z. Same dimension as f.
+        Projection of args onto the subspace spanned by Z. Same number of
+        outputs as args. Same dimension as args
     """
     if Z is None:
-        return f
+        return (*args,)
+
+    for f in args:
+        if len(f.shape) > 2:
+            raise ValueError(
+                f"*args should have shapes (n, d_f) or (n,). Got {f.shape}."
+            )
+        if f.shape[0] != Z.shape[0]:
+            raise ValueError(f"Shape mismatch: Z.shape={Z.shape}, f.shape={f.shape}.")
+
+    if len(args) == 1:
+        return args[0] - np.dot(Z, np.linalg.lstsq(Z, args[0], rcond=None)[0])
+
+    csum = np.cumsum([f.shape[1] if len(f.shape) == 2 else 1 for f in args])
+    csum = [0] + csum.tolist()
+
+    fs = np.hstack([f.reshape(Z.shape[0], -1) for f in args])
+    fs = fs - np.dot(Z, np.linalg.lstsq(Z, fs, rcond=None)[0])
 
-    return f - np.dot(Z, np.linalg.lstsq(Z, f, rcond=None)[0])
+    return (
+        *(fs[:, i:j].reshape(f.shape) for i, j, f in zip(csum[:-1], csum[1:], args)),
+    )
 
 
 def to_numpy(x):