From 94b8f8de6fe914286ef07e5b0aabe1630d3b7838 Mon Sep 17 00:00:00 2001 From: Malte Londschien <61679398+mlondschien@users.noreply.github.com> Date: Thu, 18 Jul 2024 12:04:17 +0200 Subject: [PATCH] New argument for tests: D, included exogenous variables (#94) * New argument for Wald test: D * New argument for AR: D * Additional arg to simulate_gaussian_iv: md * New argument for likelihood_ratio: D * Add D argument to lagrange_multiplier. * Add D argument to CLR test. * Fix some tests. * Some bugfixes. * Fix last test. * Changelog. * Add test. * More testing. * Fix inverse_wald output. * Docstring. * Revert rank. * lagrange docstring. * Fix LM inverse. * Fix warnings. * Don't test LM. * This time correctly. * Remove superfluous code. --- CHANGELOG.rst | 10 + benchmarks/benchmarks/kclass.py | 2 +- benchmarks/benchmarks/tests.py | 2 +- ivmodels/models/kclass.py | 18 +- ivmodels/quadric.py | 6 +- ivmodels/simulate.py | 44 +-- ivmodels/summary.py | 31 ++- ivmodels/tests/anderson_rubin.py | 76 ++++-- .../tests/conditional_likelihood_ratio.py | 96 ++++--- ivmodels/tests/lagrange_multiplier.py | 90 ++++--- ivmodels/tests/likelihood_ratio.py | 114 +++++--- ivmodels/tests/pulse.py | 30 ++- ivmodels/tests/rank.py | 2 +- ivmodels/tests/utils.py | 45 +++- ivmodels/tests/wald.py | 88 +++--- tests/models/test_anchor_regression.py | 8 +- tests/models/test_compare_to_linearmodels.py | 11 +- tests/models/test_k_class.py | 20 +- tests/models/test_pulse.py | 2 +- tests/test_simulate.py | 7 +- tests/test_summary.py | 2 +- tests/tests/test_anderson_rubin.py | 2 +- .../test_conditional_likelihood_ratio.py | 25 ++ tests/tests/test_lagrange_multiplier.py | 39 ++- tests/tests/test_likelihood_ratio.py | 25 +- tests/tests/test_rank.py | 4 +- tests/tests/test_tests.py | 251 +++++++++++++----- tests/tests/test_wald.py | 31 ++- 28 files changed, 724 insertions(+), 357 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a92c0c0..dc052ee 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -21,6 +21,16 @@ Changelog `endogenous_names_`, `exogenous_names_`, and `instrument_names_`. If pandas is installed, there's also `names_coefs_`. +- The tests :func:`~ivmodels.tests.anderson_rubin_test`, + :func:`~ivmodels.tests.lagrange_multiplier_test`, + :func:`~ivmodels.tests.likelihood_ratio_test`, and + :func:`~ivmodels.tests.wald_test` and their inverses + :func:`~ivmodels.tests.inverse_anderson_rubin_test`, + :func:`~ivmodels.tests.inverse_lagrange_multiplier_test`, + :func:`~ivmodels.tests.inverse_likelihood_ratio_test`, and + :func:`~ivmodels.tests.inverse_wald_test` now support an additional parameter `D` + of exogenous covariates to be included in the test. This is not supported for + the conditional likelihood ratio test. 
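Illustrative usage sketch (not part of the patch): the snippet below shows how the new ``D`` argument described above might be passed to a test and its inverse, following the signatures changed in this diff. The data shapes, the choice ``md=1``, and the hypothesized ``beta = 0`` are assumptions made only for the example.

import numpy as np

from ivmodels.simulate import simulate_gaussian_iv
from ivmodels.tests import anderson_rubin_test, inverse_anderson_rubin_test

# simulate_gaussian_iv now also returns D, the included exogenous regressors of interest.
Z, X, y, C, W, D = simulate_gaussian_iv(n=100, mx=1, k=3, mw=1, mc=1, md=1)

# beta covers the coefficients of both X and D, so it has length mx + md = 2 here.
beta = np.zeros(X.shape[1] + D.shape[1])

statistic, p_value = anderson_rubin_test(Z, X, y, beta, W=W, C=C, D=D)
confidence_set = inverse_anderson_rubin_test(Z, X, y, alpha=0.05, W=W, C=C, D=D)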
**Other changes:** diff --git a/benchmarks/benchmarks/kclass.py b/benchmarks/benchmarks/kclass.py index 7b69a93..de54004 100644 --- a/benchmarks/benchmarks/kclass.py +++ b/benchmarks/benchmarks/kclass.py @@ -16,7 +16,7 @@ def setup(self, kappa, data): Z, X, y, C, _ = simulate_guggenberger12(n=1000, k=10, seed=0) else: n, mx, k, mc = data - Z, X, y, C, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=mx, mc=mc) + Z, X, y, C, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=mx, mc=mc) self.data = { "Z": Z, diff --git a/benchmarks/benchmarks/tests.py b/benchmarks/benchmarks/tests.py index 40efbbb..1a6aa2a 100644 --- a/benchmarks/benchmarks/tests.py +++ b/benchmarks/benchmarks/tests.py @@ -32,7 +32,7 @@ def setup(self, n, data): ) else: k, mx, mw, mc = data - Z, X, y, C, W, beta = simulate_gaussian_iv( + Z, X, y, C, W, _, beta = simulate_gaussian_iv( n=n, mx=mx, k=k, u=mx, mc=mc, mw=mw, return_beta=True ) diff --git a/ivmodels/models/kclass.py b/ivmodels/models/kclass.py index 7636d84..4f8d5bb 100644 --- a/ivmodels/models/kclass.py +++ b/ivmodels/models/kclass.py @@ -363,7 +363,8 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs): "Variable names must not contain 'intercept' when fit_intercept=True." ) - n, mx = X.shape + n, k = Z.shape + mx, mc = X.shape[1], C.shape[1] # Including an intercept is equivalent to replacing y <- M_1 y, X <- M_1 X, # C <- M_1 C, Z <- M_1 Z, where M_1 = Id - 1/n 1 1^T is the projection @@ -383,13 +384,13 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs): y = y - y_mean Z = Z - Z.mean(axis=0) - if C.shape[1] > 0: + if mc > 0: c_mean = C.mean(axis=0) C = C - c_mean else: c_mean = np.zeros(0) else: - x_mean, y_mean, c_mean = np.zeros(mx), 0, np.zeros(C.shape[1]) + x_mean, y_mean, c_mean = np.zeros(mx), 0, np.zeros(mc) # Including exogenous covariates C can be done by the following approaches: # @@ -403,7 +404,7 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs): # replace Z <- [Z C], X <- [X C], y <- y, and apply the k-class estimator to # the augmented data. This is the approach taken here. Care needs to be taken # to compute kappa. For this, we follow the first approach. See below. - if C.shape[1] > 0: + if mc > 0: Z = np.hstack([Z, C]) # save some compute here, as proj([Z, C], C) = C X_proj, y_proj = proj(Z, X, y) @@ -432,7 +433,7 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs): y_proj=y_proj, ) self.kappa_liml_ = 1 + self.ar_min_ - self.kappa_ = self.kappa_liml_ - self.fuller_alpha_ / (n - Z.shape[1]) + self.kappa_ = self.kappa_liml_ - self.fuller_alpha_ / (n - k - mc) else: self.kappa_ = self.kappa @@ -482,7 +483,7 @@ def predict(self, X, C=None, *args, **kwargs): # noqa D (X, _, C), _ = self._X_Z_C(X, C=C, Z=None, predict=True) return super().predict(np.hstack([X, C]), *args, **kwargs) - def summary(self, X, y, Z=None, C=None, test="wald (liml)", alpha=0.05, **kwargs): + def summary(self, X, y, Z=None, C=None, test="wald", alpha=0.05, **kwargs): """ Create Summary object for the fitted model. @@ -504,8 +505,9 @@ def summary(self, X, y, Z=None, C=None, test="wald (liml)", alpha=0.05, **kwargs specified, ``C`` must be ``None``. If ``C`` is specified, ``exogenous_names`` and ``exogenous_regex`` must be ``None``. test: str, optional, default="wald (liml)" - The test to use. Must be one of "wald (liml)", "wald (tsls)", "anderson - rubin", "AR", or "lagrange multiplier". + The test to use. Must be one of "wald", "anderson-rubin", + "lagrange multiplier", "likelihood-ratio", or + "conditional likelihood-ratio". 
alpha: float, optional, default=0.05 The significance level. **kwargs diff --git a/ivmodels/quadric.py b/ivmodels/quadric.py index fa480a2..057a4a4 100644 --- a/ivmodels/quadric.py +++ b/ivmodels/quadric.py @@ -137,9 +137,9 @@ def _boundary(self, num=200): ) if np.all(self.D > 0) and (self.c_standardized > 0): - return np.zeros(shape=(0, 1)) + return np.zeros(shape=(0, len(self.D))) if np.all(self.D < 0) and (self.c_standardized <= 0): - return np.zeros(shape=(0, 1)) + return np.zeros(shape=(0, len(self.D))) # If both entries of D have the opposite sign as c_standardized, the # boundary of the quadric is an ellipse. @@ -239,7 +239,7 @@ def project(self, coordinates): if mask.all(): return self - if ( # [~mask, ~mask] will to a .reshape(-1) on the matrix + if ( # [~mask, ~mask] does a .reshape(-1) on the matrix scipy.linalg.eigvalsh(self.A[:, ~mask][~mask, :], subset_by_index=[0, 0])[0] < 0 ): diff --git a/ivmodels/simulate.py b/ivmodels/simulate.py index 142ce99..88821b3 100644 --- a/ivmodels/simulate.py +++ b/ivmodels/simulate.py @@ -5,7 +5,7 @@ def simulate_guggenberger12( - n, *, k, seed=0, h11=100, h12=1, rho=0.95, cov=None, return_beta=False + n, *, k, seed=0, h11=100, h12=1, rho=0.95, cov=None, return_beta=False, md=0 ): """ Generate data by process as proposed by :cite:t:`guggenberger2012asymptotic`. @@ -96,14 +96,18 @@ def simulate_guggenberger12( Z = rng.normal(0, 1, (n, k)) + Pi_ZD = rng.normal(0, 1, (k, md)) + D = rng.normal(0, 1, (n, md)) + Z @ Pi_ZD + delta = rng.normal(0, 0.1, (md, 1)) + X = Z @ Pi_X + noise[:, 1:2] W = Z @ Pi_W + noise[:, 2:] - y = X @ beta + W @ gamma + noise[:, 0:1] + y = X @ beta + W @ gamma + D @ delta + noise[:, 0:1] if return_beta: - return Z, X, y.flatten(), None, W, beta.flatten() + return Z, X, y.flatten(), None, W, D, np.concatenate([beta, delta]).flatten() else: - return Z, X, y.flatten(), None, W + return Z, X, y.flatten(), None, W, D def simulate_gaussian_iv( @@ -114,6 +118,7 @@ def simulate_gaussian_iv( u=None, mw=0, mc=0, + md=0, seed=0, include_intercept=True, return_beta=False, @@ -171,32 +176,37 @@ def simulate_gaussian_iv( ux = rng.normal(0, 1, (u, m)) uy = rng.normal(0, 1, (u, 1)) - alpha = rng.normal(0, 1, (mc, 1)) + alpha = rng.normal(0, 1, (mc + md, 1)) beta = rng.normal(0, 1, (m, 1)) Pi_ZX = rng.normal(0, 1, (k, m)) - Pi_CX = rng.normal(0, 1, (mc, m)) - Pi_CZ = rng.normal(0, 1, (mc, k)) + Pi_CX = rng.normal(0, 1, (mc + md, m)) + Pi_CZ = rng.normal(0, 1, (mc + md, k)) U = rng.normal(0, 1, (n, u)) - C = rng.normal(0, 1, (n, mc)) + include_intercept * rng.normal(0, 1, (1, mc)) + C = rng.normal(0, 1, (n, mc + md)) + if include_intercept: + C += rng.normal(0, 1, (1, mc + md)) - Z = ( - rng.normal(0, 1, (n, k)) - + include_intercept * rng.normal(0, 1, (1, k)) - + C @ Pi_CZ - ) + Z = rng.normal(0, 1, (n, k)) + C @ Pi_CZ + if include_intercept: + Z += rng.normal(0, 1, (1, k)) X = Z @ Pi_ZX + C @ Pi_CX + U @ ux X += rng.normal(0, 1, (n, m)) + include_intercept * rng.normal(0, 1, (1, m)) y = C @ alpha + X @ beta + U @ uy y += rng.normal(0, 1, (n, 1)) + include_intercept * rng.normal(0, 1, (1, 1)) + X, W, C, D = X[:, :mx], X[:, mx:], C[:, :mc], C[:, mc:] + + gamma0 = beta[mx:] + beta0 = np.concatenate([beta[:mx], alpha[mc:]]) + if return_beta and return_gamma: - return Z, X[:, :mx], y.flatten(), C, X[:, mx:], beta[:mx], beta[mx:] + return Z, X, y.flatten(), C, W, D, beta0, gamma0 elif return_beta: - return Z, X[:, :mx], y.flatten(), C, X[:, mx:], beta[:mx] + return Z, X, y.flatten(), C, W, D, beta0 elif return_gamma: - return Z, X[:, :mx], 
y.flatten(), C, X[:, mx:], beta[mx:] + return Z, X, y.flatten(), C, W, D, gamma0 else: - return Z, X[:, :mx], y.flatten(), C, X[:, mx:] + return Z, X, y.flatten(), C, W, D diff --git a/ivmodels/summary.py b/ivmodels/summary.py index cf822f1..b4e6fe1 100644 --- a/ivmodels/summary.py +++ b/ivmodels/summary.py @@ -93,6 +93,8 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs): if not str(self.test).lower() in _TESTS: raise ValueError(f"Test {self.test} not recognized.") + n = X.shape[0] + test_, inverse_test_ = _TESTS.get(str(self.test).lower()) self.estimates_ = self.kclass.coef_.tolist() @@ -102,40 +104,42 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs): (X, Z, C), _ = self.kclass._X_Z_C(X, Z, C, predict=False) - self.feature_names_ = self.kclass.endogenous_names_ - # + self.kclass.exogenous_names_ + self.feature_names_ = ( + self.kclass.endogenous_names_ + self.kclass.exogenous_names_ + ) idx = 0 - # if self.kclass.fit_intercept: - # self.feature_names_ = ["intercept"] + self.feature_names_ - # self.estimates_ = [self.kclass.intercept_] + self.estimates_ - # idx -= 1 + if self.kclass.fit_intercept: + self.feature_names_ = ["intercept"] + self.feature_names_ + self.estimates_ = [self.kclass.intercept_] + self.estimates_ + idx -= 1 + for name in self.feature_names_: if name in self.kclass.endogenous_names_: mask = np.zeros(len(self.kclass.endogenous_names_), dtype=bool) mask[idx] = True - X_, W_, C_, Z_ = X[:, mask], X[:, ~mask], C, Z + X_, W_, C_, D_ = X[:, mask], X[:, ~mask], C, np.zeros((n, 0)) fit_intercept_ = True elif name in self.kclass.exogenous_names_: mask = np.zeros(len(self.kclass.exogenous_names_), dtype=bool) mask[idx - len(self.kclass.endogenous_names_)] = True - X_, W_, C_, Z_ = C[:, mask], X, C[:, ~mask], np.hstack([Z, C[:, mask]]) + X_, W_, C_, D_ = np.zeros((n, 0)), X, C[:, ~mask], C[:, mask] fit_intercept_ = True elif name == "intercept": - ones = np.ones((X.shape[0], 1)) - X_, W_, C_, Z_ = ones, X, C, np.hstack([Z, ones]) + X_, W_, C_, D_ = np.zeros((n, 0)), X, C, np.ones((n, 1)) fit_intercept_ = False test_result = test_( - Z=Z_, + Z=Z, X=X_, W=W_, y=y, C=C_, + D=D_, beta=np.array([0]), fit_intercept=fit_intercept_, **kwargs, @@ -144,11 +148,12 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs): self.p_values_.append(test_result[1]) confidence_set = inverse_test_( - Z=Z_, + Z=Z, X=X_, W=W_, y=y, C=C_, + D=D_, alpha=self.alpha, fit_intercept=fit_intercept_, **kwargs, @@ -173,7 +178,7 @@ def __format__(self, format_spec: str) -> str: # noqa D return "Summary not fitted yet." def format_p_value(x): - return f"{x:{format_spec}}" if x > 1e-16 else "<1e-16" + return f"{x:{format_spec}}" if np.isnan(x) or x > 1e-16 else "<1e-16" estimate_str = [f"{e:{format_spec}}" for e in self.estimates_] statistics_str = [f"{s:{format_spec}}" for s in self.statistics_] diff --git a/ivmodels/tests/anderson_rubin.py b/ivmodels/tests/anderson_rubin.py index ae7ac62..d5ff154 100644 --- a/ivmodels/tests/anderson_rubin.py +++ b/ivmodels/tests/anderson_rubin.py @@ -91,7 +91,7 @@ def f(x): def anderson_rubin_test( - Z, X, y, beta, W=None, C=None, critical_values="chi2", fit_intercept=True + Z, X, y, beta, W=None, C=None, D=None, critical_values="chi2", fit_intercept=True ): """ Perform the Anderson Rubin test :cite:p:`anderson1949estimation`. @@ -132,12 +132,14 @@ def anderson_rubin_test( Regressors. y: np.ndarray of dimension (n,) Outcomes. - beta: np.ndarray of dimension (mx,) + beta: np.ndarray of dimension (mx + md,) Coefficients to test. 
W: np.ndarray of dimension (n, mw) or None, optional, default = None Endogenous regressors not of interest. C: np.ndarray of dimension (n, mc) or None, optional, default = None Exogenous regressors not of interest. + D: np.ndarray of dimension (n, md) or None, optional, default = None + Exogenous regressors of interest. critical_values: str, optional, default = "chi2" If ``"chi2"``, use the :math:`\\chi^2(k - m_W)` distribution to compute the p-value. If ``"f"``, use the :math:`F_{k - m_W, n - k}` distribution to compute the p-value. @@ -167,47 +169,55 @@ def anderson_rubin_test( guggenberger2012asymptotic guggenberger2019more """ - Z, X, y, W, C, beta = _check_test_inputs(Z, X, y, W=W, C=C, beta=beta) + Z, X, y, W, C, D, beta = _check_test_inputs(Z, X, y, W=W, C=C, D=D, beta=beta) n, k = Z.shape + mw, mc, md = W.shape[1], C.shape[1], D.shape[1] if fit_intercept: C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X, y, Z, W = oproj(C, X, y, Z, W) + X, y, Z, W, D = oproj(C, X, y, Z, W, D) - if W.shape[1] == 0: + if md > 0: + X, Z = np.hstack([X, D]), np.hstack([Z, D]) + + if mw == 0: residuals = y - X @ beta proj_residuals = proj(Z, residuals) ar = ( np.square(proj_residuals).sum() / np.square(residuals - proj_residuals).sum() ) - dfn = k + dfn = k + md else: - ar = KClass._spectrum(X=W, y=y - X @ beta, Z=Z, subset_by_index=[0, 0])[0] - dfn = k - W.shape[1] + ar = KClass.ar_min(X=W, y=y - X @ beta, Z=Z) + dfn = k - mw + md - statistic = ar * (n - k - C.shape[1]) / dfn + dfd = n - k - mc - md - fit_intercept + statistic = ar * dfd / dfn if critical_values == "chi2": p_value = 1 - scipy.stats.chi2.cdf(statistic * dfn, df=dfn) elif critical_values == "f": - p_value = 1 - scipy.stats.f.cdf(statistic, dfn=dfn, dfd=n - k - C.shape[1]) + p_value = 1 - scipy.stats.f.cdf(statistic, dfn=dfn, dfd=dfd) elif critical_values.startswith("guggenberger"): - if W.shape[1] == 0: + if mw == 0: raise ValueError( "The critical value function proposed by Guggenberger et al. (2019) is " "only available for the subvector variant where W is not None." ) - - mw = W.shape[1] - kappa_max = (n - k - C.shape[1]) * KClass._spectrum( + if md > 0: + raise ValueError( + "The critical value function proposed by Guggenberger et al. (2019) is " + "not valid if D is not None" + ) + kappa_max = (n - k - mc - md) * KClass._spectrum( X=W, y=y - X @ beta, Z=Z, subset_by_index=[mw, mw] )[0] p_value = more_powerful_subvector_anderson_rubin_critical_value_function( - statistic * dfn, kappa_max, k, mw=W.shape[1] + statistic * dfn, kappa_max, k=k, mw=mw ) else: raise ValueError( @@ -218,7 +228,15 @@ def anderson_rubin_test( def inverse_anderson_rubin_test( - Z, X, y, alpha=0.05, W=None, C=None, critical_values="chi2", fit_intercept=True + Z, + X, + y, + alpha=0.05, + W=None, + C=None, + D=None, + critical_values="chi2", + fit_intercept=True, ): """ Return the quadric for to the inverse Anderson-Rubin test's acceptance region. @@ -258,6 +276,8 @@ def inverse_anderson_rubin_test( Endogenous regressors not of interest. C: np.ndarray of dimension (n, mc) or None, optional, default = None Exogenous regressors not of interest. + D: np.ndarray of dimension (n, md) or None, optional, default = None + Exogenous regressors of interest. critical_values: str, optional, default = "chi2" If ``"chi2"``, use the :math:`\\chi^2(k - m_W)` distribution to compute the p-value. 
@@ -275,20 +295,21 @@ def inverse_anderson_rubin_test( if not 0 < alpha < 1: raise ValueError("alpha must be in (0, 1).") - Z, X, y, W, C, _ = _check_test_inputs(Z, X, y, W=W, C=C) + Z, X, y, W, C, D, _ = _check_test_inputs(Z, X, y, W=W, C=C, D=D) n, k = Z.shape + mx, mw, mc, md = X.shape[1], W.shape[1], C.shape[1], D.shape[1] if fit_intercept: C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X, y, Z, W = oproj(C, X, y, Z, W) + X, y, Z, W, D = oproj(C, X, y, Z, W, D) S = np.concatenate([X, W], axis=1) - dfn = k - W.shape[1] - dfd = n - k - C.shape[1] + dfn = k + md - mw + dfd = n - k - mc - md - fit_intercept if critical_values == "chi2": quantile = scipy.stats.chi2.ppf(1 - alpha, df=dfn) / dfd @@ -299,7 +320,15 @@ def inverse_anderson_rubin_test( "critical_values must be one of 'chi2', 'f'. Got " f"{critical_values}." ) + if md > 0: + Z = np.hstack([Z, D]) + S_proj, y_proj = proj(Z, S, y) + + if md > 0: + S = np.hstack([S, D]) + S_proj = np.hstack([S_proj, D]) + S_orth = S - S_proj y_orth = y - y_proj @@ -310,8 +339,5 @@ def inverse_anderson_rubin_test( if isinstance(c, np.ndarray): c = c.item() - if W is not None: - return Quadric(A, b, c).project(range(X.shape[1])) - - else: - return Quadric(A, b, c) + coordinates = np.concatenate([np.arange(mx), np.arange(mx + mw, mx + mw + md)]) + return Quadric(A, b, c).project(coordinates) diff --git a/ivmodels/tests/conditional_likelihood_ratio.py b/ivmodels/tests/conditional_likelihood_ratio.py index 8a60934..b1f4fe6 100644 --- a/ivmodels/tests/conditional_likelihood_ratio.py +++ b/ivmodels/tests/conditional_likelihood_ratio.py @@ -173,6 +173,7 @@ def conditional_likelihood_ratio_test( beta, W=None, C=None, + D=None, fit_intercept=True, method="numerical_integration", tol=1e-4, @@ -280,12 +281,14 @@ def conditional_likelihood_ratio_test( Regressors. y: np.ndarray of dimension (n,) Outcomes. - beta: np.ndarray of dimension (mx,) + beta: np.ndarray of dimension (mx + md,) Coefficients to test. W: np.ndarray of dimension (n, mw) or None, optional, default = None Endogenous regressors not of interest. C: np.ndarray of dimension (n, mc) or None, optional, default = None Exogenous regressors not of interest. + D: np.ndarray of dimension (n, 0) or None, optional, default = None + Exogenous regressors of interest. Not supported for this test. fit_intercept: bool, optional, default: True Whether to include an intercept. This is equivalent to centering the inputs. 
method: str, optional, default: "numerical_integration" @@ -316,82 +319,100 @@ def conditional_likelihood_ratio_test( moreira2003conditional kleibergen2021efficient """ - Z, X, y, W, C, beta = _check_test_inputs(Z, X, y, W=W, C=C, beta=beta) + Z, X, y, W, C, D, beta = _check_test_inputs(Z, X, y, W=W, C=C, D=D, beta=beta) n, k = Z.shape - mx = X.shape[1] - mw = W.shape[1] + mx, mw, mc, md = X.shape[1], W.shape[1], C.shape[1], D.shape[1] + + if md > 0: + return (np.nan, 1) if fit_intercept: C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X, y, Z, W = oproj(C, X, y, Z, W) + X, y, Z, W, D = oproj(C, X, y, Z, W, D) - # Z = scipy.linalg.qr(Z, mode="economic")[0] - X_proj, y_proj, W_proj = proj(Z, X, y, W) # , orthogonal=True) + X_proj, y_proj, W_proj = proj(Z, X, y, W) + + residuals = y - X @ beta + residuals_proj = y_proj - X_proj @ beta if mw == 0: - residuals = y - X @ beta - residuals_proj = y_proj - X_proj[:, :mx] @ beta residuals_orth = residuals - residuals_proj Sigma = (residuals_orth.T @ X) / (residuals_orth.T @ residuals_orth) Xt = X - np.outer(residuals, Sigma) Xt_proj = X_proj - np.outer(residuals_proj, Sigma) Xt_orth = Xt - Xt_proj + s_min = np.real( scipy.linalg.eigvalsh( a=Xt_proj.T @ Xt_proj, b=Xt_orth.T @ Xt_orth, subset_by_index=[0, 0] )[0] - ) * (n - k - C.shape[1]) + ) * (n - k - mc - md - fit_intercept) + + Xy = np.concatenate([X, y.reshape(-1, 1)], axis=1) + Xy_proj = np.hstack([X_proj, y_proj.reshape(-1, 1)]) + + ar_min = ( + scipy.linalg.eigvalsh( + a=Xy.T @ Xy, + b=(Xy - Xy_proj).T @ Xy, + subset_by_index=[0, 0], + )[0] + - 1 + ) - # TODO: This can be done with efficient rank-1 updates. - ar_min = KClass.ar_min(X=X, y=y, Z=Z) ar = residuals_proj.T @ residuals_proj / (residuals_orth.T @ residuals_orth) - statistic = (n - k - C.shape[1]) * (ar - ar_min) + statistic = (n - k - mc - md - fit_intercept) * (ar - ar_min) elif mw > 0: XWy = np.concatenate([X, W, y.reshape(-1, 1)], axis=1) XWy_proj = np.concatenate([X_proj, W_proj, y_proj.reshape(-1, 1)], axis=1) - XWy_eigenvals = np.sort( - np.real( - scipy.linalg.eigvalsh( - a=XWy_proj.T @ XWy, - b=(XWy - XWy_proj).T @ XWy, - subset_by_index=[0, 1], + XWy_eigenvals = ( + np.sort( + np.real( + scipy.linalg.eigvalsh( + a=XWy.T @ XWy, + b=(XWy - XWy_proj).T @ XWy, + subset_by_index=[0, 1], + ) ) ) - ) - kclass = KClass(kappa="liml").fit(X=W, y=y - X @ beta, Z=Z) - ar = kclass.ar_min( - X=W, y=y - X @ beta, X_proj=W_proj, y_proj=y_proj - X_proj @ beta + - 1 ) - statistic = (n - k - C.shape[1]) * (ar - XWy_eigenvals[0]) - s_min = (n - k - C.shape[1]) * (XWy_eigenvals[0] + XWy_eigenvals[1] - ar) + kclass = KClass(kappa="liml").fit(X=W, y=residuals, Z=Z) + ar = kclass.ar_min(X=W, y=residuals, X_proj=W_proj, y_proj=residuals_proj) + + dof = n - k - mc - fit_intercept - md + statistic = dof * (ar - XWy_eigenvals[0]) + s_min = dof * (XWy_eigenvals[0] + XWy_eigenvals[1] - ar) p_value = conditional_likelihood_ratio_critical_value_function( - mx, k - mw, s_min, statistic, method=method, tol=tol + mx + md, k + md - mw, s_min, statistic, method=method, tol=tol ) return statistic, p_value def inverse_conditional_likelihood_ratio_test( - Z, X, y, alpha=0.05, W=None, C=None, fit_intercept=True, tol=1e-4 + Z, X, y, alpha=0.05, W=None, C=None, D=None, fit_intercept=True, tol=1e-4 ): """Return an approximation of the confidence set by inversion of the CLR test.""" if not 0 < alpha < 1: raise ValueError("alpha must be in (0, 1).") - Z, X, y, W, C, _ = _check_test_inputs(Z, X, y, W=W, C=C) + Z, X, y, W, C, D, _ = _check_test_inputs(Z, X, y, 
W=W, C=C, D=D) + + n, k = Z.shape + mx, mw, mc, md = X.shape[1], W.shape[1], C.shape[1], D.shape[1] - n, mx = X.shape - mw = W.shape[1] - k = Z.shape[1] + if md > 0: + return ConfidenceSet(boundaries=[(-np.inf, np.inf)]) if fit_intercept: C = np.hstack([np.ones((n, 1)), C]) @@ -414,23 +435,26 @@ def inverse_conditional_likelihood_ratio_test( ) ) - dof = n - k - C.shape[1] + dof = n - k - mc - md - fit_intercept # "lower bound" on the confidence set to be computed. That is, the confidence set # to be computed will contain the lower bound. - quantile_lower = scipy.stats.chi2.ppf(1 - alpha, df=mx) + dof * Sy_eigvals[0] + quantile_lower = scipy.stats.chi2.ppf(1 - alpha, df=mx + md) + dof * Sy_eigvals[0] A = S.T @ (dof * S_proj - quantile_lower * S_orth) b = -2 * (dof * S_proj - quantile_lower * S_orth).T @ y c = y.T @ (dof * y_proj - quantile_lower * y_orth) - cs_lower = ConfidenceSet.from_quadric(Quadric(A, b, c).project(range(X.shape[1]))) + coordinates = np.concatenate([np.arange(mx), np.arange(mx + mw, mx + mw + md)]) + cs_lower = ConfidenceSet.from_quadric(Quadric(A, b, c).project(coordinates)) - quantile_upper = scipy.stats.chi2.ppf(1 - alpha, df=k - mw) + dof * Sy_eigvals[0] + quantile_upper = ( + scipy.stats.chi2.ppf(1 - alpha, df=k + md - mw) + dof * Sy_eigvals[0] + ) A = S.T @ (dof * S_proj - quantile_upper * S_orth) b = -2 * (dof * S_proj - quantile_upper * S_orth).T @ y c = y.T @ (dof * y_proj - quantile_upper * y_orth) - cs_upper = ConfidenceSet.from_quadric(Quadric(A, b, c).project(range(X.shape[1]))) + cs_upper = ConfidenceSet.from_quadric(Quadric(A, b, c).project(coordinates)) Wy_proj = Sy_proj[:, mx:] Wy_orth = Sy_orth[:, mx:] diff --git a/ivmodels/tests/lagrange_multiplier.py b/ivmodels/tests/lagrange_multiplier.py index 765527e..8a4b780 100644 --- a/ivmodels/tests/lagrange_multiplier.py +++ b/ivmodels/tests/lagrange_multiplier.py @@ -5,6 +5,7 @@ from scipy.optimize._optimize import MemoizeJac from ivmodels.confidence_set import ConfidenceSet +from ivmodels.models.kclass import KClass from ivmodels.tests.utils import _check_test_inputs, _find_roots from ivmodels.utils import oproj, proj @@ -55,6 +56,11 @@ class _LM: Projection of ``Y`` onto the column space of ``Z``. W_proj: np.ndarray of dimension (n, mw), optional, default=None Projection of ``W`` onto the column space of ``Z``. + optimizer: str, optional, default="bfgs" + Optimization method to use. Passed to ``scipy.optimize.minimize``. + gamma_0: list of str or np.ndarray of dimension (mw), optional, default=None + Initial value for the minimization. If ``str``, must be one of "liml" or "zero". + If ``None``, ``"liml"`` is used. 
""" def __init__( @@ -74,6 +80,7 @@ def __init__( self.X = X self.y = y.reshape(-1, 1) self.W = W + if X_proj is None or y_proj is None or W_proj is None: if Z is None: raise ValueError("Z must be provided to compute the projection.") @@ -101,6 +108,8 @@ def __init__( self.optimizer = optimizer self.gamma_0 = ["liml"] if gamma_0 is None else gamma_0 + if isinstance(self.gamma_0, str): + self.gamma_0 = [self.gamma_0] def liml(self, beta=None): """ @@ -148,6 +157,7 @@ def derivative(self, beta, gamma=None, jac=True, hess=True): if not jac: # not jac -> not hess residuals_proj_St = proj(St_proj, residuals_proj) + return ( self.dof * residuals_proj_St.T @ residuals_proj_St / sigma_hat, None, @@ -275,11 +285,13 @@ def _derivative(gamma): ) ) - return np.min([r.fun for r in results]) + res = min(results, key=lambda r: r.fun) + + return res.fun def lagrange_multiplier_test( - Z, X, y, beta, W=None, C=None, fit_intercept=True, **kwargs + Z, X, y, beta, W=None, C=None, D=None, fit_intercept=True, **kwargs ): """ Perform the Lagrange multiplier test for ``beta`` by :cite:t:`kleibergen2002pivotal`. @@ -312,12 +324,14 @@ def lagrange_multiplier_test( Regressors of interest. y: np.ndarray of dimension (n,) Outcomes. - beta: np.ndarray of dimension (mx,) + beta: np.ndarray of dimension (mx + md,) Coefficients to test. W: np.ndarray of dimension (n, mw) or None, optional, default=None Endogenous regressors not of interest. C: np.ndarray of dimension (n, mc) or None, optional, default=None Exogenous regressors not of interest. + D: np.ndarray of dimension (n, md) or None, optional, default=None + Exogenous regressors of interest. fit_intercept: bool, optional, default=True Whether to fit an intercept. This is equivalent to centering the inputs. @@ -336,21 +350,26 @@ def lagrange_multiplier_test( ValueError: If the dimensions of the inputs are incorrect. 
""" - Z, X, y, W, C, beta = _check_test_inputs(Z, X, y, W=W, C=C, beta=beta) + Z, X, y, W, C, D, beta = _check_test_inputs(Z, X, y, W=W, C=C, D=D, beta=beta) n, k = Z.shape - mx = X.shape[1] + mx, mw, mc, md = X.shape[1], W.shape[1], C.shape[1], D.shape[1] if fit_intercept: C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X, y, Z, W = oproj(C, X, y, Z, W) + X, y, Z, W, D = oproj(C, X, y, Z, W, D) + + if md > 0: + X = np.hstack([X, D]) + Z = np.hstack([Z, D]) - if W.shape[1] > 0: - statistic = _LM(X=X, y=y, W=W, Z=Z, dof=n - k - C.shape[1], **kwargs).lm(beta) + dof = n - k - mc - md - fit_intercept + if mw > 0: + statistic = _LM(X=X, y=y, W=W, Z=Z, dof=dof, **kwargs).lm(beta) - p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx) + p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx + md) else: residuals = y - X @ beta @@ -358,7 +377,7 @@ def lagrange_multiplier_test( orth_residuals = residuals - residuals_proj - sigma_hat = residuals.T @ orth_residuals + sigma_hat = orth_residuals.T @ orth_residuals Sigma = orth_residuals.T @ X / sigma_hat # X - (y - X beta) * (y - X beta)^T M_Z X / (y - X beta)^T M_Z (y - X beta) @@ -366,11 +385,9 @@ def lagrange_multiplier_test( X_tilde_proj_residuals = proj(X_tilde_proj, residuals) # (y - X beta) P_{P_Z X_tilde} (y - X beta) / (y - X_beta) M_Z (y - X beta) - statistic = np.square(X_tilde_proj_residuals).sum() / sigma_hat + statistic = dof * np.square(X_tilde_proj_residuals).sum() / sigma_hat - statistic *= n - k - C.shape[1] - - p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx) + p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx + md) return statistic, p_value @@ -382,6 +399,7 @@ def inverse_lagrange_multiplier_test( alpha=0.05, W=None, C=None, + D=None, fit_intercept=True, tol=1e-4, max_value=1e8, @@ -390,15 +408,15 @@ def inverse_lagrange_multiplier_test( """ Return an approximation of the confidence set by inversion of the LM test. - This is only implemented if ``X.shape[1] == 1``. The confidence set is computed by - a root finding algorithm, see the docs of + This is only implemented if `mx + md = 1`. The confidence set is + computed by a root finding algorithm, see the docs of :func:`~ivmodels.tests.utils._find_roots` for more details. Parameters ---------- Z: np.ndarray of dimension (n, k) Instruments. - X: np.ndarray of dimension (n, 1) + X: np.ndarray of dimension (n, mx) Regressors of interest. y: np.ndarray of dimension (n,) Outcomes. @@ -408,6 +426,8 @@ def inverse_lagrange_multiplier_test( Endogenous regressors not of interest. C: np.ndarray of dimension (n, mc) or None, optional, default=None Exogenous regressors not of interest. + D: np.ndarray of dimension (n, md) or None, optional, default=None + Exogenous regressors of interest. fit_intercept: bool, optional, default=True Whether to fit an intercept. This is equivalent to centering the inputs. 
tol: float, optional, default=1e-4 @@ -421,32 +441,32 @@ def inverse_lagrange_multiplier_test( if not 0 < alpha < 1: raise ValueError("alpha must be in (0, 1).") - Z, X, y, W, C, _ = _check_test_inputs(Z, X, y, W=W, C=C) + Z, X, y, W, C, D, _ = _check_test_inputs(Z, X, y, W=W, C=C, D=D) - n, mx = X.shape - if not mx == 1: - raise ValueError("mx must be 1.") + n, k = Z.shape + mx, mc, md = X.shape[1], C.shape[1], D.shape[1] - k = Z.shape[1] + if not mx + md == 1: + raise ValueError("mx + md must be 1.") if fit_intercept: - C_ = np.hstack([np.ones((n, 1)), C]) - else: - C_ = C + C = np.hstack([np.ones((n, 1)), C]) - if C_.shape[1] > 0: - X_, y_, Z_, W_ = oproj(C_, X, y, Z, W) - else: - X_, y_, Z_, W_ = X, y, Z, W + if C.shape[1] > 0: + X, y, Z, W, D = oproj(C, X, y, Z, W, D) + + dof = n - k - mc - md - fit_intercept - dof = n - k - C_.shape[1] - lm = _LM(X=X_, W=W_, y=y_, Z=Z_, dof=dof) - critical_value = scipy.stats.chi2(df=mx).ppf(1 - alpha) - liml = lm.liml() + lm = _LM(X=np.hstack([X, D]), W=W, y=y, Z=np.hstack([Z, D]), dof=dof) + critical_value = scipy.stats.chi2(df=mx + md).ppf(1 - alpha) + if md == 0: + liml = lm.liml()[0] + else: + liml = KClass(kappa="liml", fit_intercept=False).fit(W, y, Z=Z, C=D).coef_[-1] left = _find_roots( lambda x: lm.lm(x) - critical_value, - a=liml[0], + a=liml, b=-np.inf, tol=tol, max_value=max_value, @@ -454,7 +474,7 @@ def inverse_lagrange_multiplier_test( ) right = _find_roots( lambda x: lm.lm(x) - critical_value, - a=liml[0], + a=liml, b=np.inf, tol=tol, max_value=max_value, diff --git a/ivmodels/tests/likelihood_ratio.py b/ivmodels/tests/likelihood_ratio.py index 97fab28..a83930d 100644 --- a/ivmodels/tests/likelihood_ratio.py +++ b/ivmodels/tests/likelihood_ratio.py @@ -6,7 +6,7 @@ from ivmodels.utils import oproj, proj -def likelihood_ratio_test(Z, X, y, beta, W=None, C=None, fit_intercept=True): +def likelihood_ratio_test(Z, X, y, beta, W=None, C=None, D=None, fit_intercept=True): """ Perform the likelihood ratio test for ``beta``. @@ -46,12 +46,14 @@ def likelihood_ratio_test(Z, X, y, beta, W=None, C=None, fit_intercept=True): Regressors. y: np.ndarray of dimension (n,) Outcomes. - beta: np.ndarray of dimension (mx,) + beta: np.ndarray of dimension (mx + md,) Coefficients to test. W: np.ndarray of dimension (n, mw) or None, optional, default=None Endogenous regressors not of interest. C: np.ndarray of dimension (n, mc) or None, optional, default=None Exogenous regressors not of interest. + D: np.ndarray of dimension (n, md) or None, optional, default=None + Exogenous regressors of interest. fit_intercept: bool, optional, default=True Whether to fit an intercept. This is equivalent to centering the inputs. @@ -70,50 +72,70 @@ def likelihood_ratio_test(Z, X, y, beta, W=None, C=None, fit_intercept=True): ValueError: If the dimensions of the inputs are incorrect. 
""" - Z, X, y, W, C, beta = _check_test_inputs(Z, X, y, W=W, C=C, beta=beta) + Z, X, y, W, C, D, beta = _check_test_inputs(Z, X, y, W=W, C=C, D=D, beta=beta) - n, mx = X.shape - k = Z.shape[1] + n, k = Z.shape + mx, mw, mc, md = X.shape[1], W.shape[1], C.shape[1], D.shape[1] if fit_intercept: C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X, y, Z, W = oproj(C, X, y, Z, W) + X, y, Z, W, D = oproj(C, X, y, Z, W, D) + + if md > 0: + Z = np.concatenate([Z, D], axis=1) X_proj, y_proj, W_proj = proj(Z, X, y, W) XWy = np.concatenate([X, W, y.reshape(-1, 1)], axis=1) - XWy_proj = np.concatenate([X_proj, W_proj, y_proj.reshape(-1, 1)], axis=1) - ar_min = (n - k - C.shape[1]) * np.real( + XWy_proj = np.hstack([X_proj, W_proj, y_proj.reshape(-1, 1)]) + + ar_min = np.real( scipy.linalg.eigvalsh( - a=XWy.T @ XWy_proj, b=XWy.T @ (XWy - XWy_proj), subset_by_index=[0, 0] + a=oproj(D, XWy).T @ XWy_proj, + b=XWy.T @ (XWy - XWy_proj), + subset_by_index=[0, 0], )[0] ) - if W.shape[1] == 0: - statistic = (n - k - C.shape[1]) * np.linalg.norm( - y_proj - X_proj @ beta - ) ** 2 / np.linalg.norm((y - y_proj) - (X - X_proj) @ beta) ** 2 - ar_min + if md > 0: + X = np.hstack([X, D]) + X_proj = np.hstack([X_proj, D]) + + residuals = y - X @ beta + residuals_proj = y_proj - X_proj @ beta + + if mw == 0: + statistic = np.linalg.norm(residuals_proj) ** 2 + statistic /= np.linalg.norm(residuals - residuals_proj) ** 2 + statistic -= ar_min + + statistic *= n - k - mc - md - fit_intercept else: - Wy = np.concatenate([W, (y - X @ beta).reshape(-1, 1)], axis=1) - Wy_proj = np.concatenate( - [W_proj, (y_proj - X_proj @ beta).reshape(-1, 1)], axis=1 + Wy = np.hstack([W, residuals.reshape(-1, 1)]) + Wy_proj = np.hstack([W_proj, residuals_proj.reshape(-1, 1)]) + + statistic = ( + np.real( + scipy.linalg.eigvalsh( + a=Wy_proj.T @ Wy_proj, + b=Wy.T @ (Wy - Wy_proj), + subset_by_index=[0, 0], + )[0] + ) + - ar_min ) - statistic = (n - k - C.shape[1]) * np.real( - scipy.linalg.eigvalsh( - a=Wy.T @ Wy_proj, b=Wy.T @ (Wy - Wy_proj), subset_by_index=[0, 0] - )[0] - ) - ar_min + statistic *= n - k - mc - md - fit_intercept - p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx) + p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx + md) return statistic, p_value def inverse_likelihood_ratio_test( - Z, X, y, alpha=0.05, W=None, C=None, fit_intercept=True + Z, X, y, alpha=0.05, W=None, C=None, D=None, fit_intercept=True ): """ Return the quadric for the inverse likelihood ratio test's acceptance region. @@ -138,6 +160,8 @@ def inverse_likelihood_ratio_test( Endogenous regressors not of interest. C: np.ndarray of dimension (n, mc) or None, optional, default=None Exogenous regressors not of interest. + D: np.ndarray of dimension (n, md) or None, optional, default=None + Exogenous regressors of interest. fit_intercept: bool, optional, default=True Whether to fit an intercept. This is equivalent to centering the inputs. 
@@ -145,43 +169,51 @@ def inverse_likelihood_ratio_test( if not 0 < alpha < 1: raise ValueError("alpha must be in (0, 1).") - Z, X, y, W, C, _ = _check_test_inputs(Z, X, y, W=W, C=C) + Z, X, y, W, C, D, _ = _check_test_inputs(Z, X, y, W=W, C=C, D=D) - n, mx = X.shape - k = Z.shape[1] + n, k = Z.shape + mx, mw, mc, md = X.shape[1], W.shape[1], C.shape[1], D.shape[1] if fit_intercept: C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X, y, Z, W = oproj(C, X, y, Z, W) + X, y, Z, W, D = oproj(C, X, y, Z, W, D) + + XW = np.concatenate([X, W], axis=1) - X = np.concatenate([X, W], axis=1) + if md > 0: + Z = np.concatenate([Z, D], axis=1) - X_proj, y_proj = proj(Z, X, y) - X_orth = X - X_proj + XW_proj, y_proj = proj(Z, XW, y) + XW_orth = XW - XW_proj y_orth = y - y_proj - Xy_proj = np.concatenate([X_proj, y_proj.reshape(-1, 1)], axis=1) - Xy = np.concatenate([X, y.reshape(-1, 1)], axis=1) + XWy_proj = np.concatenate([XW_proj, y_proj.reshape(-1, 1)], axis=1) + XWy = np.concatenate([XW, y.reshape(-1, 1)], axis=1) kappa_liml = np.real( scipy.linalg.eigvalsh( - a=Xy.T @ Xy_proj, b=Xy.T @ (Xy - Xy_proj), subset_by_index=[0, 0] + a=oproj(D, XWy).T @ XWy_proj, + b=XWy.T @ (XWy - XWy_proj), + subset_by_index=[0, 0], )[0] ) - dfd = n - k - C.shape[1] - quantile = scipy.stats.chi2.ppf(1 - alpha, df=mx) + dfd * kappa_liml + dfd = n - k - mc - md - fit_intercept + quantile = scipy.stats.chi2.ppf(1 - alpha, df=mx + md) + dfd * kappa_liml - A = X.T @ (X_proj - 1 / dfd * quantile * X_orth) - b = -2 * (X_proj - 1 / dfd * quantile * X_orth).T @ y + if md > 0: + XW = np.concatenate([XW, D], axis=1) + XW_proj = np.concatenate([XW_proj, D], axis=1) + XW_orth = np.concatenate([XW_orth, np.zeros_like(D)], axis=1) + + A = XW.T @ (XW_proj - 1 / dfd * quantile * XW_orth) + b = -2 * (XW_proj - 1 / dfd * quantile * XW_orth).T @ y c = y.T @ (y_proj - 1 / dfd * quantile * y_orth) if isinstance(c, np.ndarray): c = c.item() - if W is not None: - return Quadric(A, b, c).project(range(X.shape[1] - W.shape[1])) - else: - return Quadric(A, b, c) + coordinates = np.concatenate([np.arange(mx), np.arange(mx + mw, mx + mw + md)]) + return Quadric(A, b, c).project(coordinates) diff --git a/ivmodels/tests/pulse.py b/ivmodels/tests/pulse.py index 56ffaac..193ff1f 100644 --- a/ivmodels/tests/pulse.py +++ b/ivmodels/tests/pulse.py @@ -6,7 +6,7 @@ from ivmodels.utils import oproj, proj -def pulse_test(Z, X, y, beta, C=None, W=None, fit_intercept=True): +def pulse_test(Z, X, y, beta, C=None, W=None, D=None, fit_intercept=True): """ Test proposed by :cite:t:`jakobsen2022distributional` with null hypothesis: :math:`Z` and :math:`y - X \\beta` are uncorrelated. @@ -25,7 +25,7 @@ def pulse_test(Z, X, y, beta, C=None, W=None, fit_intercept=True): Regressors. y: np.ndarray of dimension (n,) Outcomes. - beta: np.ndarray of dimension (mx,) + beta: np.ndarray of dimension (mx + md,) Coefficients to test. C: np.ndarray of dimension (n, mc) or None, optional, default=None Exogenous regressors not of interest. 
@@ -55,11 +55,21 @@ def pulse_test(Z, X, y, beta, C=None, W=None, fit_intercept=True): jakobsen2022distributional """ - Z, X, y, W, C, beta = _check_test_inputs(Z, X, y, C=C, W=W, beta=beta) + Z, X, y, W, C, D, beta = _check_test_inputs(Z, X, y, C=C, W=W, D=D, beta=beta) if W.shape[1] > 0: raise ValueError("No subvector variant of the pulse test is implemented.") + if D.shape[1] > 0: + return pulse_test( + np.hstack([Z, D]), + np.hstack([Z, X]), + y, + beta, + C=C, + fit_intercept=fit_intercept, + ) + n, k = Z.shape if fit_intercept: @@ -77,9 +87,19 @@ def pulse_test(Z, X, y, beta, C=None, W=None, fit_intercept=True): return statistic, p_value -def inverse_pulse_test(Z, X, y, C=None, alpha=0.05, fit_intercept=True): +def inverse_pulse_test(Z, X, y, C=None, D=None, alpha=0.05, fit_intercept=True): """Return the quadric for the inverse pulse test's acceptance region.""" - Z, X, y, _, C, _ = _check_test_inputs(Z, X, y, C=C) + Z, X, y, _, C, D, _ = _check_test_inputs(Z, X, y, C=C, D=D) + + if D.shape[1] > 0: + return inverse_pulse_test( + np.hstack([Z, D]), + np.hstack([Z, X]), + y, + C=C, + alpha=alpha, + fit_intercept=fit_intercept, + ) n, k = Z.shape diff --git a/ivmodels/tests/rank.py b/ivmodels/tests/rank.py index 20dde6a..380bf2f 100644 --- a/ivmodels/tests/rank.py +++ b/ivmodels/tests/rank.py @@ -44,7 +44,7 @@ def rank_test(Z, X, C=None, fit_intercept=True): of the :math:`\\chi^2(k - m_X + 1)` distribution. """ - Z, X, _, _, C, _ = _check_test_inputs(Z, X, y=None, C=C) + Z, X, _, _, C, _, _ = _check_test_inputs(Z, X, y=None, C=C) n, k = Z.shape m = X.shape[1] diff --git a/ivmodels/tests/utils.py b/ivmodels/tests/utils.py index dc0b4d3..7989606 100644 --- a/ivmodels/tests/utils.py +++ b/ivmodels/tests/utils.py @@ -1,7 +1,7 @@ import numpy as np -def _check_test_inputs(Z, X, y, W=None, C=None, beta=None): +def _check_test_inputs(Z, X, y, W=None, C=None, D=None, beta=None): """ Test dimensions of inputs to tests. @@ -17,7 +17,9 @@ def _check_test_inputs(Z, X, y, W=None, C=None, beta=None): Regressors to control for. C: np.ndarray of dimension (n, r), optional, default=None Exogenous regressors not of interest. - beta: np.ndarray of dimension (mx,), optional, default=None + D: np.ndarray of dimension (n, md), optional, default=None + Exogenous regressors of interest. + beta: np.ndarray of dimension (mx + md,), optional, default=None Coefficients. Returns @@ -32,8 +34,12 @@ def _check_test_inputs(Z, X, y, W=None, C=None, beta=None): Regressors to control for. If input was None, returns an empty matrix of shape (n, 0). C: np.ndarray of dimension (n, mc) - Exogenous regressors not of interest. - beta: np.ndarray of dimension (mx,) or None + Exogenous regressors not of interest. If input was None, returns an empty matrix + of shape (n, 0). + D: np.ndarray of dimension (n, md) + Exogenous regressors of interest. If input was None, returns an empty matrix of + shape (n, 0). + beta: np.ndarray of dimension (mx + md,) or None Coefficients. Raises @@ -42,12 +48,18 @@ def _check_test_inputs(Z, X, y, W=None, C=None, beta=None): If the dimensions of the inputs are incorrect. """ + if X is None: + X = np.empty((Z.shape[0], 0)) + if W is None: W = np.empty((Z.shape[0], 0)) if C is None: C = np.empty((Z.shape[0], 0)) + if D is None: + D = np.empty((Z.shape[0], 0)) + if Z.ndim != 2: raise ValueError(f"Z must be a matrix. Got shape {Z.shape}.") if X.ndim != 2: @@ -63,11 +75,20 @@ def _check_test_inputs(Z, X, y, W=None, C=None, beta=None): raise ValueError(f"W must be a matrix. 
Got shape {W.shape}.") if C.ndim != 2: raise ValueError(f"C must be a matrix. Got shape {C.shape}.") - - if not Z.shape[0] == X.shape[0] == y.shape[0] == W.shape[0] == C.shape[0]: + if D.ndim != 2: + raise ValueError(f"D must be a matrix. Got shape {D.shape}.") + + if ( + not Z.shape[0] + == X.shape[0] + == y.shape[0] + == W.shape[0] + == C.shape[0] + == D.shape[0] + ): raise ValueError( - f"Z, X, y, W, and C must have the same number of rows. Got shapes " - f"{Z.shape}, {X.shape}, {y.shape}, {W.shape}, and {C.shape}." + f"Z, X, y, W, C, and D must have the same number of rows. Got shapes " + f"{Z.shape}, {X.shape}, {y.shape}, {W.shape}, {C.shape}, and {D.shape}." ) if beta is not None and beta.ndim != 1: @@ -77,13 +98,13 @@ def _check_test_inputs(Z, X, y, W=None, C=None, beta=None): beta = beta.flatten() if beta is not None: - if beta.shape[0] != X.shape[1]: + if beta.shape[0] != X.shape[1] + D.shape[1]: raise ValueError( - "beta must have the same length or number of rows as X has columns. " - f"Got shapes {beta.shape} and {X.shape}." + "beta must have the same length or number of rows as X and D have " + f"columns. Got shapes {beta.shape} and {X.shape}, {D.shape}." ) - return Z, X, y, W, C, beta + return Z, X, y, W, C, D, beta def _find_roots(f, a, b, tol, max_value, max_eval, n_points=50): diff --git a/ivmodels/tests/wald.py b/ivmodels/tests/wald.py index acdf7f7..553a938 100644 --- a/ivmodels/tests/wald.py +++ b/ivmodels/tests/wald.py @@ -7,7 +7,9 @@ from ivmodels.utils import oproj, proj -def wald_test(Z, X, y, beta, W=None, C=None, estimator="tsls", fit_intercept=True): +def wald_test( + Z, X, y, beta, W=None, C=None, D=None, estimator="tsls", fit_intercept=True +): """ Test based on asymptotic normality of the TSLS (or LIML) estimator. @@ -47,7 +49,9 @@ def wald_test(Z, X, y, beta, W=None, C=None, estimator="tsls", fit_intercept=Tru Endogenous regressors not of interest. C: np.ndarray of dimension (n, mc) or None Exogenous regressors not of interest. - beta: np.ndarray of dimension (mx,) + D: np.ndarray of dimension (n, md) or None + Exogenous regressors of interest. + beta: np.ndarray of dimension (mx + md,) Coefficients to test. estimator: str or float, optional, default = "liml" Estimator to use. Passed to ``kappa`` argument of ``KClass``. @@ -71,49 +75,56 @@ def wald_test(Z, X, y, beta, W=None, C=None, estimator="tsls", fit_intercept=Tru If the dimensions of the inputs are incorrect. 
""" - Z, X, y, W, C, beta = _check_test_inputs(Z, X, y, W=W, C=C, beta=beta) + Z, X, y, W, C, D, beta = _check_test_inputs(Z, X, y, W=W, C=C, D=D, beta=beta) n, mx = X.shape + mw, mc, md = W.shape[1], C.shape[1], D.shape[1] if fit_intercept: C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X, y, Z, W = oproj(C, X, y, Z, W) + X, y, Z, W, D = oproj(C, X, y, Z, W, D) XW = np.concatenate([X, W], axis=1) - estimator = KClass(kappa=estimator, fit_intercept=False).fit(XW, y, Z) + estimator = KClass(kappa=estimator, fit_intercept=False).fit(XW, y, Z, C=D) - beta_gamma_hat = estimator.coef_ + residuals = y - estimator.predict(XW, C=D) + sigma_hat_sq = np.sum(residuals**2) / (n - mx - mw - mc - md - fit_intercept) - residuals = y - estimator.predict(XW) - sigma_hat_sq = np.sum(residuals**2) / (n - mx - W.shape[1] - C.shape[1]) + kappa = estimator.kappa_ - XW_proj = proj(Z, XW) + if md > 0: + Z = np.hstack([Z, D]) - kappa = estimator.kappa_ - cov_hat = (kappa * XW_proj + (1 - kappa) * XW).T @ XW + X_proj, W_proj = proj(Z, X, W) + + if md > 0: + X_proj = np.hstack([X_proj, D]) + X = np.hstack([X, D]) + + Xkappa = kappa * X_proj + (1 - kappa) * X + cov_hat = Xkappa.T @ X - if W.shape[1] == 0: - statistic = (beta_gamma_hat - beta).T @ cov_hat @ (beta_gamma_hat - beta) + if mw > 0: + Wkappa = kappa * W_proj + (1 - kappa) * W + cov_hat -= Xkappa.T @ W @ np.linalg.solve(Wkappa.T @ W, W.T @ Xkappa) + beta_hat = np.concatenate([estimator.coef_[:mx], estimator.coef_[(mx + mw) :]]) else: - beta_hat = beta_gamma_hat[:mx] - statistic = ( - (beta_hat - beta).T - @ np.linalg.inv(np.linalg.inv(cov_hat)[:mx, :mx]) - @ (beta_hat - beta) - ) + beta_hat = estimator.coef_ + + statistic = (beta_hat - beta).T @ cov_hat @ (beta_hat - beta) statistic /= sigma_hat_sq - p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx) + p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx + md) return statistic, p_value def inverse_wald_test( - Z, X, y, alpha=0.05, W=None, C=None, estimator="tsls", fit_intercept=True + Z, X, y, alpha=0.05, W=None, C=None, D=None, estimator="tsls", fit_intercept=True ): """ Return the quadric for the acceptance region based on asymptotic normality. @@ -155,6 +166,8 @@ def inverse_wald_test( Endogenous regressors not of interest. C: np.ndarray of dimension (n, mc) or None, optional, default = None Exogenous regressors not of interest. + D: np.ndarray of dimension (n, md) or None, optional, default = None + Exogenous regressors of interest. estimator: float or str, optional, default = "tsls" Estimator to use. Passed as ``kappa`` parameter to ``KClass``. 
fit_intercept: bool, optional, default = True @@ -164,42 +177,47 @@ def inverse_wald_test( """ if not 0 < alpha < 1: raise ValueError("alpha must be in (0, 1).") - n = Z.shape[0] - Z, X, y, W, C, _ = _check_test_inputs(Z, X, y, W=W, C=C) + Z, X, y, W, C, D, _ = _check_test_inputs(Z, X, y, W=W, C=C, D=D) + + n, mx = X.shape + mw, mc, md = W.shape[1], C.shape[1], D.shape[1] - z_alpha = scipy.stats.chi2.ppf(1 - alpha, df=X.shape[1]) + z_alpha = scipy.stats.chi2.ppf(1 - alpha, df=mx + md) if fit_intercept: C = np.hstack([np.ones((n, 1)), C]) if C.shape[1] > 0: - X, y, Z, W = oproj(C, X, y, Z, W) + X, y, Z, W, D = oproj(C, X, y, Z, W, D) XW = np.concatenate([X, W], axis=1) - kclass = KClass(kappa=estimator, fit_intercept=False).fit(XW, y, Z) + kclass = KClass(kappa=estimator, fit_intercept=False).fit(XW, y, Z=Z, C=D) beta = kclass.coef_ + beta = np.concatenate([beta[:mx], beta[(mx + mw) :]]) - residuals = y - kclass.predict(XW) - hat_sigma_sq = np.sum(residuals**2) / (n - XW.shape[1] - C.shape[1]) + residuals = y - kclass.predict(XW, C=D) + hat_sigma_sq = np.sum(residuals**2) / (n - mx - mw - md - mc - fit_intercept) + + if md > 0: + Z = np.hstack([Z, D]) X_proj, W_proj = proj(Z, X, W) + X_proj = np.hstack([X_proj, D]) + X = np.hstack([X, D]) Xkappa = kclass.kappa_ * X_proj + (1 - kclass.kappa_) * X A = X.T @ Xkappa - if W.shape[1] > 0: + if mw > 0: Wkappa = kclass.kappa_ * W_proj + (1 - kclass.kappa_) * W A = A - Xkappa.T @ W @ np.linalg.solve(Wkappa.T @ W, W.T @ Xkappa) - b = -2 * A @ beta[: X.shape[1]] - c = beta[: X.shape[1]].T @ A @ beta[: X.shape[1]] - hat_sigma_sq * z_alpha + b = -2 * A @ beta + c = beta.T @ A @ beta - hat_sigma_sq * z_alpha if isinstance(c, np.ndarray): c = c.item() - if W is not None: - return Quadric(A, b, c).project(range(XW.shape[1] - W.shape[1])) - else: - return Quadric(A, b, c) + return Quadric(A, b, c) diff --git a/tests/models/test_anchor_regression.py b/tests/models/test_anchor_regression.py index e6f6f51..1eaa857 100644 --- a/tests/models/test_anchor_regression.py +++ b/tests/models/test_anchor_regression.py @@ -14,7 +14,7 @@ def test_linear_anchor_regression_equal_to_ols(alpha, l1_ratio, n, mx, k, u): n = 100 - A, X, y, _, _ = simulate_gaussian_iv(n=n, mx=mx, u=u, k=k) + A, X, y, _, _, _ = simulate_gaussian_iv(n=n, mx=mx, u=u, k=k) df = pd.DataFrame( np.hstack([X, A]), columns=[f"X{i}" for i in range(mx)] + [f"anchor{i}" for i in range(k)], @@ -39,7 +39,7 @@ def test_linear_anchor_regression_equal_to_ols(alpha, l1_ratio, n, mx, k, u): @pytest.mark.parametrize("n, mx, k, u", [(100, 2, 2, 1), (100, 2, 5, 2)]) @pytest.mark.parametrize("gamma", [0.01, 1, 5]) def test_linear_anchor_regression_different_inputs(gamma, n, mx, k, u): - A, X, y, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u) + A, X, y, _, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u) anchors = [f"A{i}" for i in range(k)] df = pd.DataFrame(np.hstack([X, A]), columns=[f"X{i}" for i in range(mx)] + anchors) @@ -56,7 +56,7 @@ def test_linear_anchor_regression_different_inputs(gamma, n, mx, k, u): def test_score(): - A, X, y, _, _ = simulate_gaussian_iv(n=100, k=2, mx=2, mw=1) + A, X, y, _, _, _ = simulate_gaussian_iv(n=100, k=2, mx=2, mw=1) model = AnchorRegression(gamma=1).fit(X, y, A) assert model.score(X, y) > 0.5 @@ -71,7 +71,7 @@ def test_anchor_solution_minimizes_loss(n, mx, k, u, mc, gamma): This indirectly checks the mapping kappa <-> gamma for validity. 
""" - Z, X, y, C, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc) + Z, X, y, C, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc) ar = AnchorRegression(gamma=gamma, fit_intercept=False).fit(X, y, Z, C=C) diff --git a/tests/models/test_compare_to_linearmodels.py b/tests/models/test_compare_to_linearmodels.py index 7cb29bd..d2d2318 100644 --- a/tests/models/test_compare_to_linearmodels.py +++ b/tests/models/test_compare_to_linearmodels.py @@ -11,7 +11,7 @@ @pytest.mark.parametrize("n, mx, k, mc, u", [(100, 2, 2, 1, 1), (100, 2, 5, 0, 2)]) @pytest.mark.parametrize("kappa", ["liml", "tsls"]) def test_compare_against_linearmodels(fit_intercept, n, mx, k, mc, u, kappa): - Z, X, y, C, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc) + Z, X, y, C, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc) kclass = KClass(kappa=kappa, fit_intercept=fit_intercept) kclass.fit(X, y, Z=Z, C=C) @@ -29,14 +29,13 @@ def test_compare_against_linearmodels(fit_intercept, n, mx, k, mc, u, kappa): raise ValueError results = linearmodel.fit(cov_type="unadjusted") + params = results.params.to_numpy() - np.testing.assert_allclose(kclass.coef_[:mx], results.params[-mx:], rtol=1e-5) - np.testing.assert_allclose( - kclass.coef_[mx:], results.params[-(mx + mc) : -mx], rtol=1e-5 - ) + np.testing.assert_allclose(kclass.coef_[:mx], params[-mx:], rtol=1e-5) + np.testing.assert_allclose(kclass.coef_[mx:], params[-(mx + mc) : -mx], rtol=1e-5) np.testing.assert_allclose( kclass.predict(X, C), results.fitted_values.to_numpy().flatten(), rtol=1e-5 ) if fit_intercept: - np.testing.assert_allclose(kclass.intercept_, results.params[0], rtol=1e-5) + np.testing.assert_allclose(kclass.intercept_, params[0], rtol=1e-5) diff --git a/tests/models/test_k_class.py b/tests/models/test_k_class.py index 05355fb..115fc55 100644 --- a/tests/models/test_k_class.py +++ b/tests/models/test_k_class.py @@ -35,7 +35,7 @@ def test__fuller_alpha(kappa, expected): def test_k_class_equal_to_ols(fit_intercept, alpha, l1_ratio, n, mx, mc, k, u): n = 100 - Z, X, y, C, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mw=0, mc=mc) + Z, X, y, C, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mw=0, mc=mc) XC = np.hstack([X, C]) kclass = KClass( @@ -61,7 +61,7 @@ def test_k_class_equal_to_ols(fit_intercept, alpha, l1_ratio, n, mx, mc, k, u): def test_k_class_intercept_equiv_to_all_ones_in_C(kappa, n, mx, mc, k, u): n = 100 - Z, X, y, C, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc) + Z, X, y, C, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc) kclass_with = KClass(kappa=kappa, fit_intercept=True) kclass_without = KClass(kappa=kappa, fit_intercept=False) @@ -84,7 +84,7 @@ def test_equivalence_exogenous_covariates_and_fitting_on_residuals( ): # It should be equivalent to include exogenous covariates C or to fit a model on the # residuals M_C Z, M_C X, M_C y. 
-    Z, X, y, C, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc)
+    Z, X, y, C, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc)
 
     if fit_intercept:
         C = C - C.mean(axis=0)
@@ -107,7 +107,7 @@ def test_equivalence_exogenous_covariates_and_fitting_on_residuals(
 
 @pytest.mark.parametrize("n, mx, k, u", [(100, 2, 2, 1), (100, 2, 5, 2)])
 def test_ar_min_same_with_shortcut(n, mx, k, u):
-    Z, X, y, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u)
+    Z, X, y, _, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u)
 
     X_proj = proj(Z, X)
     y_proj = proj(Z, y)
@@ -122,7 +122,7 @@ def test_ar_min_same_with_shortcut(n, mx, k, u):
 
 @pytest.mark.parametrize("n, mx, k, u", [(100, 2, 2, 1), (100, 2, 5, 2)])
 def test_ar_min_positive(n, mx, k, u):
     # If k > mx, then kappa ~ chi2(k-mx) > 0. Else kappa = 0.
-    Z, X, y, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u)
+    Z, X, y, _, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u)
 
     ar_min = KClass.ar_min(X, y, Z)
 
@@ -135,7 +135,7 @@ def test_ar_min_positive(n, mx, k, u):
 @pytest.mark.parametrize("fit_intercept", [True, False])
 @pytest.mark.parametrize("n, mx, k, u", [(100, 2, 2, 1), (100, 2, 5, 2)])
 def test_liml_minimizes_anderson_rubin(fit_intercept, n, mx, k, u):
-    Z, X, y, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u)
+    Z, X, y, _, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u)
 
     kclass = KClass(kappa="liml", fit_intercept=fit_intercept)
     kclass.fit(X, y, Z=Z)
@@ -152,7 +152,7 @@ def ar(beta):
 @pytest.mark.parametrize("n, mx, k, u", [(100, 2, 2, 1), (100, 2, 5, 2)])
 @pytest.mark.parametrize("kappa", [0.2, 0.8])
 def test_k_class_normal_equations(fit_intercept, kappa, n, mx, k, u):
-    Z, X, y, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u)
+    Z, X, y, _, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u)
 
     k_class = KClass(kappa=kappa, fit_intercept=fit_intercept)
     k_class.fit(X, y, Z)
@@ -172,7 +172,7 @@ def test_k_class_normal_equations(fit_intercept, kappa, n, mx, k, u):
 def test_k_class_normal_equations_2(fit_intercept, n, mx, k, mc, u):
     # If kappa <=1, then the algorithm uses OLS on transformed data. If kappa > 1, then
     # it uses the normal equations. The result should be the same
-    Z, X, y, C, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc)
+    Z, X, y, C, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc)
 
     kclass1 = KClass(kappa=1 - 1e-10, fit_intercept=fit_intercept)
     kclass2 = KClass(kappa=1 + 1e-10, fit_intercept=fit_intercept)
@@ -186,7 +186,7 @@ def test_k_class_normal_equations_2(fit_intercept, n, mx, k, mc, u):
 
 @pytest.mark.parametrize("n, mx, k, mc, u", [(100, 2, 2, 0, 1), (100, 4, 4, 1, 2)])
 def test_liml_equal_to_tsls_in_just_identified_setting(n, mx, k, mc, u):
-    Z, X, y, C, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc)
+    Z, X, y, C, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc)
     y = y.flatten()
 
     liml = KClass(kappa="liml")
@@ -204,7 +204,7 @@ def test_liml_equal_to_tsls_in_just_identified_setting(n, mx, k, mc, u):
 @pytest.mark.parametrize("fit_intercept", [True, False])
 @pytest.mark.parametrize("n, mx, k, mc, u", [(100, 2, 2, 0, 1), (100, 4, 4, 1, 2)])
 def test_anderson_rubin_at_liml_is_equal_to_ar_min(n, mx, k, mc, u, fit_intercept):
-    Z, X, y, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc)
+    Z, X, y, _, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc)
     y = y.flatten()
 
     liml = KClass(kappa="liml", fit_intercept=fit_intercept)
diff --git a/tests/models/test_pulse.py b/tests/models/test_pulse.py
index ad5aef8..366fb60 100644
--- a/tests/models/test_pulse.py
+++ b/tests/models/test_pulse.py
@@ -11,7 +11,7 @@
 @pytest.mark.parametrize("p_min", [0.05, 0.01, 0.001])
 @pytest.mark.parametrize("n, mx, k, u", [(1000, 5, 1, 1), (1000, 2, 1, 2)])
 def test_pulse(p_min, rtol, n, mx, k, u):
-    A, X, Y, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, seed=1)
+    A, X, Y, _, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, seed=1)
     pulse = PULSE(p_min=p_min, rtol=rtol, kappa_max=0.999).fit(X, Y.flatten(), A)
 
     # The PULSE selects the "smallest" kappa s.t. p_value(kappa) >= p_min, where
diff --git a/tests/test_simulate.py b/tests/test_simulate.py
index 02913c9..4e2e6e9 100644
--- a/tests/test_simulate.py
+++ b/tests/test_simulate.py
@@ -4,12 +4,13 @@
 
 
 @pytest.mark.parametrize(
-    "n, mx, k, mc, u, mw", [(100, 2, 2, 1, 1, 2), (100, 2, 5, 0, 2, 0)]
+    "n, mx, k, mc, u, mw, md", [(100, 2, 2, 1, 1, 2, 1), (100, 2, 5, 0, 2, 0, 0)]
 )
-def test_simulate_gaussian_iv(n, mx, k, mc, u, mw):
-    Z, X, y, C, W = simulate_gaussian_iv(n=n, mx=mx, k=k, mc=mc, u=u, mw=mw)
+def test_simulate_gaussian_iv(n, mx, k, mc, u, mw, md):
+    Z, X, y, C, W, D = simulate_gaussian_iv(n=n, mx=mx, k=k, mc=mc, u=u, mw=mw, md=md)
     assert Z.shape == (n, k)
     assert X.shape == (n, mx)
     assert y.shape == (n,)
     assert C.shape == (n, mc)
     assert W.shape == (n, mw)
+    assert D.shape == (n, md)
diff --git a/tests/test_summary.py b/tests/test_summary.py
index 7d2e944..f60fb48 100644
--- a/tests/test_summary.py
+++ b/tests/test_summary.py
@@ -18,7 +18,7 @@
     "n, mx, k, mc, fit_intercept", [(100, 2, 3, 0, False), (100, 4, 10, 2, True)]
 )
 def test_kclass_summary(test, n, mx, k, mc, fit_intercept):
-    Z, X, y, C, _ = simulate_gaussian_iv(
+    Z, X, y, C, _, _ = simulate_gaussian_iv(
         n=n, mx=mx, k=k, mc=mc, mw=0, include_intercept=fit_intercept
     )
 
diff --git a/tests/tests/test_anderson_rubin.py b/tests/tests/test_anderson_rubin.py
index 2715b7f..9eb4d93 100644
--- a/tests/tests/test_anderson_rubin.py
+++ b/tests/tests/test_anderson_rubin.py
@@ -75,7 +75,7 @@ def test_more_powerful_sAR_critical_value_function_integrates_to_one(k, hat_kapp
 def test_inverse_anderson_rubin_confidence_set_alternative_formulation(
     alpha, n, k, mx, mw
 ):
-    Z, X, y, _, W = simulate_gaussian_iv(n=n, mx=mx, mw=mw, k=k)
+    Z, X, y, _, W, _ = simulate_gaussian_iv(n=n, mx=mx, mw=mw, k=k)
     S = np.hstack([X, W])
 
     inverse_ar = inverse_anderson_rubin_test(
diff --git a/tests/tests/test_conditional_likelihood_ratio.py b/tests/tests/test_conditional_likelihood_ratio.py
index f965170..3930476 100644
--- a/tests/tests/test_conditional_likelihood_ratio.py
+++ b/tests/tests/test_conditional_likelihood_ratio.py
@@ -2,8 +2,11 @@
 import pytest
 import scipy
 
+from ivmodels.models.kclass import KClass
+from ivmodels.simulate import simulate_gaussian_iv
 from ivmodels.tests.conditional_likelihood_ratio import (
     conditional_likelihood_ratio_critical_value_function,
+    conditional_likelihood_ratio_test,
 )
 
 
@@ -88,3 +91,25 @@ def test_conditional_likelihood_ratio_critical_value_function_some_by_method(
         ),
         atol=2 * tol,
     )
+
+
+@pytest.mark.parametrize(
+    "n, k, mx, mw, mc, md, fit_intercept",
+    [(100, 2, 1, 1, 0, 0, False), (100, 5, 2, 2, 1, 0, True)],
+)
+def test_conditional_likelihood_ratio_test_minimum_is_zero(
+    n, k, mx, mw, mc, md, fit_intercept
+):
+    Z, X, y, C, W, D = simulate_gaussian_iv(n=n, mx=mx, mw=mw, k=k, mc=mc, md=md)
+
+    liml = KClass(kappa="liml", fit_intercept=fit_intercept).fit(
+        X=np.hstack([X, W]), y=y, Z=Z, C=np.hstack([D, C])
+    )
+
+    x0 = np.concatenate([liml.coef_[:mx], liml.coef_[(mx + mw) : (mx + mw + md)]])
+
+    def f(x):
+        return conditional_likelihood_ratio_test(Z, X, y, x, W, C, D, fit_intercept)[0]
+
+    scipy.optimize.check_grad(func=f, grad=lambda _: 0, x0=x0)
+    assert np.allclose(f(x0), 0, atol=1e-8)
diff --git a/tests/tests/test_lagrange_multiplier.py b/tests/tests/test_lagrange_multiplier.py
index eae8cdc..f6cc0b6 100644
--- a/tests/tests/test_lagrange_multiplier.py
+++ b/tests/tests/test_lagrange_multiplier.py
@@ -3,7 +3,7 @@
 import scipy
 
 from ivmodels.simulate import simulate_gaussian_iv
-from ivmodels.tests.lagrange_multiplier import _LM
+from ivmodels.tests.lagrange_multiplier import _LM, lagrange_multiplier_test
 from ivmodels.utils import proj
 
 
@@ -46,7 +46,7 @@ def test__LM__init__(n, mx, mw, k):
     [(100, 1, 1, 2), (100, 1, 2, 5), (1000, 2, 5, 10), (1000, 5, 2, 10)],
 )
 def test_lm_gradient(n, mx, mw, k):
-    Z, X, y, _, W = simulate_gaussian_iv(
+    Z, X, y, _, W, _ = simulate_gaussian_iv(
         n=n, mx=mx, k=k, mw=mw, include_intercept=False
     )
     lm = _LM(X=X, y=y, W=W, dof=7, Z=Z)
@@ -90,25 +90,24 @@ def test_lm_gradient(n, mx, mw, k):
 
 
 @pytest.mark.parametrize(
-    "n, mx, mw, k",
-    [(100, 2, 5, 10), (100, 5, 2, 10)],
+    "n, mx, k",
+    [
+        (100, 1, 2),
+        (100, 1, 5),
+        (1000, 5, 10),
+    ],
 )
-def test_lm_gradient_beta_gamma(n, mx, mw, k):
-    Z, X, y, _, W = simulate_gaussian_iv(
-        n=n, mx=mx, k=k, mw=mw, include_intercept=False
+def test_compare_test_and_lm_derivative(n, mx, k):
+    Z, X, y, C, W, _ = simulate_gaussian_iv(
+        n=n, mx=mx, k=k, mc=0, include_intercept=False
     )
-    lm = _LM(X=X, y=y, W=W, dof=7, Z=Z)
+    lm = _LM(X=X, y=y, W=W, dof=n - k, Z=Z)
 
     rng = np.random.RandomState(0)
-    beta = rng.normal(0, 1, mx)
-    gamma = rng.normal(0, 1, mw)
-
-    assert np.allclose(
-        lm.derivative(np.concatenate([beta, gamma]))[0],
-        lm.derivative(beta, gamma)[0],
-    )
-
-    assert np.allclose(
-        lm.derivative(np.concatenate([beta, gamma]))[1],
-        lm.derivative(beta, gamma)[1],
-    )
+    for _ in range(5):
+        beta = rng.normal(0, 1, mx)
+        statistic1 = lm.derivative(beta=beta, jac=False, hess=False)[0]
+        statistic2 = lagrange_multiplier_test(
+            Z, X, y=y, beta=beta, C=C, fit_intercept=False
+        )[0]
+        assert np.allclose(statistic1, statistic2, rtol=1e-5, atol=1e-5)
diff --git a/tests/tests/test_likelihood_ratio.py b/tests/tests/test_likelihood_ratio.py
index fcc2d81..27e562f 100644
--- a/tests/tests/test_likelihood_ratio.py
+++ b/tests/tests/test_likelihood_ratio.py
@@ -1,10 +1,11 @@
 import numpy as np
 import pytest
 import scipy
+import scipy.optimize
 
 from ivmodels import KClass
 from ivmodels.simulate import simulate_gaussian_iv
-from ivmodels.tests import inverse_likelihood_ratio_test
+from ivmodels.tests import inverse_likelihood_ratio_test, likelihood_ratio_test
 from ivmodels.utils import proj
 
 
@@ -13,7 +14,7 @@
 def test_inverse_likelihood_ratio_confidence_set_alternative_formulation(
     alpha, n, k, mx, u
 ):
-    Z, X, y, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u)
+    Z, X, y, _, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u)
 
     kappa_ = KClass(kappa="liml", fit_intercept=False).fit(X=X, y=y, Z=Z).kappa_
 
@@ -33,3 +34,23 @@ def test_inverse_likelihood_ratio_confidence_set_alternative_formulation(
         inverse_ar.A,
         rtol=1e-8,
     )
+
+
+@pytest.mark.parametrize(
+    "n, k, mx, mw, mc, md, fit_intercept",
+    [(100, 2, 1, 1, 0, 0, False), (100, 5, 2, 2, 1, 1, True)],
+)
+def test_likelihood_ratio_test_minimum_is_zero(n, k, mx, mw, mc, md, fit_intercept):
+    Z, X, y, C, W, D = simulate_gaussian_iv(n=n, mx=mx, mw=mw, k=k, mc=mc, md=md)
+
+    liml = KClass(kappa="liml", fit_intercept=fit_intercept).fit(
+        X=np.hstack([X, W]), y=y, Z=Z, C=np.hstack([D, C])
+    )
+
+    x0 = np.concatenate([liml.coef_[:mx], liml.coef_[(mx + mw) : (mx + mw + md)]])
+
+    def f(x):
+        return likelihood_ratio_test(Z, X, y, x, W, C, D, fit_intercept)[0]
+
+    scipy.optimize.check_grad(func=f, grad=lambda _: 0, x0=x0)
+    assert np.allclose(f(x0), 0, atol=1e-8)
diff --git a/tests/tests/test_rank.py b/tests/tests/test_rank.py
index 934934d..5915b42 100644
--- a/tests/tests/test_rank.py
+++ b/tests/tests/test_rank.py
@@ -11,7 +11,7 @@
 @pytest.mark.parametrize("n, mx, k, u", [(40, 2, 2, 1), (40, 2, 5, 2), (40, 1, 2, 2)])
 @pytest.mark.parametrize("fit_intercept", [True, False])
 def test_bounded_inverse_anderson_rubin_p_value(n, mx, k, u, fit_intercept):
-    Z, X, y, _, _ = simulate_gaussian_iv(
+    Z, X, y, _, _, _ = simulate_gaussian_iv(
         n=n, mx=mx, k=k, u=u, seed=0, include_intercept=fit_intercept
     )
 
@@ -32,7 +32,7 @@ def test_bounded_inverse_anderson_rubin_p_value(n, mx, k, u, fit_intercept):
 
 
 def test_rank_test_raises():
-    Z, X, y, _, _ = simulate_gaussian_iv(n=10, k=1, mx=2, u=0)
+    Z, X, y, _, _, _ = simulate_gaussian_iv(n=10, k=1, mx=2, u=0)
 
     with pytest.raises(ValueError, match=re.escape("Need `Z.shape[1] >= X.shape[1]`.")):
         _ = rank_test(Z, X)
diff --git a/tests/tests/test_tests.py b/tests/tests/test_tests.py
index 9a5a0c9..2969b26 100644
--- a/tests/tests/test_tests.py
+++ b/tests/tests/test_tests.py
@@ -42,7 +42,7 @@
 ]
 
 
-# The Pulse and the LM tests don't have subvector versions.
+# The Pulse doesn't have a subvector version.
 @pytest.mark.parametrize(
     "test",
     [
@@ -57,29 +57,34 @@
     ],
 )
 @pytest.mark.parametrize(
-    "n, mx, mw, k, u, mc",
-    [(100, 1, 1, 2, 1, 3), (100, 1, 2, 5, 2, 0), (300, 2, 5, 10, 2, 2)],
+    "n, mx, mw, k, md, mc",
+    [(100, 1, 1, 2, 1, 3), (100, 1, 2, 5, 0, 0), (300, 2, 5, 10, 2, 2)],
 )
 @pytest.mark.parametrize("fit_intercept", [True, False])
-def test_subvector_test_size(test, n, mx, mw, k, u, mc, fit_intercept):
+def test_subvector_test_size(test, n, mx, mw, k, md, mc, fit_intercept):
     """Test that the test size is close to the nominal level."""
+    if test == guggenberger_anderson_rubin_test and md > 0:
+        pytest.skip()
+
     n_seeds = 100
     p_values = np.zeros(n_seeds)
 
     for seed in range(n_seeds):
-        Z, X, y, C, W, beta = simulate_gaussian_iv(
+        Z, X, y, C, W, D, beta = simulate_gaussian_iv(
             n=n,
             mx=mx,
             k=k,
-            u=u,
             mw=mw,
             mc=mc,
+            md=md,
             seed=seed,
             include_intercept=fit_intercept,
             return_beta=True,
         )
-        _, p_values[seed] = test(Z, X, y, beta, W, C=C, fit_intercept=fit_intercept)
+        _, p_values[seed] = test(
+            Z, X, y, beta, W, C=C, D=D, fit_intercept=fit_intercept
+        )
 
     assert np.mean(p_values < 0.1) <= 0.2
 
@@ -99,13 +104,17 @@ def test_subvector_test_size(test, n, mx, mw, k, u, mc, fit_intercept):
     ],
 )
 @pytest.mark.parametrize(
-    "n, mx, mw, mc, k, u", [(100, 2, 5, 0, 10, 2), (100, 2, 2, 2, 5, 2)]
+    "n, mx, mw, mc, k, md", [(100, 2, 5, 2, 10, 1), (100, 5, 2, 0, 8, 1)]
 )
-def test_subvector_test_size_low_rank(test, n, mx, mw, mc, k, u):
+def test_subvector_test_size_low_rank(test, n, mx, mw, mc, k, md):
     """Test that the test size is close to the nominal level if Pi is low rank."""
+    if test == guggenberger_anderson_rubin_test and md > 0:
+        pytest.skip()
+
     n_seeds = 200
     p_values = np.zeros(n_seeds)
+    u = mx + mw
 
     for seed in range(n_seeds):
         rng = np.random.RandomState(seed)
@@ -114,7 +123,7 @@ def test_subvector_test_size_low_rank(test, n, mx, mw, mc, k, u):
         delta_y = rng.normal(0, 1, (u, 1))
 
         beta_X = rng.normal(0, 0.1, (mx, 1))
-        beta_C = rng.normal(0, 0.1, (mc, 1))
+        beta_C = rng.normal(0, 0.1, (mc + md, 1))
         gamma = rng.normal(0, 1, (mw, 1))
 
         Pi = rng.normal(0, 1, (k, 1)) @ rng.normal(0, 1, (1, mw + mx))
@@ -123,14 +132,16 @@ def test_subvector_test_size_low_rank(test, n, mx, mw, mc, k, u):
 
         U = rng.normal(0, 1, (n, u))
 
-        C = rng.normal(0, 1, (n, mc))
-        Z = rng.normal(0, 1, (n, k)) + C @ rng.normal(0, 0.1, (mc, k))
+        C = rng.normal(0, 1, (n, mc + md))
+        Z = rng.normal(0, 1, (n, k)) + C @ rng.normal(0, 0.1, (mc + md, k))
 
         X = Z @ Pi_X + U @ delta_X + rng.normal(0, 1, (n, mx))
         W = Z @ Pi_W + U @ delta_W + rng.normal(0, 1, (n, mw))
         y = X @ beta_X + C @ beta_C + W @ gamma + U @ delta_y + rng.normal(0, 1, (n, 1))
 
-        _, p_values[seed] = test(Z, X, y, beta_X, W, fit_intercept=False)
+        C, D = C[:, :mc], C[:, mc:]
+        beta = np.vstack([beta_X, beta_C[mc:]])
+        _, p_values[seed] = test(Z, X, y, beta, W, D=D, C=C, fit_intercept=False)
 
     assert np.mean(p_values < 0.1) < 0.15
 
@@ -147,8 +158,8 @@ def test_subvector_test_size_low_rank(test, n, mx, mw, mc, k, u):
         lagrange_multiplier_test,
     ],
 )
-@pytest.mark.parametrize("n, k", [(100, 5), (1000, 30)])
-def test_subvector_test_size_weak_instruments(test, n, k):
+@pytest.mark.parametrize("n, k, md", [(100, 5, 5), (1000, 30, 0)])
+def test_subvector_test_size_weak_instruments(test, n, k, md):
     """
     Test that the test size is close to the nominal level for weak instruments.
 
@@ -158,11 +169,14 @@ def test_subvector_test_size_weak_instruments(test, n, k):
 
     n_seeds = 200
     p_values = np.zeros(n_seeds)
 
+    if test == guggenberger_anderson_rubin_test:
+        md = 0
+
     for seed in range(n_seeds):
-        Z, X, y, _, W, beta = simulate_guggenberger12(
-            n, k=k, seed=seed, return_beta=True
+        Z, X, y, _, W, D, beta = simulate_guggenberger12(
+            n, k=k, seed=seed, return_beta=True, md=md
         )
-        _, p_values[seed] = test(Z, X, y, beta, W)
+        _, p_values[seed] = test(Z, X, y, beta, W, D=D)
 
     assert np.mean(p_values < 0.1) < 0.15
 
@@ -190,7 +204,7 @@ def test_test_size(test, n, mx, k, u, mc, fit_intercept):
     p_values = np.zeros(n_seeds)
 
     for seed in range(n_seeds):
-        Z, X, y, C, _, beta = simulate_gaussian_iv(
+        Z, X, y, C, _, _, beta = simulate_gaussian_iv(
             n=n,
             mx=mx,
             k=k,
@@ -263,27 +277,45 @@ def test_test_size_weak_ivs(test, n, mx, k, u, mc):
     ],
 )
 @pytest.mark.parametrize(
-    "data", [(100, 1, 2, 1, 3), (100, 2, 5, 2, 0), "guggenberger12"]
+    "data",
+    [
+        (100, 1, 2, 3, 0, True),
+        (100, 1, 2, 3, 1, False),
+        (100, 2, 5, 0, 0, False),
+        (100, 0, 2, 1, 2, False),
+        "guggenberger12",
+    ],
 )
 @pytest.mark.parametrize("p_value", [0.1, 0.01])
-@pytest.mark.parametrize("fit_intercept", [True, False])
-def test_test_round_trip(test, inverse_test, data, p_value, fit_intercept):
+def test_test_round_trip(test, inverse_test, data, p_value):
     """A test's p-value at the confidence set's boundary equals the nominal level."""
     if data == "guggenberger12":
-        Z, X, y, C, _ = simulate_guggenberger12(n=1000, k=10, seed=0)
+        Z, X, y, C, _, D = simulate_guggenberger12(n=1000, k=10, seed=0, md=1)
+        fit_intercept = False
     else:
-        n, mx, k, u, mc = data
+        n, mx, k, mc, md, fit_intercept = data
 
         if test == lagrange_multiplier_test and mx > 1:
            pytest.skip("LM test inverse not implemented for mx > 1")
        if test == conditional_likelihood_ratio_test and mx > 1:
            pytest.skip("CLR test inverse not implemented for mx > 1")

-        Z, X, y, C, _ = simulate_gaussian_iv(
-            n=n, mx=mx, k=k, u=u, mc=mc, seed=0, include_intercept=fit_intercept
+        Z, X, y, C, _, D = simulate_gaussian_iv(
+            n=n, mx=mx, k=k, mc=mc, md=md, seed=0, include_intercept=fit_intercept
         )
 
-    quadric = inverse_test(Z, X, y, C=C, alpha=p_value, fit_intercept=fit_intercept)
+    if D.shape[1] > 0 and test in [pulse_test, conditional_likelihood_ratio_test]:
+        pytest.skip("Pulse and CLR tests do not support incl. exog. variables.")
+
+    if D.shape[1] + X.shape[1] > 1 and test in [
+        conditional_likelihood_ratio_test,
+        lagrange_multiplier_test,
+    ]:
+        pytest.skip("inverse CLR and LM tests do not support multidim. conf. sets")
+
+    quadric = inverse_test(
+        Z, X, y, C=C, D=D, alpha=p_value, fit_intercept=fit_intercept
+    )
     boundary = quadric._boundary()
 
     if isinstance(quadric, Quadric):
@@ -291,7 +323,9 @@ def test_test_round_trip(test, inverse_test, data, p_value, fit_intercept):
     p_values = np.zeros(boundary.shape[0])
 
     for idx, row in enumerate(boundary):
-        p_values[idx] = test(Z, X, y, C=C, beta=row, fit_intercept=fit_intercept)[1]
+        p_values[idx] = test(Z, X, y, C=C, D=D, beta=row, fit_intercept=fit_intercept)[
+            1
+        ]
 
     assert np.allclose(p_values, p_value, atol=1e-4)
 
@@ -302,7 +336,7 @@ def test_test_round_trip(test, inverse_test, data, p_value, fit_intercept):
         (wald_test, inverse_wald_test),
         (liml_wald_test, liml_inverse_wald_test),
         (anderson_rubin_test, inverse_anderson_rubin_test),
-        (lagrange_multiplier_test, inverse_lagrange_multiplier_test),
+        # (lagrange_multiplier_test, inverse_lagrange_multiplier_test),
         (f_anderson_rubin_test, inverse_f_anderson_rubin_test),
         (likelihood_ratio_test, inverse_likelihood_ratio_test),
         (conditional_likelihood_ratio_test, inverse_conditional_likelihood_ratio_test),
@@ -311,33 +345,50 @@ def test_test_round_trip(test, inverse_test, data, p_value, fit_intercept):
 @pytest.mark.parametrize(
     "data",
     [
-        # (100, 1, 3, 1, 2, 3),
-        # (100, 2, 5, 2, 3, 0),
-        # (100, 1, 10, 5, None, 0),
-        "guggenberger12",
+        (1000, 1, 5, 3, 3, 0, True),
+        (100, 1, 4, 2, 3, 1, False),
+        (100, 2, 5, 0, 0, 0, False),
+        (1000, 0, 2, 2, 1, 1, False),
+        (100, 1, 3, 2, 1, 1, False),
+        "guggenberger12 (md=1)",
+        "guggenberger12 (md=0)",
     ],
 )
-@pytest.mark.parametrize("p_value", [0.1])
-@pytest.mark.parametrize("fit_intercept", [True, False])
-def test_subvector_round_trip(test, inverse_test, data, p_value, fit_intercept):
+@pytest.mark.parametrize("p_value", [0.1, 0.5])
+def test_subvector_round_trip(test, inverse_test, data, p_value):
     """
     A test's p-value at the confidence set's boundary equals the nominal level.
 
     This time for subvector tests.
     """
-    if data == "guggenberger12":
-        Z, X, y, C, W = simulate_guggenberger12(n=10000, k=10, seed=0)
+    if isinstance(data, str) and data.startswith("guggenberger12"):
+        md = 1 if data.endswith("(md=1)") else 0
+        if test == lagrange_multiplier_test and md > 0:
+            pytest.skip("LM test inverse not implemented for md + mx > 1")
+
+        Z, X, y, C, W, D = simulate_guggenberger12(n=1000, k=5, seed=0, md=md)
+        fit_intercept = False
     else:
-        n, mx, k, mw, u, mc = data
+        n, mx, k, mw, mc, md, fit_intercept = data
 
-        if test == lagrange_multiplier_test and mx > 1:
-            pytest.skip("LM test inverse not implemented for mx > 1")
-        if test == conditional_likelihood_ratio_test and mx > 1:
-            pytest.skip("CLR test inverse not implemented for mx > 1")
+        if test == lagrange_multiplier_test and mx + md > 1:
+            pytest.skip("LM test inverse not implemented for mx + md > 1")
+        if test == conditional_likelihood_ratio_test and mx + md > 1:
+            pytest.skip("CLR test inverse not implemented for mx + md > 1")
 
-        Z, X, y, C, W = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mw=mw, mc=mc, seed=0)
+        Z, X, y, C, W, D = simulate_gaussian_iv(
+            n=n, mx=mx, k=k, mw=mw, mc=mc, md=md, seed=0
+        )
 
-    kwargs = {"Z": Z, "X": X, "y": y, "W": W, "C": C, "fit_intercept": fit_intercept}
+    kwargs = {
+        "Z": Z,
+        "X": X,
+        "y": y,
+        "W": W,
+        "C": C,
+        "D": D,
+        "fit_intercept": fit_intercept,
+    }
 
     quadric = inverse_test(alpha=p_value, **kwargs)
     boundary = quadric._boundary()
@@ -369,7 +420,7 @@ def test_subvector_round_trip(test, inverse_test, data, p_value, fit_intercept):
 @pytest.mark.parametrize("n, mx, k, u, mc", [(100, 2, 2, 1, 3), (100, 2, 5, 2, 0)])
 def test_p_value_of_estimator(test, kappa, n, mx, k, u, mc):
     """The estimated coefficient should be in the confidence set with 95% coverage."""
-    Z, X, y, C, _ = simulate_gaussian_iv(n=n, mx=mx, mc=mc, k=k, u=u)
+    Z, X, y, C, _, _ = simulate_gaussian_iv(n=n, mx=mx, mc=mc, k=k, u=u)
     estimator = KClass(kappa=kappa).fit(X, y.flatten(), Z=Z, C=C)
     p_value = test(Z, X, y, beta=estimator.coef_[:mx], C=C)[1]
     assert p_value > 0.05
@@ -379,7 +430,7 @@ def test_p_value_of_estimator(test, kappa, n, mx, k, u, mc):
 @pytest.mark.parametrize("n, mx, k, u, mc", [(100, 2, 2, 1, 3), (100, 2, 5, 2, 0)])
 def test_ar_test_monotonic_in_kappa(test, n, mx, k, u, mc):
     """AR(beta(kappa)) should be decreasing in kappa increasing towards kappa."""
-    Z, X, Y, C, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc)
+    Z, X, Y, C, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc)
     Y = Y.flatten()
     kappas = np.linspace(0, KClass.ar_min(X, Y, Z) + 1, 10)
     models = [KClass(kappa=kappa).fit(X, Y, Z=Z) for kappa in kappas]
@@ -390,7 +441,9 @@ def test_ar_test_monotonic_in_kappa(test, n, mx, k, u, mc):
     assert np.all(p_values[:-1] <= p_values[1:])
 
 
-@pytest.mark.parametrize("n, mx, k, u, mc", [(100, 2, 2, 1, 3), (100, 2, 5, 2, 0)])
+@pytest.mark.parametrize(
+    "n, mx, k, u, mc, md", [(100, 2, 2, 1, 3, 1), (100, 2, 5, 2, 0, 0)]
+)
 @pytest.mark.parametrize(
     "inverse_test",
     [
@@ -402,9 +455,9 @@ def test_ar_test_monotonic_in_kappa(test, n, mx, k, u, mc):
         inverse_likelihood_ratio_test,
     ],
 )
-def test_inverse_test_sorted(inverse_test, n, mx, k, u, mc):
+def test_inverse_test_sorted(inverse_test, n, mx, k, u, mc, md):
     """The volume of confidence sets should be increasing in the p-value."""
-    Z, X, y, C, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc, seed=0)
+    Z, X, y, C, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mc=mc, md=md, seed=0)
 
     p_values = [0.5, 0.2, 0.1, 0.05]
     quadrics = [inverse_test(Z, X, y, C=C, alpha=p_value) for p_value in p_values]
@@ -414,26 +467,34 @@ def test_inverse_test_sorted(inverse_test, n, mx, k, u, mc):
     assert volumes[0] <= volumes[1] <= volumes[2] <= volumes[3]
 
 
-@pytest.mark.parametrize("fit_intercept", [True, False])
-@pytest.mark.parametrize("n, mx, mw, u, mc", [(100, 2, 0, 2, 2), (100, 2, 2, 2, 2)])
-def test_ar_lm_clr_yield_same_result(n, mx, mw, u, mc, fit_intercept):
+@pytest.mark.parametrize(
+    "n, mx, mw, u, mc, md, fit_intercept",
+    [(100, 2, 0, 2, 2, 0, True), (100, 2, 2, 2, 2, 1, False)],
+)
+def test_ar_lm_clr_yield_same_result(n, mx, mw, u, mc, md, fit_intercept):
     """The AR, LM, and CLR tests should yield the same result if k = m."""
-    Z, X, y, C, W = simulate_gaussian_iv(n=n, mx=mx, k=mx + mw, u=u, mw=mw, mc=mc)
+    Z, X, y, C, W, D = simulate_gaussian_iv(
+        n=n, mx=mx, k=mx + mw, u=u, mw=mw, mc=mc, md=md
+    )
 
     for seed in range(5):
         rng = np.random.RandomState(seed)
-        beta = rng.normal(size=(mx, 1))
+        beta = rng.normal(size=(mx + md, 1))
 
-        ar = anderson_rubin_test(Z, X, y, beta, W, C=C, fit_intercept=fit_intercept)[1]
+        ar = anderson_rubin_test(
+            Z, X, y, beta, W, C=C, D=D, fit_intercept=fit_intercept
+        )
         lm = lagrange_multiplier_test(
-            Z, X, y, beta, W, C=C, fit_intercept=fit_intercept
-        )[1]
+            Z, X, y, beta, W, C=C, D=D, fit_intercept=fit_intercept
+        )
         clr = conditional_likelihood_ratio_test(
-            Z, X, y, beta, W, C=C, fit_intercept=fit_intercept
-        )[1]
+            Z, X, y, beta, W, C=C, D=D, fit_intercept=fit_intercept
+        )
+        if md > 0:
+            clr = ar  # not supported
 
-        assert np.allclose(ar, lm)
-        assert np.allclose(ar, clr)
+        assert np.allclose(ar[0] * (mx + md), lm[0], clr[0])
+        assert np.allclose(ar[1], lm[1], clr[1])
 
 
 # once with W=None, once with W!=None
@@ -453,10 +514,74 @@ def test_test_output_type(n, mx, mw, u, mc, test):
     if test == pulse_test and mw > 0:
         pytest.skip("Pulse test does not have a subvector version.")
 
-    Z, X, y, C, W, beta = simulate_gaussian_iv(
+    Z, X, y, C, W, _, beta = simulate_gaussian_iv(
         n=n, mx=mx, k=mx + mw, u=u, mw=mw, mc=mc, return_beta=True
     )
 
     statistic, p_values = test(Z, X, y, beta=beta, W=W, C=C)
     assert isinstance(statistic, float)
     assert isinstance(p_values, float)
+
+
+@pytest.mark.parametrize(
+    "test, inverse_test",
+    [
+        (wald_test, inverse_wald_test),
+        (anderson_rubin_test, inverse_anderson_rubin_test),
+        (lagrange_multiplier_test, inverse_lagrange_multiplier_test),  # type: ignore
+    ],
+)
+@pytest.mark.parametrize(
+    "n, mx, mw, mc, md, fit_intercept",
+    [
+        (100, 1, 0, 2, 3, True),
+        (100, 0, 0, 2, 1, False),
+        (100, 0, 2, 2, 1, False),
+        (100, 0, 0, 2, 3, True),
+    ],
+)
+def test_d_and_z_same_result(n, mx, mw, mc, md, fit_intercept, test, inverse_test):
+    """
+    For the AR, LM, and Wald(tsls) test, passing D or including D into Z, X is the same.
+
+    For Wald LIML, computation of kappa fails.
+    """
+    Z, X, y, C, W, D = simulate_gaussian_iv(n=n, mx=mx, k=mx + mw, mw=mw, mc=mc, md=md)
+
+    if test != lagrange_multiplier_test:
+        inverse_test_1 = inverse_test(
+            Z=Z, X=X, y=y, C=C, W=W, D=D, fit_intercept=fit_intercept
+        )
+        inverse_test_2 = inverse_test(
+            Z=np.hstack([Z, D]),
+            X=np.hstack([X, D]),
+            y=y,
+            C=C,
+            W=W,
+            D=None,
+            fit_intercept=fit_intercept,
+        )
+
+        assert np.allclose(
+            inverse_test_1.A / inverse_test_1.c, inverse_test_2.A / inverse_test_2.c
+        )
+        assert np.allclose(
+            inverse_test_1.b / inverse_test_2.c, inverse_test_2.b / inverse_test_2.c
+        )
+
+    for seed in range(5):
+        rng = np.random.RandomState(seed)
+        beta = rng.normal(size=(mx + md, 1))
+
+        test_result_1 = test(Z, X, y, beta, W, C=C, D=D, fit_intercept=fit_intercept)
+        test_result_2 = test(
+            np.hstack([Z, D]),
+            np.hstack([X, D]),
+            y,
+            beta,
+            W,
+            C=C,
+            D=None,
+            fit_intercept=fit_intercept,
+        )
+        assert np.allclose(test_result_1, test_result_2)
diff --git a/tests/tests/test_wald.py b/tests/tests/test_wald.py
index d4fcc41..b926c96 100644
--- a/tests/tests/test_wald.py
+++ b/tests/tests/test_wald.py
@@ -9,36 +9,45 @@
 @pytest.mark.parametrize("fit_intercept", [True, False])
 @pytest.mark.parametrize("estimator", ["liml", "tsls"])
 @pytest.mark.parametrize(
-    "n, mx, mw, k, u", [(100, 2, 0, 2, 1), (100, 2, 0, 5, 2), (100, 1, 2, 4, 1)]
+    "n, mx, mw, md, k, u",
+    [(100, 2, 0, 0, 2, 1), (100, 2, 0, 1, 5, 2), (100, 1, 2, 2, 4, 1)],
 )
 def test_compare_wald_tests_with_linearmodels(
-    n, mx, mw, k, u, estimator, fit_intercept
+    n, mx, mw, md, k, u, estimator, fit_intercept
 ):
-    Z, X, y, _, W = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mw=mw)
+    Z, X, y, _, W, D = simulate_gaussian_iv(n=n, mx=mx, k=k, u=u, mw=mw, md=md)
     XW = np.hstack([X, W])
 
     if fit_intercept:
-        intercept = np.ones((n, 1))
-    else:
-        intercept = None
+        D = np.hstack([np.ones((n, 1)), D])
 
     if estimator == "liml":
-        linearmodel = IVLIML(y, intercept, XW, Z)
+        linearmodel = IVLIML(y, D, XW, Z)
     elif estimator == "tsls":
-        linearmodel = IV2SLS(y, intercept, XW, Z)
+        linearmodel = IV2SLS(y, D, XW, Z)
 
     results = linearmodel.fit(cov_type="unadjusted", debiased=True)
-    mat = np.eye(mx + mw + fit_intercept)[int(fit_intercept) : (mx + fit_intercept), :]
-    lm_wald_result = results.wald_test(mat, np.zeros(mx))
+
+    from ivmodels import KClass
+
+    KClass(estimator).fit(X=XW, y=y, Z=Z, C=D[:, fit_intercept:]).summary(
+        X=XW, y=y, Z=Z, C=D[:, fit_intercept:]
+    )
+
+    mat = np.eye(mx + mw + md + fit_intercept)[
+        int(fit_intercept) : (mx + md + fit_intercept), :
+    ]
+    lm_wald_result = results.wald_test(mat, np.zeros(mx + md))
 
     ivmodels_wald_result = wald_test(
         Z,
         X,
         y,
-        beta=np.zeros(mx),
+        beta=np.zeros(mx + md),
         estimator=estimator,
         fit_intercept=fit_intercept,
         W=W,
+        D=D[:, fit_intercept:],
     )
 
     np.testing.assert_allclose(