New argument for tests: D, included exogenous variables (#94)
* New argument for Wald test: D

* New argument for AR: D

* Additional arg to simulate_gaussian_iv: md

* New argument for likelihood_ratio: D

* Add D argument to lagrange_multiplier.

* Add D argument to CLR test.

* Fix some tests.

* Some bugfixes.

* Fix last test.

* Changelog.

* Add test.

* More testing.

* Fix inverse_wald output.

* Docstring.

* Revert rank.

* lagrange docstring.

* Fix LM inverse.

* Fix warnings.

* Don't test LM.

* This time correctly.

* Remove superfluous code.
mlondschien authored Jul 18, 2024
1 parent fa32c3d commit 94b8f8d
Showing 28 changed files with 724 additions and 357 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.rst
@@ -21,6 +21,16 @@ Changelog
`endogenous_names_`, `exogenous_names_`, and `instrument_names_`. If pandas is
installed, there's also `names_coefs_`.

- The tests :func:`~ivmodels.tests.anderson_rubin_test`,
:func:`~ivmodels.tests.lagrange_multiplier_test`,
:func:`~ivmodels.tests.likelihood_ratio_test`, and
:func:`~ivmodels.tests.wald_test` and their inverses
:func:`~ivmodels.tests.inverse_anderson_rubin_test`,
:func:`~ivmodels.tests.inverse_lagrange_multiplier_test`,
:func:`~ivmodels.tests.inverse_likelihood_ratio_test`, and
:func:`~ivmodels.tests.inverse_wald_test` now support an additional parameter `D`
of exogenous covariates to be included in the test. This is not supported for
the conditional likelihood ratio test.


**Other changes:**
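To illustrate the changelog entry above, here is a minimal, hypothetical sketch of how the new `D` argument might be used with the updated test and simulation signatures from this commit. The data dimensions and the choice of the Anderson-Rubin test are illustrative, and the keyword interface is assumed from the `Summary.fit` calls further down.

import numpy as np

from ivmodels.simulate import simulate_gaussian_iv
from ivmodels.tests import anderson_rubin_test, inverse_anderson_rubin_test

# simulate_gaussian_iv now also returns the included exogenous variables D
# (md columns) in addition to Z, X, y, C and W.
Z, X, y, C, W, D = simulate_gaussian_iv(n=1000, mx=2, k=5, u=2, mw=1, mc=1, md=1)

# Test H_0: the coefficients of X and D are jointly zero, with W as nuisance
# endogenous regressors and C as exogenous controls.
# (Assumption: beta stacks the coefficients of X and D, as in Summary.fit below.)
statistic, p_value = anderson_rubin_test(
    Z=Z, X=X, y=y, W=W, C=C, D=D, beta=np.zeros(X.shape[1] + D.shape[1])
)

# Joint confidence set for the coefficients of X and D.
confidence_set = inverse_anderson_rubin_test(
    Z=Z, X=X, y=y, W=W, C=C, D=D, alpha=0.05
)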
2 changes: 1 addition & 1 deletion benchmarks/benchmarks/kclass.py
@@ -16,7 +16,7 @@ def setup(self, kappa, data):
Z, X, y, C, _ = simulate_guggenberger12(n=1000, k=10, seed=0)
else:
n, mx, k, mc = data
Z, X, y, C, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=mx, mc=mc)
Z, X, y, C, _, _ = simulate_gaussian_iv(n=n, mx=mx, k=k, u=mx, mc=mc)

self.data = {
"Z": Z,
2 changes: 1 addition & 1 deletion benchmarks/benchmarks/tests.py
@@ -32,7 +32,7 @@ def setup(self, n, data):
)
else:
k, mx, mw, mc = data
Z, X, y, C, W, beta = simulate_gaussian_iv(
Z, X, y, C, W, _, beta = simulate_gaussian_iv(
n=n, mx=mx, k=k, u=mx, mc=mc, mw=mw, return_beta=True
)

18 changes: 10 additions & 8 deletions ivmodels/models/kclass.py
@@ -363,7 +363,8 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs):
"Variable names must not contain 'intercept' when fit_intercept=True."
)

n, mx = X.shape
n, k = Z.shape
mx, mc = X.shape[1], C.shape[1]

# Including an intercept is equivalent to replacing y <- M_1 y, X <- M_1 X,
# C <- M_1 C, Z <- M_1 Z, where M_1 = Id - 1/n 1 1^T is the projection
@@ -383,13 +384,13 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs):
y = y - y_mean
Z = Z - Z.mean(axis=0)

if C.shape[1] > 0:
if mc > 0:
c_mean = C.mean(axis=0)
C = C - c_mean
else:
c_mean = np.zeros(0)
else:
x_mean, y_mean, c_mean = np.zeros(mx), 0, np.zeros(C.shape[1])
x_mean, y_mean, c_mean = np.zeros(mx), 0, np.zeros(mc)

# Including exogenous covariates C can be done by the following approaches:
#
@@ -403,7 +404,7 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs):
# replace Z <- [Z C], X <- [X C], y <- y, and apply the k-class estimator to
# the augmented data. This is the approach taken here. Care needs to be taken
# to compute kappa. For this, we follow the first approach. See below.
if C.shape[1] > 0:
if mc > 0:
Z = np.hstack([Z, C])
# save some compute here, as proj([Z, C], C) = C
X_proj, y_proj = proj(Z, X, y)
@@ -432,7 +433,7 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs):
y_proj=y_proj,
)
self.kappa_liml_ = 1 + self.ar_min_
self.kappa_ = self.kappa_liml_ - self.fuller_alpha_ / (n - Z.shape[1])
self.kappa_ = self.kappa_liml_ - self.fuller_alpha_ / (n - k - mc)
else:
self.kappa_ = self.kappa

@@ -482,7 +483,7 @@ def predict(self, X, C=None, *args, **kwargs): # noqa D
(X, _, C), _ = self._X_Z_C(X, C=C, Z=None, predict=True)
return super().predict(np.hstack([X, C]), *args, **kwargs)

def summary(self, X, y, Z=None, C=None, test="wald (liml)", alpha=0.05, **kwargs):
def summary(self, X, y, Z=None, C=None, test="wald", alpha=0.05, **kwargs):
"""
Create Summary object for the fitted model.
@@ -504,8 +505,9 @@ def summary(self, X, y, Z=None, C=None, test="wald (liml)", alpha=0.05, **kwargs
specified, ``C`` must be ``None``. If ``C`` is specified,
``exogenous_names`` and ``exogenous_regex`` must be ``None``.
test: str, optional, default="wald (liml)"
The test to use. Must be one of "wald (liml)", "wald (tsls)", "anderson
rubin", "AR", or "lagrange multiplier".
The test to use. Must be one of "wald", "anderson-rubin",
"lagrange multiplier", "likelihood-ratio", or
"conditional likelihood-ratio".
alpha: float, optional, default=0.05
The significance level.
**kwargs
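The Fuller correction in the `fit` hunk above now writes the denominator explicitly as n - k - mc, i.e. the sample size minus the number of instruments and exogenous covariates; previously this was expressed as n - Z.shape[1] with Z already augmented by C, so the value is unchanged. A small numeric sketch with made-up values:

# Illustrative values only: ar_min stands for the minimal Anderson-Rubin
# statistic ar_min_ computed during fitting, fuller_alpha for fuller_alpha_.
n, k, mc = 1000, 10, 2
ar_min, fuller_alpha = 0.03, 1.0

kappa_liml = 1 + ar_min
# Same formula as self.kappa_ in the diff: the Fuller adjustment divides by
# the residual degrees of freedom after projecting onto [Z, C].
kappa = kappa_liml - fuller_alpha / (n - k - mc)
print(kappa)  # approximately 1.0290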
6 changes: 3 additions & 3 deletions ivmodels/quadric.py
@@ -137,9 +137,9 @@ def _boundary(self, num=200):
)

if np.all(self.D > 0) and (self.c_standardized > 0):
return np.zeros(shape=(0, 1))
return np.zeros(shape=(0, len(self.D)))
if np.all(self.D < 0) and (self.c_standardized <= 0):
return np.zeros(shape=(0, 1))
return np.zeros(shape=(0, len(self.D)))

# If both entries of D have the opposite sign as c_standardized, the
# boundary of the quadric is an ellipse.
@@ -239,7 +239,7 @@ def project(self, coordinates):
if mask.all():
return self

if ( # [~mask, ~mask] will to a .reshape(-1) on the matrix
if ( # [~mask, ~mask] does a .reshape(-1) on the matrix
scipy.linalg.eigvalsh(self.A[:, ~mask][~mask, :], subset_by_index=[0, 0])[0]
< 0
):
44 changes: 27 additions & 17 deletions ivmodels/simulate.py
@@ -5,7 +5,7 @@


def simulate_guggenberger12(
n, *, k, seed=0, h11=100, h12=1, rho=0.95, cov=None, return_beta=False
n, *, k, seed=0, h11=100, h12=1, rho=0.95, cov=None, return_beta=False, md=0
):
"""
Generate data by process as proposed by :cite:t:`guggenberger2012asymptotic`.
@@ -96,14 +96,18 @@

Z = rng.normal(0, 1, (n, k))

Pi_ZD = rng.normal(0, 1, (k, md))
D = rng.normal(0, 1, (n, md)) + Z @ Pi_ZD
delta = rng.normal(0, 0.1, (md, 1))

X = Z @ Pi_X + noise[:, 1:2]
W = Z @ Pi_W + noise[:, 2:]
y = X @ beta + W @ gamma + noise[:, 0:1]
y = X @ beta + W @ gamma + D @ delta + noise[:, 0:1]

if return_beta:
return Z, X, y.flatten(), None, W, beta.flatten()
return Z, X, y.flatten(), None, W, D, np.concatenate([beta, delta]).flatten()
else:
return Z, X, y.flatten(), None, W
return Z, X, y.flatten(), None, W, D


def simulate_gaussian_iv(
@@ -114,6 +118,7 @@
u=None,
mw=0,
mc=0,
md=0,
seed=0,
include_intercept=True,
return_beta=False,
@@ -171,32 +176,37 @@
ux = rng.normal(0, 1, (u, m))
uy = rng.normal(0, 1, (u, 1))

alpha = rng.normal(0, 1, (mc, 1))
alpha = rng.normal(0, 1, (mc + md, 1))
beta = rng.normal(0, 1, (m, 1))

Pi_ZX = rng.normal(0, 1, (k, m))
Pi_CX = rng.normal(0, 1, (mc, m))
Pi_CZ = rng.normal(0, 1, (mc, k))
Pi_CX = rng.normal(0, 1, (mc + md, m))
Pi_CZ = rng.normal(0, 1, (mc + md, k))

U = rng.normal(0, 1, (n, u))
C = rng.normal(0, 1, (n, mc)) + include_intercept * rng.normal(0, 1, (1, mc))
C = rng.normal(0, 1, (n, mc + md))
if include_intercept:
C += rng.normal(0, 1, (1, mc + md))

Z = (
rng.normal(0, 1, (n, k))
+ include_intercept * rng.normal(0, 1, (1, k))
+ C @ Pi_CZ
)
Z = rng.normal(0, 1, (n, k)) + C @ Pi_CZ
if include_intercept:
Z += rng.normal(0, 1, (1, k))

X = Z @ Pi_ZX + C @ Pi_CX + U @ ux
X += rng.normal(0, 1, (n, m)) + include_intercept * rng.normal(0, 1, (1, m))
y = C @ alpha + X @ beta + U @ uy
y += rng.normal(0, 1, (n, 1)) + include_intercept * rng.normal(0, 1, (1, 1))

X, W, C, D = X[:, :mx], X[:, mx:], C[:, :mc], C[:, mc:]

gamma0 = beta[mx:]
beta0 = np.concatenate([beta[:mx], alpha[mc:]])

if return_beta and return_gamma:
return Z, X[:, :mx], y.flatten(), C, X[:, mx:], beta[:mx], beta[mx:]
return Z, X, y.flatten(), C, W, D, beta0, gamma0
elif return_beta:
return Z, X[:, :mx], y.flatten(), C, X[:, mx:], beta[:mx]
return Z, X, y.flatten(), C, W, D, beta0
elif return_gamma:
return Z, X[:, :mx], y.flatten(), C, X[:, mx:], beta[mx:]
return Z, X, y.flatten(), C, W, D, gamma0
else:
return Z, X[:, :mx], y.flatten(), C, X[:, mx:]
return Z, X, y.flatten(), C, W, D
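The two generators above now also return the included exogenous variables D. A short, hypothetical usage sketch of the new return signatures; the argument values are illustrative:

from ivmodels.simulate import simulate_gaussian_iv, simulate_guggenberger12

# simulate_guggenberger12: D enters the outcome via
# y = X @ beta + W @ gamma + D @ delta + noise; C is None for this design.
# With the default md=0, D has zero columns and the original DGP is recovered.
Z, X, y, C, W, D = simulate_guggenberger12(n=1000, k=10, seed=0, md=2)

# simulate_gaussian_iv: without flags the function now returns six arrays,
# Z (n, k), X (n, mx), y (n,), C (n, mc), W (n, mw) and D (n, md).
Z, X, y, C, W, D = simulate_gaussian_iv(n=500, mx=2, k=4, u=2, mw=1, mc=1, md=1)

# With return_beta=True there is a seventh output stacking the coefficients
# of X and D, i.e. np.concatenate([beta[:mx], alpha[mc:]]) in the code above.
Z, X, y, C, W, D, beta0 = simulate_gaussian_iv(
    n=500, mx=2, k=4, u=2, mw=1, mc=1, md=1, return_beta=True
)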
31 changes: 18 additions & 13 deletions ivmodels/summary.py
@@ -93,6 +93,8 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs):
if not str(self.test).lower() in _TESTS:
raise ValueError(f"Test {self.test} not recognized.")

n = X.shape[0]

test_, inverse_test_ = _TESTS.get(str(self.test).lower())

self.estimates_ = self.kclass.coef_.tolist()
@@ -102,40 +104,42 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs):

(X, Z, C), _ = self.kclass._X_Z_C(X, Z, C, predict=False)

self.feature_names_ = self.kclass.endogenous_names_
# + self.kclass.exogenous_names_
self.feature_names_ = (
self.kclass.endogenous_names_ + self.kclass.exogenous_names_
)

idx = 0
# if self.kclass.fit_intercept:
# self.feature_names_ = ["intercept"] + self.feature_names_
# self.estimates_ = [self.kclass.intercept_] + self.estimates_
# idx -= 1
if self.kclass.fit_intercept:
self.feature_names_ = ["intercept"] + self.feature_names_
self.estimates_ = [self.kclass.intercept_] + self.estimates_
idx -= 1

for name in self.feature_names_:
if name in self.kclass.endogenous_names_:
mask = np.zeros(len(self.kclass.endogenous_names_), dtype=bool)
mask[idx] = True

X_, W_, C_, Z_ = X[:, mask], X[:, ~mask], C, Z
X_, W_, C_, D_ = X[:, mask], X[:, ~mask], C, np.zeros((n, 0))
fit_intercept_ = True

elif name in self.kclass.exogenous_names_:
mask = np.zeros(len(self.kclass.exogenous_names_), dtype=bool)
mask[idx - len(self.kclass.endogenous_names_)] = True

X_, W_, C_, Z_ = C[:, mask], X, C[:, ~mask], np.hstack([Z, C[:, mask]])
X_, W_, C_, D_ = np.zeros((n, 0)), X, C[:, ~mask], C[:, mask]
fit_intercept_ = True

elif name == "intercept":
ones = np.ones((X.shape[0], 1))
X_, W_, C_, Z_ = ones, X, C, np.hstack([Z, ones])
X_, W_, C_, D_ = np.zeros((n, 0)), X, C, np.ones((n, 1))
fit_intercept_ = False

test_result = test_(
Z=Z_,
Z=Z,
X=X_,
W=W_,
y=y,
C=C_,
D=D_,
beta=np.array([0]),
fit_intercept=fit_intercept_,
**kwargs,
Expand All @@ -144,11 +148,12 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs):
self.p_values_.append(test_result[1])

confidence_set = inverse_test_(
Z=Z_,
Z=Z,
X=X_,
W=W_,
y=y,
C=C_,
D=D_,
alpha=self.alpha,
fit_intercept=fit_intercept_,
**kwargs,
Expand All @@ -173,7 +178,7 @@ def __format__(self, format_spec: str) -> str: # noqa D
return "Summary not fitted yet."

def format_p_value(x):
return f"{x:{format_spec}}" if x > 1e-16 else "<1e-16"
return f"{x:{format_spec}}" if np.isnan(x) or x > 1e-16 else "<1e-16"

estimate_str = [f"{e:{format_spec}}" for e in self.estimates_]
statistics_str = [f"{s:{format_spec}}" for s in self.statistics_]
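For reference, a condensed, hypothetical sketch of the per-coefficient subproblem that `Summary.fit` now builds when testing a single exogenous coefficient, mirroring the `X_, W_, C_, D_` assignment in the diff above (the choice of the Wald test is illustrative):

import numpy as np

from ivmodels.tests import wald_test

def test_exogenous_coefficient(Z, X, y, C, j):
    # The j-th exogenous column moves from C into D, all endogenous
    # regressors become nuisance parameters W, and the remaining exogenous
    # columns stay in C, as in Summary.fit above.
    n = X.shape[0]
    mask = np.zeros(C.shape[1], dtype=bool)
    mask[j] = True
    X_, W_, C_, D_ = np.zeros((n, 0)), X, C[:, ~mask], C[:, mask]
    return wald_test(
        Z=Z, X=X_, W=W_, y=y, C=C_, D=D_, beta=np.array([0]), fit_intercept=True
    )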
