From eb0e8265f27e8784dc0c301af076a1b3fea5e03a Mon Sep 17 00:00:00 2001 From: Roni Kobrosly Date: Sun, 25 Apr 2021 19:48:54 -0400 Subject: [PATCH] linted tmle_core (#39) --- causal_curve/core.py | 2 +- causal_curve/gps_core.py | 1 - causal_curve/tmle_core.py | 173 +++++++++++++++++++------------------- docs/changelog.rst | 4 + docs/conf.py | 2 +- setup.py | 2 +- 6 files changed, 93 insertions(+), 91 deletions(-) diff --git a/causal_curve/core.py b/causal_curve/core.py index c943a9d..94691e0 100644 --- a/causal_curve/core.py +++ b/causal_curve/core.py @@ -12,7 +12,7 @@ class Core: def __init__(self): pass - __version__ = "1.0.5" + __version__ = "1.0.6" def get_params(self): """Returns a dict of all of the object's user-facing parameters diff --git a/causal_curve/gps_core.py b/causal_curve/gps_core.py index 57bb899..b5f4855 100644 --- a/causal_curve/gps_core.py +++ b/causal_curve/gps_core.py @@ -9,7 +9,6 @@ import pandas as pd from pandas.api.types import is_float_dtype, is_integer_dtype, is_numeric_dtype from pygam import LinearGAM, LogisticGAM, s -from scipy.special import logit from scipy.stats import gamma, norm import statsmodels.api as sm from statsmodels.genmod.families.links import inverse_power as Inverse_Power diff --git a/causal_curve/tmle_core.py b/causal_curve/tmle_core.py index 72e0a56..e957623 100644 --- a/causal_curve/tmle_core.py +++ b/causal_curve/tmle_core.py @@ -118,7 +118,8 @@ class TMLE_Core(Core): ---------- Kennedy EH, Ma Z, McHugh MD, Small DS. Nonparametric methods for doubly robust estimation - of continuous treatment effects. Journal of the Royal Statistical Society, Series B. 79(4), 2017, pp.1229-1245. + of continuous treatment effects. Journal of the Royal Statistical Society, + Series B. 79(4), 2017, pp.1229-1245. van der Laan MJ and Rubin D. Targeted maximum likelihood learning. In: The International Journal of Biostatistics, 2(1), 2006. @@ -362,10 +363,16 @@ def fit(self, T, X, y): # Produce expanded versions of the inputs self.if_verbose_print("Transforming data for the Q-model and G-model") - self.grid_values, self.fully_expanded_x , self.fully_expanded_t_and_x = self._transform_inputs() + ( + self.grid_values, + self.fully_expanded_x, + self.fully_expanded_t_and_x, + ) = self._transform_inputs() # Fit G-model and get relevent predictions - self.if_verbose_print("Fitting G-model and making treatment assignment predictions...") + self.if_verbose_print( + "Fitting G-model and making treatment assignment predictions..." + ) self.g_model_preds, self.g_model_2_preds = self._g_model() # Fit Q-model and get relevent predictions @@ -373,8 +380,13 @@ def fit(self, T, X, y): self.q_model_preds = self._q_model() # Calculating treatment assignment adjustment using G-model's predictions - self.if_verbose_print("Calculating treatment assignment adjustment using G-model's predictions...") - self.n_interpd_values, self.var_n_interpd_values = self._treatment_assignment_correction() + self.if_verbose_print( + "Calculating treatment assignment adjustment using G-model's predictions..." + ) + ( + self.n_interpd_values, + self.var_n_interpd_values, + ) = self._treatment_assignment_correction() # Adjusting outcome using Q-model's predictions self.if_verbose_print("Adjusting outcome using Q-model's predictions...") @@ -382,13 +394,14 @@ def fit(self, T, X, y): # Calculating corrected pseudo-outcome values self.if_verbose_print("Calculating corrected pseudo-outcome values...") - self.pseudo_out = (self.y_data - self.outcome_adjust) / (self.n_interpd_values / self.var_n_interpd_values) + self.expand_outcome_adjust + self.pseudo_out = (self.y_data - self.outcome_adjust) / ( + self.n_interpd_values / self.var_n_interpd_values + ) + self.expand_outcome_adjust # Training final GAM model using pseudo-outcome values self.if_verbose_print("Training final GAM model using pseudo-outcome values...") self.final_gam = self._fit_final_gam() - def calculate_CDRC(self, ci=0.95): """Using the results of the fitted model, this generates a dataframe of CDRC point estimates at each of the values of the treatment grid. Connecting these estimates will produce @@ -413,7 +426,8 @@ def calculate_CDRC(self, ci=0.95): self._validate_calculate_CDRC_params(ci) - self.if_verbose_print(""" + self.if_verbose_print( + """ Generating predictions for each value of treatment grid, and averaging to get the CDRC...""" ) @@ -423,47 +437,42 @@ def calculate_CDRC(self, ci=0.95): self._cdrc_preds = self._cdrc_predictions_continuous(ci) return pd.DataFrame( - self._cdrc_preds, columns=["Treatment", "Causal_Dose_Response", "Lower_CI", "Upper_CI"] + self._cdrc_preds, + columns=["Treatment", "Causal_Dose_Response", "Lower_CI", "Upper_CI"], ).round(3) - def _transform_inputs(self): """Takes the treatment and covariates and transforms so they can be used by the algo""" # Create treatment grid grid_values = np.linspace( - start=self.t_data.min(), - stop=self.t_data.max(), - num=self.treatment_grid_num + start=self.t_data.min(), stop=self.t_data.max(), num=self.treatment_grid_num ) # Create expanded treatment array expanded_t = np.array([]) for treat_value in grid_values: - expanded_t = np.append(expanded_t, ([treat_value] * self.num_rows)) + expanded_t = np.append(expanded_t, ([treat_value] * self.num_rows)) # Create expanded treatment array with covariates expanded_t_and_x = pd.concat( [ pd.DataFrame(expanded_t), - pd.concat( - [self.x_data] * self.treatment_grid_num - ).reset_index(drop = True, inplace = False), + pd.concat([self.x_data] * self.treatment_grid_num).reset_index( + drop=True, inplace=False + ), ], - axis = 1, - ignore_index = True + axis=1, + ignore_index=True, ) expanded_t_and_x.columns = [self.treatment_col_name] + self.covariate_col_names fully_expanded_t_and_x = pd.concat( - [ - pd.concat([self.x_data, self.t_data], axis=1), - expanded_t_and_x - ], - axis = 0, - ignore_index = True + [pd.concat([self.x_data, self.t_data], axis=1), expanded_t_and_x], + axis=0, + ignore_index=True, ) fully_expanded_x = fully_expanded_t_and_x[self.covariate_col_names] @@ -480,16 +489,16 @@ def _g_model(self): n_estimators=self.n_estimators, max_depth=self.max_depth, learning_rate=self.learning_rate, - random_state=self.random_seed - ).fit(X = X, y = t) + random_state=self.random_seed, + ).fit(X=X, y=t) g_model_preds = g_model.predict(self.fully_expanded_x) g_model2 = GradientBoostingRegressor( n_estimators=self.n_estimators, max_depth=self.max_depth, learning_rate=self.learning_rate, - random_state=self.random_seed - ).fit(X = X, y = ((t - g_model_preds[0:self.num_rows])**2)) + random_state=self.random_seed, + ).fit(X=X, y=((t - g_model_preds[0 : self.num_rows]) ** 2)) g_model_2_preds = g_model2.predict(self.fully_expanded_x) return g_model_preds, g_model_2_preds @@ -506,105 +515,95 @@ def _q_model(self): n_estimators=self.n_estimators, max_depth=self.max_depth, learning_rate=self.learning_rate, - random_state=self.random_seed - ).fit(X = X, y = y) + random_state=self.random_seed, + ).fit(X=X, y=y) q_model_preds = q_model.predict(self.fully_expanded_t_and_x) return q_model_preds - def _treatment_assignment_correction(self): - """Uses the G-model and its predictions to adjust treatment assignment - """ + """Uses the G-model and its predictions to adjust treatment assignment""" t_standard = ( - (self.fully_expanded_t_and_x[self.treatment_col_name] - self.g_model_preds) / np.sqrt(self.g_model_2_preds) + self.fully_expanded_t_and_x[self.treatment_col_name] - self.g_model_preds + ) / np.sqrt(self.g_model_2_preds) + + interpd_values = ( + interp1d( + self.one_dim_estimate_density(t_standard.values)[0], + self.one_dim_estimate_density(t_standard.values[0 : self.num_rows])[1], + kind="linear", + )(t_standard) + / np.sqrt(self.g_model_2_preds) ) - interpd_values = interp1d( - self.one_dim_estimate_density(t_standard.values)[0], - self.one_dim_estimate_density(t_standard.values[0:self.num_rows])[1], - kind='linear' - )(t_standard) / np.sqrt(self.g_model_2_preds) + n_interpd_values = interpd_values[0 : self.num_rows] - n_interpd_values = interpd_values[0:self.num_rows] - - temp_interpd = interpd_values[self.num_rows:] + temp_interpd = interpd_values[self.num_rows :] zeros_mat = np.zeros((self.num_rows, self.treatment_grid_num)) for i in range(0, self.treatment_grid_num): - lower = i * self.num_rows - upper = i * self.num_rows + self.num_rows - zeros_mat[:,i] = temp_interpd[lower:upper] + lower = i * self.num_rows + upper = i * self.num_rows + self.num_rows + zeros_mat[:, i] = temp_interpd[lower:upper] var_n_interpd_values = self.pred_from_loess( - train_x = self.grid_values, - train_y = zeros_mat.mean(axis = 0), - x_to_pred = self.t_data + train_x=self.grid_values, + train_y=zeros_mat.mean(axis=0), + x_to_pred=self.t_data, ) return n_interpd_values, var_n_interpd_values - def _outcome_adjustment(self): - """Uses the Q-model and its predictions to adjust the outcome - """ + """Uses the Q-model and its predictions to adjust the outcome""" - outcome_adjust = self.q_model_preds[0:self.num_rows] + outcome_adjust = self.q_model_preds[0 : self.num_rows] - temp_outcome_adjust = self.q_model_preds[self.num_rows:] + temp_outcome_adjust = self.q_model_preds[self.num_rows :] zero_mat = np.zeros((self.num_rows, self.treatment_grid_num)) for i in range(0, self.treatment_grid_num): - lower = i * self.num_rows - upper = i * self.num_rows + self.num_rows - zero_mat[:,i] = temp_outcome_adjust[lower:upper] + lower = i * self.num_rows + upper = i * self.num_rows + self.num_rows + zero_mat[:, i] = temp_outcome_adjust[lower:upper] expand_outcome_adjust = self.pred_from_loess( - train_x = self.grid_values, - train_y = zero_mat.mean(axis = 0), - x_to_pred = self.t_data + train_x=self.grid_values, + train_y=zero_mat.mean(axis=0), + x_to_pred=self.t_data, ) return outcome_adjust, expand_outcome_adjust def _fit_final_gam(self): - """We now regress the original treatment values against the pseudo-outcome values - """ + """We now regress the original treatment values against the pseudo-outcome values""" return LinearGAM( - s(0, n_splines=30, spline_order=3), - max_iter=500, - lam=self.bandwidth - ).fit(self.t_data, y = self.pseudo_out) + s(0, n_splines=30, spline_order=3), max_iter=500, lam=self.bandwidth + ).fit(self.t_data, y=self.pseudo_out) def one_dim_estimate_density(self, series): - """ - Takes in a numpy array, returns grid values for KDE and predicted probabilities - """ - series_grid = np.linspace( - start=series.min(), - stop=series.max(), - num=self.num_rows + """ + Takes in a numpy array, returns grid values for KDE and predicted probabilities + """ + series_grid = np.linspace( + start=series.min(), stop=series.max(), num=self.num_rows ) - kde = KernelDensity( - kernel='gaussian', - bandwidth=self.bandwidth - ).fit(series.reshape(-1, 1)) + kde = KernelDensity(kernel="gaussian", bandwidth=self.bandwidth).fit( + series.reshape(-1, 1) + ) - return series_grid, np.exp(kde.score_samples(series_grid.reshape(-1, 1))) + return series_grid, np.exp(kde.score_samples(series_grid.reshape(-1, 1))) def pred_from_loess(self, train_x, train_y, x_to_pred): - """ - Trains simple loess regression and returns predictions - """ - kr_model = KernelReg( - endog = train_y, - exog = train_x, - var_type = 'c', - bw = [self.bandwidth] + """ + Trains simple loess regression and returns predictions + """ + kr_model = KernelReg( + endog=train_y, exog=train_x, var_type="c", bw=[self.bandwidth] ) - return kr_model.fit(x_to_pred)[0] + return kr_model.fit(x_to_pred)[0] diff --git a/docs/changelog.rst b/docs/changelog.rst index a4c31aa..8c802e7 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,10 @@ Change Log ========== +Version 1.0.6 +------------- +- Latest version of python black can now run. Linted tmle_core.py. + Version 1.0.5 ------------- - Removed `master` branch, replaced with `main` diff --git a/docs/conf.py b/docs/conf.py index 100f3b2..4fb1bb7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,7 +23,7 @@ author = "Roni Kobrosly" # The full version, including alpha/beta/rc tags -release = "1.0.5" +release = "1.0.6" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 36a12a9..6e5b312 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="causal-curve", - version="1.0.5", + version="1.0.6", author="Roni Kobrosly", author_email="roni.kobrosly@gmail.com", description="A python library with tools to perform causal inference using \