From 985bb9ebd9a73f6a5a0801bf77ed50e9d799b3f8 Mon Sep 17 00:00:00 2001 From: Roni Kobrosly Date: Sun, 5 Jul 2020 08:53:35 -0500 Subject: [PATCH] small big fixes for all classes --- causal_curve/gps.py | 62 ++++++++++++++++----------------- causal_curve/mediation.py | 72 +++++++++++++++++++-------------------- causal_curve/tmle.py | 34 +++++++++--------- docs/changelog.rst | 7 ++++ docs/conf.py | 2 +- setup.py | 2 +- 6 files changed, 93 insertions(+), 86 deletions(-) diff --git a/causal_curve/gps.py b/causal_curve/gps.py index 41a4d6b..5946f4f 100644 --- a/causal_curve/gps.py +++ b/causal_curve/gps.py @@ -189,29 +189,29 @@ def _validate_init_params(self): # Checks for gps_family param if not isinstance(self.gps_family, (str, type(None))): raise TypeError( - f"gps_family parameter must be a string or None, \ - but found type {type(self.gps_family)}" + f"gps_family parameter must be a string or None " + f"but found type {type(self.gps_family)}" ) if (isinstance(self.gps_family, str)) and ( self.gps_family not in ["normal", "lognormal", "gamma"] ): raise ValueError( - f"gps_family parameter must take on values of \ - 'normal', 'lognormal', or 'gamma', but found {self.gps_family}" + f"gps_family parameter must take on values of " + f"'normal', 'lognormal', or 'gamma', but found {self.gps_family}" ) # Checks for treatment_grid_num if not isinstance(self.treatment_grid_num, int): raise TypeError( - f"treatment_grid_num parameter must be an integer, \ - but found type {type(self.treatment_grid_num)}" + f"treatment_grid_num parameter must be an integer, " + f"but found type {type(self.treatment_grid_num)}" ) if (isinstance(self.treatment_grid_num, int)) and self.treatment_grid_num < 10: raise ValueError( - f"treatment_grid_num parameter should be >= 10 so your final curve \ - has enough resolution, but found value {self.treatment_grid_num}" + f"treatment_grid_num parameter should be >= 10 so your final curve " + f"has enough resolution, but found value 
{self.treatment_grid_num}" ) if ( @@ -222,47 +222,47 @@ def _validate_init_params(self): # Checks for lower_grid_constraint if not isinstance(self.lower_grid_constraint, float): raise TypeError( - f"lower_grid_constraint parameter must be a float, \ - but found type {type(self.lower_grid_constraint)}" + f"lower_grid_constraint parameter must be a float, " + f"but found type {type(self.lower_grid_constraint)}" ) if ( isinstance(self.lower_grid_constraint, float) ) and self.lower_grid_constraint < 0: raise ValueError( - f"lower_grid_constraint parameter cannot be < 0, \ - but found value {self.lower_grid_constraint}" + f"lower_grid_constraint parameter cannot be < 0, " + f"but found value {self.lower_grid_constraint}" ) if ( isinstance(self.lower_grid_constraint, float) ) and self.lower_grid_constraint >= 1.0: raise ValueError( - f"lower_grid_constraint parameter cannot >= 1.0, \ - but found value {self.lower_grid_constraint}" + f"lower_grid_constraint parameter cannot >= 1.0, " + f"but found value {self.lower_grid_constraint}" ) # Checks for upper_grid_constraint if not isinstance(self.upper_grid_constraint, float): raise TypeError( - f"upper_grid_constraint parameter must be a float, \ - but found type {type(self.upper_grid_constraint)}" + f"upper_grid_constraint parameter must be a float, " + f"but found type {type(self.upper_grid_constraint)}" ) if ( isinstance(self.upper_grid_constraint, float) ) and self.upper_grid_constraint <= 0: raise ValueError( - f"upper_grid_constraint parameter cannot be <= 0, \ - but found value {self.upper_grid_constraint}" + f"upper_grid_constraint parameter cannot be <= 0, " + f"but found value {self.upper_grid_constraint}" ) if ( isinstance(self.upper_grid_constraint, float) ) and self.upper_grid_constraint > 1.0: raise ValueError( - f"upper_grid_constraint parameter cannot > 1.0, \ - but found value {self.upper_grid_constraint}" + f"upper_grid_constraint parameter cannot > 1.0, " + f"but found value {self.upper_grid_constraint}" ) 
# Checks for lower_grid_constraint isn't higher than upper_grid_constraint @@ -274,8 +274,8 @@ def _validate_init_params(self): # Checks for spline_order if not isinstance(self.spline_order, int): raise TypeError( - f"spline_order parameter must be an integer, \ - but found type {type(self.spline_order)}" + f"spline_order parameter must be an integer, " + f"but found type {type(self.spline_order)}" ) if (isinstance(self.spline_order, int)) and self.spline_order < 1: @@ -361,8 +361,8 @@ def _validate_fit_data(self): for column in self.X: if not is_numeric_dtype(self.X[column]): raise TypeError( - f"All covariate (X) columns must be int or float type \ - (i.e. must be numeric)" + f"All covariate (X) columns must be int or float type " + f"(i.e. must be numeric)" ) # Checks for Y column @@ -399,9 +399,9 @@ def fit(self, T, X, y): self : object """ - self.T = T - self.X = X - self.y = y + self.T = T.reset_index(drop=True, inplace=False) + self.X = X.reset_index(drop=True, inplace=False) + self.y = y.reset_index(drop=True, inplace=False) # Validate this input data self._validate_fit_data() @@ -425,8 +425,8 @@ def fit(self, T, X, y): if self.verbose: print( - f"Best fitting model was {self.best_gps_family}, which \ - produced a deviance of {self.gps_deviance}" + f"Best fitting model was {self.best_gps_family}, which " + f"produced a deviance of {self.gps_deviance}" ) # Otherwise, go with the what the user provided... @@ -500,8 +500,8 @@ def calculate_CDRC(self, ci=0.95): if self.verbose: print( - f"Generating predictions for each value of treatment grid, \ - and averaging to get the CDRC..." 
+ """Generating predictions for each value of treatment grid, + and averaging to get the CDRC...""" ) # For each column of _cdrc_preds, calculate the mean and confidence interval bounds diff --git a/causal_curve/mediation.py b/causal_curve/mediation.py index b66c783..8d85eb8 100644 --- a/causal_curve/mediation.py +++ b/causal_curve/mediation.py @@ -149,14 +149,14 @@ def _validate_init_params(self): # Checks for treatment_grid_num if not isinstance(self.treatment_grid_num, int): raise TypeError( - f"treatment_grid_num parameter must be an integer, \ - but found type {type(self.treatment_grid_num)}" + f"treatment_grid_num parameter must be an integer, " + f"but found type {type(self.treatment_grid_num)}" ) if (isinstance(self.treatment_grid_num, int)) and self.treatment_grid_num < 4: raise ValueError( - f"treatment_grid_num parameter should be >= 4 so the internal models \ - have enough resolution, but found value {self.treatment_grid_num}" + f"treatment_grid_num parameter should be >= 4 so the internal models " + f"have enough resolution, but found value {self.treatment_grid_num}" ) if (isinstance(self.treatment_grid_num, int)) and self.treatment_grid_num > 100: @@ -165,89 +165,89 @@ def _validate_init_params(self): # Checks for lower_grid_constraint if not isinstance(self.lower_grid_constraint, float): raise TypeError( - f"lower_grid_constraint parameter must be a float, \ - but found type {type(self.lower_grid_constraint)}" + f"lower_grid_constraint parameter must be a float, " + f"but found type {type(self.lower_grid_constraint)}" ) if ( isinstance(self.lower_grid_constraint, float) ) and self.lower_grid_constraint < 0: raise ValueError( - f"lower_grid_constraint parameter cannot be < 0, \ - but found value {self.lower_grid_constraint}" + f"lower_grid_constraint parameter cannot be < 0, " + f"but found value {self.lower_grid_constraint}" ) if ( isinstance(self.lower_grid_constraint, float) ) and self.lower_grid_constraint >= 1.0: raise ValueError( - 
f"lower_grid_constraint parameter cannot >= 1.0, \ - but found value {self.lower_grid_constraint}" + f"lower_grid_constraint parameter cannot >= 1.0, " + f"but found value {self.lower_grid_constraint}" ) # Checks for upper_grid_constraint if not isinstance(self.upper_grid_constraint, float): raise TypeError( - f"upper_grid_constraint parameter must be a float, \ - but found type {type(self.upper_grid_constraint)}" + f"upper_grid_constraint parameter must be a float, " + f"but found type {type(self.upper_grid_constraint)}" ) if ( isinstance(self.upper_grid_constraint, float) ) and self.upper_grid_constraint <= 0: raise ValueError( - f"upper_grid_constraint parameter cannot be <= 0, \ - but found value {self.upper_grid_constraint}" + f"upper_grid_constraint parameter cannot be <= 0, " + f"but found value {self.upper_grid_constraint}" ) if ( isinstance(self.upper_grid_constraint, float) ) and self.upper_grid_constraint > 1.0: raise ValueError( - f"upper_grid_constraint parameter cannot > 1.0, \ - but found value {self.upper_grid_constraint}" + f"upper_grid_constraint parameter cannot > 1.0, " + f"but found value {self.upper_grid_constraint}" ) # Checks for bootstrap_draws if not isinstance(self.bootstrap_draws, int): raise TypeError( - f"bootstrap_draws parameter must be a int, \ - but found type {type(self.bootstrap_draws)}" + f"bootstrap_draws parameter must be a int, " + f"but found type {type(self.bootstrap_draws)}" ) if (isinstance(self.bootstrap_draws, int)) and self.bootstrap_draws < 100: raise ValueError( - f"bootstrap_draws parameter cannot be < 100, \ - but found value {self.bootstrap_draws}" + f"bootstrap_draws parameter cannot be < 100, " + f"but found value {self.bootstrap_draws}" ) if (isinstance(self.bootstrap_draws, int)) and self.bootstrap_draws > 500000: raise ValueError( - f"bootstrap_draws parameter cannot > 500000, \ - but found value {self.bootstrap_draws}" + f"bootstrap_draws parameter cannot > 500000, " + f"but found value 
{self.bootstrap_draws}" ) # Checks for bootstrap_replicates if not isinstance(self.bootstrap_replicates, int): raise TypeError( - f"bootstrap_replicates parameter must be a int, \ - but found type {type(self.bootstrap_replicates)}" + f"bootstrap_replicates parameter must be a int, " + f"but found type {type(self.bootstrap_replicates)}" ) if ( isinstance(self.bootstrap_replicates, int) ) and self.bootstrap_replicates < 50: raise ValueError( - f"bootstrap_replicates parameter cannot be < 50, \ - but found value {self.bootstrap_replicates}" + f"bootstrap_replicates parameter cannot be < 50, " + f"but found value {self.bootstrap_replicates}" ) if ( isinstance(self.bootstrap_replicates, int) ) and self.bootstrap_replicates > 100000: raise ValueError( - f"bootstrap_replicates parameter cannot > 100000, \ - but found value {self.bootstrap_replicates}" + f"bootstrap_replicates parameter cannot > 100000, " + f"but found value {self.bootstrap_replicates}" ) # Checks for lower_grid_constraint isn't higher than upper_grid_constraint @@ -259,8 +259,8 @@ def _validate_init_params(self): # Checks for spline_order if not isinstance(self.spline_order, int): raise TypeError( - f"spline_order parameter must be an integer, \ - but found type {type(self.spline_order)}" + f"spline_order parameter must be an integer, " + f"but found type {type(self.spline_order)}" ) if (isinstance(self.spline_order, int)) and self.spline_order < 3: @@ -394,9 +394,9 @@ def fit(self, T, M, y): self : object """ - self.T = T - self.M = M - self.y = y + self.T = T.reset_index(drop=True, inplace=False) + self.M = M.reset_index(drop=True, inplace=False) + self.y = y.reset_index(drop=True, inplace=False) # Validate this input data self._validate_fit_data() @@ -504,7 +504,7 @@ def calculate_mediation(self, ci=0.95): for i in range(0, 1000): bootstrap_overall_means.append( general_indirect.sample( - frac=0.25, replace=True, random_state=self.random_seed + frac=0.25, replace=True ).mean() ) @@ -519,7 +519,7 @@ def 
calculate_mediation(self, ci=0.95): } ) .round(4) - .clip(lower=0) + .clip(lower=0, upper=1.0) ) total_prop_mean = round(np.array(self.prop_indirect_list).mean(), 4) @@ -586,7 +586,7 @@ def _create_bootstrap_replicate(self): """Creates a single bootstrap replicate from the data """ temp_t = self.T.sample( - n=self.bootstrap_draws, replace=True, random_state=self.random_seed + n=self.bootstrap_draws, replace=True ) temp_m = self.M.iloc[temp_t.index] temp_y = self.y.iloc[temp_t.index] diff --git a/causal_curve/tmle.py b/causal_curve/tmle.py index 1bcee0f..636a5da 100644 --- a/causal_curve/tmle.py +++ b/causal_curve/tmle.py @@ -144,15 +144,15 @@ def _validate_init_params(self): # Checks for treatment_grid_bins if not isinstance(self.treatment_grid_bins, list): raise TypeError( - f"treatment_grid_bins parameter must be a list, \ - but found type {type(self.treatment_grid_bins)}" + f"treatment_grid_bins parameter must be a list, " + f"but found type {type(self.treatment_grid_bins)}" ) for element in self.treatment_grid_bins: if not isinstance(element, (int, float)): raise TypeError( - f"'{element}' in `treatment_grid_bins` list is not of type float or int, \ - it is {type(element)}" + f"'{element}' in `treatment_grid_bins` list is not of type float or int, " + f"it is {type(element)}" ) if len(self.treatment_grid_bins) < 2: @@ -161,8 +161,8 @@ def _validate_init_params(self): # Checks for n_estimators if not isinstance(self.n_estimators, int): raise TypeError( - f"n_estimators parameter must be an integer, \ - but found type {type(self.n_estimators)}" + f"n_estimators parameter must be an integer, " + f"but found type {type(self.n_estimators)}" ) if (self.n_estimators < 10) or (self.n_estimators > 100000): @@ -171,8 +171,8 @@ def _validate_init_params(self): # Checks for learning_rate if not isinstance(self.learning_rate, (int, float)): raise TypeError( - f"learning_rate parameter must be an integer or float, \ - but found type {type(self.learning_rate)}" + 
f"learning_rate parameter must be an integer or float, " + f"but found type {type(self.learning_rate)}" ) if (self.learning_rate <= 0) or (self.learning_rate >= 1000): @@ -183,8 +183,8 @@ def _validate_init_params(self): # Checks for max_depth if not isinstance(self.max_depth, int): raise TypeError( - f"max_depth parameter must be an integer, \ - but found type {type(self.max_depth)}" + f"max_depth parameter must be an integer, " + f"but found type {type(self.max_depth)}" ) if self.max_depth <= 0: @@ -193,8 +193,8 @@ def _validate_init_params(self): # Checks for gamma if not isinstance(self.gamma, float): raise TypeError( - f"gamma parameter must be a float, \ - but found type {type(self.gamma)}" + f"gamma parameter must be a float, " + f"but found type {type(self.gamma)}" ) if self.gamma <= 0: @@ -233,8 +233,8 @@ def _validate_fit_data(self): for column in self.x_data: if not is_numeric_dtype(self.x_data[column]): raise TypeError( - f"All covariate (X) columns must be int or float type \ - (i.e. must be numeric)" + """All covariate (X) columns must be int or float type + (i.e. must be numeric)""" ) # Checks for Y column @@ -332,9 +332,9 @@ def fit(self, T, X, y): self : object """ - self.t_data = T - self.x_data = X - self.y_data = y + self.t_data = T.reset_index(drop=True, inplace=False) + self.x_data = X.reset_index(drop=True, inplace=False) + self.y_data = y.reset_index(drop=True, inplace=False) # Validate this input data self._validate_fit_data() diff --git a/docs/changelog.rst b/docs/changelog.rst index 68c052d..35e811b 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,13 @@ Change Log ========== +Version 0.3.2 +------------- +- Fixed random seed issue with Mediation tool +- Fixed Mediation bootstrap issue. 
Confidence interval bounded [0,1] +- Fixed issue with all classes not accepting non-sequential indices in pandas DataFrames/Series +- Class init checks for all classes now print cleaner errors if bad input + Version 0.3.1 ------------- diff --git a/docs/conf.py b/docs/conf.py index d5354ae..698f703 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'Roni Kobrosly' # The full version, including alpha/beta/rc tags -release = '0.3.1' +release = '0.3.2' # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 282c98d..5da9ca7 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="causal-curve", - version="0.3.1", + version="0.3.2", author="Roni Kobrosly", author_email="roni.kobrosly@gmail.com", description="A python library with tools to perform causal inference using \