From 5d35451b614cce1fff8ec731ccef5a8171b190ab Mon Sep 17 00:00:00 2001 From: Roni Kobrosly Date: Mon, 12 Oct 2020 14:26:06 -0500 Subject: [PATCH] fixed negative treatment bug (#23) --- .travis.yml | 2 +- causal_curve/core.py | 3 ++ causal_curve/gps.py | 91 ++++++++++++++++++++++++++------------------ docs/changelog.rst | 8 ++++ docs/conf.py | 2 +- docs/install.rst | 1 + setup.py | 2 +- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/.travis.yml b/.travis.yml index e60156f..ded57b0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ env: before_install: # Here we download miniconda and install the dependencies -- pip install black coverage future joblib numpy numpydoc pandas patsy progressbar2 pygam pytest python-dateutil python-utils pytz scikit-learn scipy six statsmodels +- pip install black coverage future joblib numpy numpydoc pandas patsy progressbar2 pygam pytest python-dateutil python-utils pytz scikit-learn scipy six sphinx_rtd_theme statsmodels install: - python setup.py install diff --git a/causal_curve/core.py b/causal_curve/core.py index 06772a2..3e0b083 100644 --- a/causal_curve/core.py +++ b/causal_curve/core.py @@ -1,6 +1,7 @@ """ Core classes (with basic methods) that will be invoked when other, model classes are defined """ +import pkg_resources class Core: @@ -24,3 +25,5 @@ def get_params(self): return dict( [(k, v) for k, v in list(attrs.items()) if (k[0] != "_") and (k[-1] != "_")] ) + + __version__ = pkg_resources.require("causal-curve")[0].version diff --git a/causal_curve/gps.py b/causal_curve/gps.py index f801efc..089cfb1 100644 --- a/causal_curve/gps.py +++ b/causal_curve/gps.py @@ -429,43 +429,8 @@ def fit(self, T, X, y): # Create grid_values self.grid_values = self._grid_values() - # Estimating the GPS - self.best_gps_family = self.gps_family - - # If no family specified, pick the best family - if self.gps_family == None: - if self.verbose: - print(f"Fitting several GPS models and picking the best fitting one...") 
def _determine_gps_function(self):
    """Pick the GLM family used to model the GPS and fit the matching model.

    The decision is made in this order:

    1. If any treatment value is zero or negative, the ``normal`` family is
       fit, whatever ``self.gps_family`` says.
    2. Otherwise, if ``self.gps_family`` is not ``None``, the family named by
       the user (``"normal"``, ``"lognormal"`` or ``"gamma"``) is fit.
    3. Otherwise ``self._find_best_gps_model()`` chooses the family.

    Reads ``self.T``, ``self.gps_family`` and ``self.verbose``. Sets
    ``self.best_gps_family``, ``self.gps_function`` and ``self.gps_deviance``.
    Progress messages are printed only when ``self.verbose`` is truthy.

    Raises
    ------
    ValueError
        If some treatment value is neither ``<= 0`` nor ``> 0`` (for example
        NaN), or if ``self.gps_family`` is not one of the supported names.
        Previously both situations fell through every branch and left the
        GPS attributes unset without any error.
    """
    # Any zero or negative treatment value forces the normal family,
    # overriding a user-supplied `gps_family`.
    if any(self.T <= 0):
        self.best_gps_family = "normal"
        self.gps_function, self.gps_deviance = self._create_normal_gps_function()
        if self.verbose:
            print(
                "Must fit `normal` GLM family to model treatment since "
                "treatment takes on zero or negative values..."
            )
        return

    # Values that fail both `<= 0` and `> 0` (e.g. NaN) cannot be assigned to
    # any family; fail loudly instead of silently skipping the GPS fit.
    if not all(self.T > 0):
        raise ValueError(
            "Treatment contains values that are neither positive nor "
            "zero/negative (e.g. NaN); cannot determine a GPS family."
        )

    # Strictly positive treatment and no user preference: compare families.
    if self.gps_family is None:
        if self.verbose:
            print("Fitting several GPS models and picking the best fitting one...")

        (
            self.best_gps_family,
            self.gps_function,
            self.gps_deviance,
        ) = self._find_best_gps_model()

        if self.verbose:
            print(
                f"Best fitting model was {self.best_gps_family}, which "
                f"produced a deviance of {self.gps_deviance}"
            )
        return

    # Strictly positive treatment and the user provided input: use that family.
    fitters = {
        "normal": self._create_normal_gps_function,
        "lognormal": self._create_lognormal_gps_function,
        "gamma": self._create_gamma_gps_function,
    }
    if self.gps_family not in fitters:
        raise ValueError(
            f"Unsupported gps_family {self.gps_family!r}; expected one of "
            f"{sorted(fitters)} or None."
        )

    if self.verbose:
        print(f"Fitting GPS model of family '{self.gps_family}'...")

    # Record the family before fitting, matching the original statement order.
    self.best_gps_family = self.gps_family
    self.gps_function, self.gps_deviance = fitters[self.gps_family]()
'0.4.0' +release = '0.4.1' # -- General configuration --------------------------------------------------- diff --git a/docs/install.rst b/docs/install.rst index b683e34..962b165 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -26,6 +26,7 @@ causal-curve requires: - scikit-learn - scipy - six +- sphinx_rtd_theme - statsmodels diff --git a/setup.py b/setup.py index 8d44cf9..cb8282f 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="causal-curve", - version="0.4.0", + version="0.4.1", author="Roni Kobrosly", author_email="roni.kobrosly@gmail.com", description="A python library with tools to perform causal inference using \