From 21816ec8f39a085b306f975a30c7a40fc86ecead Mon Sep 17 00:00:00 2001 From: Sebastian Daza Date: Wed, 11 Dec 2024 17:46:49 +0100 Subject: [PATCH 1/5] regression covariates --- experiment_utils/experiment_analyzer.py | 31 +++++++++++++++++++++---- tests/test_experiment_analyzer.py | 22 ++++++++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/experiment_utils/experiment_analyzer.py b/experiment_utils/experiment_analyzer.py index b3a07bf..57bc260 100644 --- a/experiment_utils/experiment_analyzer.py +++ b/experiment_utils/experiment_analyzer.py @@ -35,7 +35,7 @@ def __init__( instrument_col: str = None, alpha: float = 0.05, regression_covariates: List = None, - assess_overlap=False): + assess_overlap=False): """ Initialize ExperimentAnalyzer @@ -95,6 +95,10 @@ def __check_input(self): if self.data.isEmpty(): log_and_raise_error(self.logger, "Dataframe is empty!") + # impute covariates from regression covariates + if (len(self.covariates) == 0) & (len(self.regression_covariates) > 0): + self.covariates = self.regression_covariates + # regression covariates has to be a subset of covariates if len(self.regression_covariates) > 0: if not set(self.regression_covariates).issubset(set(self.covariates)): @@ -166,6 +170,25 @@ def standardize_covariates(self, data: pd.DataFrame, covariates: List[str]) -> p data[f"z_{covariate}"] = (data[covariate] - data[covariate].mean()) / data[covariate].std() return data + def __create_formula(self, outcome_variable, type: str ='regression'): + """ + Create formula for final regression model + """ + + formula_dict = { + 'regression':f"{outcome_variable} ~ {self.treatment_col}", + 'iv': f"{outcome_variable} ~ 1 + [{self.treatment_col} ~ {self.instrument_col}]" + } + reg_covs = list(set(self.final_covariates) & set(self.regression_covariates)) + + if len(reg_covs) > 0: + zreg_covs = [f"z_{cov}" for cov in reg_covs] + formula = formula_dict[type] + ' + '.join(zreg_covs) + else: + formula = formula_dict[type] + + return formula + def linear_regression(self, data: pd.DataFrame, outcome_variable: str) -> Dict: """ Runs a linear regression of the outcome variable on the treatment variable. @@ -183,7 +206,7 @@ def linear_regression(self, data: pd.DataFrame, outcome_variable: str) -> Dict: Regression results """ - formula = f"{outcome_variable} ~ {self.treatment_col}" + formula = self.__create_formula(outcome_variable=outcome_variable) model = smf.ols(formula, data=data) results = model.fit(cov_type="HC3") @@ -223,7 +246,7 @@ def weighted_least_squares(self, data: pd.DataFrame, outcome_variable: str) -> D Regression results """ - formula = f"{outcome_variable} ~ 1 + {self.treatment_col}" + formula = self.__create_formula(outcome_variable=outcome_variable) model = smf.wls( formula, data=data, @@ -270,7 +293,7 @@ def iv_regression(self, data: pd.DataFrame, outcome_variable: str) -> Dict: if not self.instrument_col: log_and_raise_error(self.logger, "Instrument column must be specified for IV adjustment") - formula = f"{outcome_variable} ~ 1 + [{self.treatment_col} ~ {self.instrument_col}]" + formula = self.__create_formula(outcome_variable=outcome_variable, type='iv') model = IV2SLS.from_formula(formula, data) results = model.fit(cov_type='robust') diff --git a/tests/test_experiment_analyzer.py b/tests/test_experiment_analyzer.py index 5c9781e..2d65ab5 100644 --- a/tests/test_experiment_analyzer.py +++ b/tests/test_experiment_analyzer.py @@ -94,6 +94,28 @@ def test_no_covariates(sample_data): pytest.fail(f" raised an exception: {e}") +def test_regression_covariates(sample_data): + """Test get_effects regression covariates""" + outcomes = "conversion" + treatment_col = "treatment" + experiment_identifier = "experiment" + regression_covariates = "baseline_conversion" + + analyzer = ExperimentAnalyzer( + data=sample_data, + outcomes=outcomes, + treatment_col=treatment_col, + experiment_identifier=experiment_identifier, + regression_covariates=regression_covariates) + + try: + analyzer.get_effects() + analyzer.results + assert True + except Exception as e: + pytest.fail(f" raised an exception: {e}") + + def test_no_adjustment(sample_data): """Test get_effects no adjustments""" outcomes = "conversion" From eb6db317e0648350c61bbfa0bc6cb3128ca204cc Mon Sep 17 00:00:00 2001 From: Sebastian Daza Date: Wed, 11 Dec 2024 17:50:54 +0100 Subject: [PATCH 2/5] fixing flake8 --- .github/workflows/ci.yaml | 2 +- experiment_utils/experiment_analyzer.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0c2acc8..26b98e6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -35,7 +35,7 @@ jobs: # Install flake8 pip install flake8 # Run flake8 - flake8 . --ignore=E501,F401,F403,F405,W504 + flake8 . --ignore=E501,F401,F403,F405,W504,E125 - name: Run tests run: | diff --git a/experiment_utils/experiment_analyzer.py b/experiment_utils/experiment_analyzer.py index 57bc260..9d3a393 100644 --- a/experiment_utils/experiment_analyzer.py +++ b/experiment_utils/experiment_analyzer.py @@ -35,7 +35,7 @@ def __init__( instrument_col: str = None, alpha: float = 0.05, regression_covariates: List = None, - assess_overlap=False): + assess_overlap=False): """ Initialize ExperimentAnalyzer @@ -170,13 +170,13 @@ def standardize_covariates(self, data: pd.DataFrame, covariates: List[str]) -> p data[f"z_{covariate}"] = (data[covariate] - data[covariate].mean()) / data[covariate].std() return data - def __create_formula(self, outcome_variable, type: str ='regression'): + def __create_formula(self, outcome_variable, type: str = 'regression'): """ Create formula for final regression model """ formula_dict = { - 'regression':f"{outcome_variable} ~ {self.treatment_col}", + 'regression': f"{outcome_variable} ~ {self.treatment_col}", 'iv': f"{outcome_variable} ~ 1 + [{self.treatment_col} ~ {self.instrument_col}]" } reg_covs = list(set(self.final_covariates) & set(self.regression_covariates)) From 9085b5ed2ff68e166c7767445669067996201d36 Mon Sep 17 00:00:00 2001 From: sdaza Date: Wed, 11 Dec 2024 20:23:08 +0000 Subject: [PATCH 3/5] update --- experiment_utils/experiment_analyzer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiment_utils/experiment_analyzer.py b/experiment_utils/experiment_analyzer.py index 9d3a393..05498ca 100644 --- a/experiment_utils/experiment_analyzer.py +++ b/experiment_utils/experiment_analyzer.py @@ -35,7 +35,7 @@ def __init__( instrument_col: str = None, alpha: float = 0.05, regression_covariates: List = None, - assess_overlap=False): + assess_overlap=False): """ Initialize ExperimentAnalyzer @@ -176,7 +176,7 @@ def __create_formula(self, outcome_variable, type: str = 'regression'): """ formula_dict = { - 'regression': f"{outcome_variable} ~ {self.treatment_col}", + 'regression': f"{outcome_variable} ~ 1 + {self.treatment_col}", 'iv': f"{outcome_variable} ~ 1 + [{self.treatment_col} ~ {self.instrument_col}]" } reg_covs = list(set(self.final_covariates) & set(self.regression_covariates)) From a06a921aed586a9e6eecd54c568c157139d1ae19 Mon Sep 17 00:00:00 2001 From: sdaza Date: Wed, 11 Dec 2024 20:39:21 +0000 Subject: [PATCH 4/5] update --- experiment_utils/experiment_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiment_utils/experiment_analyzer.py b/experiment_utils/experiment_analyzer.py index 05498ca..bb3593d 100644 --- a/experiment_utils/experiment_analyzer.py +++ b/experiment_utils/experiment_analyzer.py @@ -183,7 +183,7 @@ def __create_formula(self, outcome_variable, type: str = 'regression'): if len(reg_covs) > 0: zreg_covs = [f"z_{cov}" for cov in reg_covs] - formula = formula_dict[type] + ' + '.join(zreg_covs) + formula = formula_dict[type] + ' + ' + ' + '.join(zreg_covs) else: formula = formula_dict[type] From 8d672c3e466ba8a211ef78a7b3dbd0634daa1e8f Mon Sep 17 00:00:00 2001 From: Sebastian Daza Date: Wed, 11 Dec 2024 21:44:28 +0100 Subject: [PATCH 5/5] update --- experiment_utils/experiment_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiment_utils/experiment_analyzer.py b/experiment_utils/experiment_analyzer.py index bb3593d..3a8e0bd 100644 --- a/experiment_utils/experiment_analyzer.py +++ b/experiment_utils/experiment_analyzer.py @@ -183,7 +183,7 @@ def __create_formula(self, outcome_variable, type: str = 'regression'): if len(reg_covs) > 0: zreg_covs = [f"z_{cov}" for cov in reg_covs] - formula = formula_dict[type] + ' + ' + ' + '.join(zreg_covs) + formula = formula_dict[type] + ' + ' + ' + '.join(zreg_covs) else: formula = formula_dict[type]