From 708cc4190f53a792bb53c2f18a46dad394942842 Mon Sep 17 00:00:00 2001 From: Timothy Willard Date: Fri, 21 Jun 2024 16:26:34 -0400 Subject: [PATCH 1/7] Added missing boto3 dependency for gempyor, GH-238 --- flepimop/gempyor_pkg/setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/flepimop/gempyor_pkg/setup.cfg b/flepimop/gempyor_pkg/setup.cfg index f63f130da..f93c20221 100644 --- a/flepimop/gempyor_pkg/setup.cfg +++ b/flepimop/gempyor_pkg/setup.cfg @@ -35,6 +35,7 @@ install_requires = dask scipy graphviz + boto3 # see https://stackoverflow.com/questions/58826164/dependencies-requirements-for-setting-up-testing-and-installing-a-python-lib # installed for pip install -e ".[test]" From 0b32f2e2bd32a33b85a6751278fbbabed08de835 Mon Sep 17 00:00:00 2001 From: Timothy Willard Date: Fri, 21 Jun 2024 16:33:36 -0400 Subject: [PATCH 2/7] Moved .Rproj.user gitignore to the right place, added venv & .venv for isolated installs of gempyor. --- .gitignore | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index db7e7fcf9..9cc3a7c09 100644 --- a/.gitignore +++ b/.gitignore @@ -36,7 +36,7 @@ model_output/ /*.Rcheck/ # RStudio files -.Rproj.user/ +.Rproj.user flepiMoP.Rproj *.Rproj @@ -64,7 +64,8 @@ packrat/lib*/ dist/ SEIR.egg-info/ Outcomes.egg-info/ -.Rproj.user +venv/ +.venv/ # R package manuals man/ From 7a832793cd8c76d9b38c634fec0579ddcdd700bc Mon Sep 17 00:00:00 2001 From: Timothy Willard Date: Tue, 25 Jun 2024 15:57:10 -0400 Subject: [PATCH 3/7] Made boto3/botocore optional installs and moved boto3/botocore imports into gempyor.utils.download_file_from_s3, GH-238 --- flepimop/gempyor_pkg/setup.cfg | 5 +++-- flepimop/gempyor_pkg/src/gempyor/utils.py | 11 ++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/flepimop/gempyor_pkg/setup.cfg b/flepimop/gempyor_pkg/setup.cfg index f93c20221..e5fb0902a 100644 --- a/flepimop/gempyor_pkg/setup.cfg +++ b/flepimop/gempyor_pkg/setup.cfg @@ -35,7 +35,6 @@ 
install_requires = dask scipy graphviz - boto3 # see https://stackoverflow.com/questions/58826164/dependencies-requirements-for-setting-up-testing-and-installing-a-python-lib # installed for pip install -e ".[test]" @@ -43,7 +42,9 @@ install_requires = test = pytest mock - +aws = + boto3 + botocore [options.entry_points] console_scripts = diff --git a/flepimop/gempyor_pkg/src/gempyor/utils.py b/flepimop/gempyor_pkg/src/gempyor/utils.py index 4d3209061..6c3b45d5a 100644 --- a/flepimop/gempyor_pkg/src/gempyor/utils.py +++ b/flepimop/gempyor_pkg/src/gempyor/utils.py @@ -12,10 +12,8 @@ import subprocess import shutil import logging -import boto3 from gempyor import file_paths from typing import List, Dict -from botocore.exceptions import ClientError logger = logging.getLogger(__name__) @@ -509,12 +507,19 @@ def download_file_from_s3(name_map: Dict[str, str]) -> None: >>> download_file_from_s3(name_map) # This will raise a ValueError indicating the invalid S3 URI format. """ + try: + import boto3 + from botocore.exceptions import ClientError + except: + raise ModuleNotFoundError(( + "No module named 'boto3', which is required for " + "gempyor.utils.download_file_from_s3. Please install the aws target." + )) s3 = boto3.client("s3") first_output_filename = next(iter(name_map.values())) output_dir = os.path.dirname(first_output_filename) if not os.path.exists(output_dir): os.makedirs(output_dir) - for s3_uri in name_map: try: if s3_uri.startswith("s3://"): From bed8d8b537c1cd722a4551262eb4766001df497b Mon Sep 17 00:00:00 2001 From: Timothy Willard Date: Thu, 27 Jun 2024 12:52:32 -0400 Subject: [PATCH 4/7] Change except to only catch ModuleNotFoundError, let other errors raise. 
--- flepimop/gempyor_pkg/src/gempyor/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/utils.py b/flepimop/gempyor_pkg/src/gempyor/utils.py index 6c3b45d5a..eb2f00da4 100644 --- a/flepimop/gempyor_pkg/src/gempyor/utils.py +++ b/flepimop/gempyor_pkg/src/gempyor/utils.py @@ -510,7 +510,7 @@ def download_file_from_s3(name_map: Dict[str, str]) -> None: try: import boto3 from botocore.exceptions import ClientError - except: + except ModuleNotFoundError: raise ModuleNotFoundError(( "No module named 'boto3', which is required for " "gempyor.utils.download_file_from_s3. Please install the aws target." From 9e1bbcecfc1c09ecbe81675d001fc9e8af59fa39 Mon Sep 17 00:00:00 2001 From: Timothy Willard Date: Fri, 28 Jun 2024 08:39:46 -0400 Subject: [PATCH 5/7] Reduced multiple pd.concat calls into one Consolidated multiple pd.concat calls into one in compute_all_multioutcomes building hpar df. Addresses the pandas FutureWarning raised when concatenating an empty DataFrame and is slightly more performant. --- flepimop/gempyor_pkg/src/gempyor/outcomes.py | 71 +++++++++----------- 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/outcomes.py b/flepimop/gempyor_pkg/src/gempyor/outcomes.py index 8d81df565..ae35b5d95 100644 --- a/flepimop/gempyor_pkg/src/gempyor/outcomes.py +++ b/flepimop/gempyor_pkg/src/gempyor/outcomes.py @@ -307,7 +307,7 @@ def compute_all_multioutcomes( bypass_seir_xr: xr.Dataset = None, ): """Compute delay frame based on temporally varying input. 
We load the seir sim corresponding to sim_id to write""" - hpar = pd.DataFrame(columns=["subpop", "quantity", "outcome", "value"]) + hpar_list = [] all_data = {} dates = pd.date_range(modinf.ti, modinf.tf, freq="D") @@ -381,29 +381,24 @@ def compute_all_multioutcomes( probabilities = np.repeat(probabilities[:, np.newaxis], len(dates), axis=1).T # duplicate in time delays = np.repeat(delays[:, np.newaxis], len(dates), axis=1).T # duplicate in time delays = np.round(delays).astype(int) - # write hpar before NPI - hpar = pd.concat( - [ - hpar, - pd.DataFrame.from_dict( - { - "subpop": modinf.subpop_struct.subpop_names, - "quantity": ["probability"] * len(modinf.subpop_struct.subpop_names), - "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names), - "value": probabilities[0] * np.ones(len(modinf.subpop_struct.subpop_names)), - } - ), - pd.DataFrame.from_dict( - { - "subpop": modinf.subpop_struct.subpop_names, - "quantity": ["delay"] * len(modinf.subpop_struct.subpop_names), - "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names), - "value": delays[0] * np.ones(len(modinf.subpop_struct.subpop_names)), - } + # Write hpar before NPI + subpop_names_len = len(modinf.subpop_struct.subpop_names) + hpar = pd.DataFrame( + { + "subpop": 2 * modinf.subpop_struct.subpop_names, + "quantity": (subpop_names_len * ["probability"]) + + (subpop_names_len * ["delay"]), + "outcome": 2 * subpop_names_len * [new_comp], + "value": np.concatenate( + ( + probabilities[0] * np.ones(subpop_names_len), + delays[0] * np.ones(subpop_names_len), + ) ), - ], - axis=0, - ) + } + ).set_index(pd.Index(2 * list(range(0, subpop_names_len)))) + hpar_list.append(hpar) + # Now tackle NPI if npi is not None: delays = NPI.reduce_parameter( parameter=delays, @@ -444,22 +439,15 @@ def compute_all_multioutcomes( ) # one draw per subpop durations = np.repeat(durations[:, np.newaxis], len(dates), axis=1).T # duplicate in time durations = np.round(durations).astype(int) - - hpar = pd.concat( - [ - 
hpar, - pd.DataFrame.from_dict( - { - "subpop": modinf.subpop_struct.subpop_names, - "quantity": ["duration"] * len(modinf.subpop_struct.subpop_names), - "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names), - "value": durations[0] * np.ones(len(modinf.subpop_struct.subpop_names)), - } - ), - ], - axis=0, + hpar = pd.DataFrame( + data={ + "subpop": modinf.subpop_struct.subpop_names, + "quantity": subpop_names_len * ["duration"], + "outcome": subpop_names_len * [new_comp], + "value": durations[0] * np.ones(subpop_names_len), + } ) - + hpar_list.append(hpar) if npi is not None: # import matplotlib.pyplot as plt # plt.imshow(durations) @@ -506,7 +494,12 @@ def compute_all_multioutcomes( all_data[new_comp] = sum_outcome df_p = dataframe_from_array(sum_outcome, modinf.subpop_struct.subpop_names, dates, new_comp) outcomes = pd.merge(outcomes, df_p) - + # Concat our hpar dataframes + hpar = ( + pd.concat(hpar_list) + if hpar_list + else pd.DataFrame(columns=["subpop", "quantity", "outcome", "value"]) + ) return outcomes, hpar From 58c312c2b12425c953dd7c311db73faf67a5d1aa Mon Sep 17 00:00:00 2001 From: Timothy Willard Date: Wed, 10 Jul 2024 15:17:48 -0400 Subject: [PATCH 6/7] Add `.env` to `.gitignore` Add dotenv file to gitignore since flepiMoP depends on having FLEPI_PATH and PROJECT_PATH env vars set. 
--- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 9cc3a7c09..b1caaf8a8 100644 --- a/.gitignore +++ b/.gitignore @@ -75,3 +75,6 @@ flepimop/gempyor_pkg/get_value.prof flepimop/gempyor_pkg/tests/seir/.coverage flepimop/gempyor_pkg/tests/seir/.coverage.kojis-mbp-8.sph.ad.jhsph.edu.90615.974746 flepimop/gempyor_pkg/.coverage + +# Environment variables +.env From 88d6559d360d212d8749de8a2c0383282bbd8de4 Mon Sep 17 00:00:00 2001 From: Timothy Willard Date: Thu, 11 Jul 2024 08:21:55 -0400 Subject: [PATCH 7/7] Remove unneeded `set_index` `set_index` call maintained prior behavior of creating an index like 0,1,...,N-1,0,1,...,N-1 (N being the number of subpops). Now the index goes 0,1,...,2N-1. This index does not get used, so the change in behavior is harmless. --- flepimop/gempyor_pkg/src/gempyor/outcomes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/outcomes.py b/flepimop/gempyor_pkg/src/gempyor/outcomes.py index ae35b5d95..5563f4d85 100644 --- a/flepimop/gempyor_pkg/src/gempyor/outcomes.py +++ b/flepimop/gempyor_pkg/src/gempyor/outcomes.py @@ -396,7 +396,7 @@ def compute_all_multioutcomes( ) ), } - ).set_index(pd.Index(2 * list(range(0, subpop_names_len)))) + ) hpar_list.append(hpar) # Now tackle NPI if npi is not None: