diff --git a/.gitignore b/.gitignore
index db7e7fcf9..b1caaf8a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,7 +36,7 @@ model_output/
 /*.Rcheck/
 
 # RStudio files
-.Rproj.user/
+.Rproj.user
 flepiMoP.Rproj
 *.Rproj
 
@@ -64,7 +64,8 @@ packrat/lib*/
 dist/
 SEIR.egg-info/
 Outcomes.egg-info/
-.Rproj.user
+venv/
+.venv/
 
 # R package manuals
 man/
@@ -74,3 +75,6 @@ flepimop/gempyor_pkg/get_value.prof
 flepimop/gempyor_pkg/tests/seir/.coverage
 flepimop/gempyor_pkg/tests/seir/.coverage.kojis-mbp-8.sph.ad.jhsph.edu.90615.974746
 flepimop/gempyor_pkg/.coverage
+
+# Environment variables
+.env
diff --git a/flepimop/gempyor_pkg/setup.cfg b/flepimop/gempyor_pkg/setup.cfg
index f63f130da..e5fb0902a 100644
--- a/flepimop/gempyor_pkg/setup.cfg
+++ b/flepimop/gempyor_pkg/setup.cfg
@@ -42,7 +42,9 @@ install_requires =
 test =
     pytest
     mock
-
+aws =
+    boto3
+    botocore
 
 [options.entry_points]
 console_scripts =
diff --git a/flepimop/gempyor_pkg/src/gempyor/outcomes.py b/flepimop/gempyor_pkg/src/gempyor/outcomes.py
index 8d81df565..5563f4d85 100644
--- a/flepimop/gempyor_pkg/src/gempyor/outcomes.py
+++ b/flepimop/gempyor_pkg/src/gempyor/outcomes.py
@@ -307,7 +307,7 @@ def compute_all_multioutcomes(
     bypass_seir_xr: xr.Dataset = None,
 ):
     """Compute delay frame based on temporally varying input. We load the seir sim corresponding to sim_id to write"""
-    hpar = pd.DataFrame(columns=["subpop", "quantity", "outcome", "value"])
+    hpar_list = []
     all_data = {}
     dates = pd.date_range(modinf.ti, modinf.tf, freq="D")
 
@@ -381,29 +381,24 @@
             probabilities = np.repeat(probabilities[:, np.newaxis], len(dates), axis=1).T  # duplicate in time
             delays = np.repeat(delays[:, np.newaxis], len(dates), axis=1).T  # duplicate in time
             delays = np.round(delays).astype(int)
-            # write hpar before NPI
-            hpar = pd.concat(
-                [
-                    hpar,
-                    pd.DataFrame.from_dict(
-                        {
-                            "subpop": modinf.subpop_struct.subpop_names,
-                            "quantity": ["probability"] * len(modinf.subpop_struct.subpop_names),
-                            "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names),
-                            "value": probabilities[0] * np.ones(len(modinf.subpop_struct.subpop_names)),
-                        }
-                    ),
-                    pd.DataFrame.from_dict(
-                        {
-                            "subpop": modinf.subpop_struct.subpop_names,
-                            "quantity": ["delay"] * len(modinf.subpop_struct.subpop_names),
-                            "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names),
-                            "value": delays[0] * np.ones(len(modinf.subpop_struct.subpop_names)),
-                        }
+            # Write hpar before NPI
+            subpop_names_len = len(modinf.subpop_struct.subpop_names)
+            hpar = pd.DataFrame(
+                {
+                    "subpop": 2 * modinf.subpop_struct.subpop_names,
+                    "quantity": (subpop_names_len * ["probability"])
+                    + (subpop_names_len * ["delay"]),
+                    "outcome": 2 * subpop_names_len * [new_comp],
+                    "value": np.concatenate(
+                        (
+                            probabilities[0] * np.ones(subpop_names_len),
+                            delays[0] * np.ones(subpop_names_len),
+                        )
                     ),
-                ],
-                axis=0,
+                }
             )
+            hpar_list.append(hpar)
+            # Now tackle NPI
             if npi is not None:
                 delays = NPI.reduce_parameter(
                     parameter=delays,
@@ -444,22 +439,15 @@
                     )  # one draw per subpop
                 durations = np.repeat(durations[:, np.newaxis], len(dates), axis=1).T  # duplicate in time
                 durations = np.round(durations).astype(int)
-
-                hpar = pd.concat(
-                    [
-                        hpar,
-                        pd.DataFrame.from_dict(
-                            {
-                                "subpop": modinf.subpop_struct.subpop_names,
-                                "quantity": ["duration"] * len(modinf.subpop_struct.subpop_names),
-                                "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names),
-                                "value": durations[0] * np.ones(len(modinf.subpop_struct.subpop_names)),
-                            }
-                        ),
-                    ],
-                    axis=0,
+                hpar = pd.DataFrame(
+                    data={
+                        "subpop": modinf.subpop_struct.subpop_names,
+                        "quantity": subpop_names_len * ["duration"],
+                        "outcome": subpop_names_len * [new_comp],
+                        "value": durations[0] * np.ones(subpop_names_len),
+                    }
                 )
-
+                hpar_list.append(hpar)
                 if npi is not None:
                     # import matplotlib.pyplot as plt
                     # plt.imshow(durations)
@@ -506,7 +494,12 @@
             all_data[new_comp] = sum_outcome
             df_p = dataframe_from_array(sum_outcome, modinf.subpop_struct.subpop_names, dates, new_comp)
             outcomes = pd.merge(outcomes, df_p)
-
+    # Concat our hpar dataframes
+    hpar = (
+        pd.concat(hpar_list)
+        if hpar_list
+        else pd.DataFrame(columns=["subpop", "quantity", "outcome", "value"])
+    )
     return outcomes, hpar
 
 
diff --git a/flepimop/gempyor_pkg/src/gempyor/utils.py b/flepimop/gempyor_pkg/src/gempyor/utils.py
index 85820d5b2..990acdf30 100644
--- a/flepimop/gempyor_pkg/src/gempyor/utils.py
+++ b/flepimop/gempyor_pkg/src/gempyor/utils.py
@@ -9,8 +9,6 @@ import time
 
 from typing import List, Dict, Literal
 
-import boto3
-from botocore.exceptions import ClientError
 import confuse
 import numpy as np
 import numpy.typing as npt
@@ -347,7 +345,7 @@ def as_random_distribution(self):
 
 
 def list_filenames(
-    folder: str | bytes | os.PathLike = ".", 
+    folder: str | bytes | os.PathLike = ".",
     filters: str | list[str] = [],
 ) -> list[str]:
     """Return the list of all filenames and paths in the provided folder.
@@ -636,12 +634,19 @@ def download_file_from_s3(name_map: Dict[str, str]) -> None:
         >>> download_file_from_s3(name_map)
         # This will raise a ValueError indicating the invalid S3 URI format.
     """
+    try:
+        import boto3
+        from botocore.exceptions import ClientError
+    except ModuleNotFoundError:
+        raise ModuleNotFoundError((
+            "No module named 'boto3', which is required for "
+            "gempyor.utils.download_file_from_s3. Please install the aws target."
+        ))
     s3 = boto3.client("s3")
     first_output_filename = next(iter(name_map.values()))
     output_dir = os.path.dirname(first_output_filename)
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
-
     for s3_uri in name_map:
         try:
             if s3_uri.startswith("s3://"):
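
With boto3 and botocore moved behind the new `aws` extra in setup.cfg and imported lazily inside `download_file_from_s3`, a plain `import gempyor.utils` no longer requires them. A minimal usage sketch, assuming the package is installed with that extra (for example `pip install -e "flepimop/gempyor_pkg[aws]"` from the repository root); the bucket, key, and local path below are hypothetical placeholders:

    from gempyor.utils import download_file_from_s3

    # Hypothetical S3 URI -> local output path mapping; "my-bucket" and the key
    # are placeholders. Without boto3 (i.e. without the "aws" extra) this call
    # now raises ModuleNotFoundError instead of gempyor.utils failing at import.
    name_map = {"s3://my-bucket/raw/file.parquet": "model_output/file.parquet"}
    download_file_from_s3(name_map)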