Merge branch 'main' into readability

HopkinsIDD · Jul 12, 2024 · 77c3cab
2 parents 66096f1 + ad08cce
commit 77c3cab
Show file tree

Hide file tree

Showing 4 changed files with 49 additions and 45 deletions.
diff --git a/.gitignore b/.gitignore
@@ -36,7 +36,7 @@ model_output/
 /*.Rcheck/
 
 # RStudio files
-.Rproj.user/
+.Rproj.user
 flepiMoP.Rproj
 *.Rproj
 
@@ -64,7 +64,8 @@ packrat/lib*/
 dist/
 SEIR.egg-info/
 Outcomes.egg-info/
-.Rproj.user
+venv/
+.venv/
 
 # R package manuals
 man/
@@ -74,3 +75,6 @@ flepimop/gempyor_pkg/get_value.prof
 flepimop/gempyor_pkg/tests/seir/.coverage
 flepimop/gempyor_pkg/tests/seir/.coverage.kojis-mbp-8.sph.ad.jhsph.edu.90615.974746
 flepimop/gempyor_pkg/.coverage
+
+# Environment variables
+.env
diff --git a/flepimop/gempyor_pkg/setup.cfg b/flepimop/gempyor_pkg/setup.cfg
@@ -42,7 +42,9 @@ install_requires =
 test =
     pytest
     mock
-
+aws =
+    boto3
+    botocore
 
 [options.entry_points]
 console_scripts =

diff --git a/flepimop/gempyor_pkg/src/gempyor/outcomes.py b/flepimop/gempyor_pkg/src/gempyor/outcomes.py
@@ -307,7 +307,7 @@ def compute_all_multioutcomes(
     bypass_seir_xr: xr.Dataset = None,
 ):
     """Compute delay frame based on temporally varying input. We load the seir sim corresponding to sim_id to write"""
-    hpar = pd.DataFrame(columns=["subpop", "quantity", "outcome", "value"])
+    hpar_list = []
     all_data = {}
     dates = pd.date_range(modinf.ti, modinf.tf, freq="D")
 
@@ -381,29 +381,24 @@ def compute_all_multioutcomes(
             probabilities = np.repeat(probabilities[:, np.newaxis], len(dates), axis=1).T  # duplicate in time
             delays = np.repeat(delays[:, np.newaxis], len(dates), axis=1).T  # duplicate in time
             delays = np.round(delays).astype(int)
-            # write hpar before NPI
-            hpar = pd.concat(
-                [
-                    hpar,
-                    pd.DataFrame.from_dict(
-                        {
-                            "subpop": modinf.subpop_struct.subpop_names,
-                            "quantity": ["probability"] * len(modinf.subpop_struct.subpop_names),
-                            "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names),
-                            "value": probabilities[0] * np.ones(len(modinf.subpop_struct.subpop_names)),
-                        }
-                    ),
-                    pd.DataFrame.from_dict(
-                        {
-                            "subpop": modinf.subpop_struct.subpop_names,
-                            "quantity": ["delay"] * len(modinf.subpop_struct.subpop_names),
-                            "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names),
-                            "value": delays[0] * np.ones(len(modinf.subpop_struct.subpop_names)),
-                        }
+            # Write hpar before NPI
+            subpop_names_len = len(modinf.subpop_struct.subpop_names)
+            hpar = pd.DataFrame(
+                {
+                    "subpop": 2 * modinf.subpop_struct.subpop_names,
+                    "quantity": (subpop_names_len * ["probability"])
+                        + (subpop_names_len * ["delay"]),
+                    "outcome": 2 * subpop_names_len * [new_comp],
+                    "value": np.concatenate(
+                        (
+                            probabilities[0] * np.ones(subpop_names_len),
+                            delays[0] * np.ones(subpop_names_len),
+                        )
                     ),
-                ],
-                axis=0,
+                }
             )
+            hpar_list.append(hpar)
+            # Now tackle NPI
             if npi is not None:
                 delays = NPI.reduce_parameter(
                     parameter=delays,
@@ -444,22 +439,15 @@ def compute_all_multioutcomes(
                     )  # one draw per subpop
                 durations = np.repeat(durations[:, np.newaxis], len(dates), axis=1).T  # duplicate in time
                 durations = np.round(durations).astype(int)
-
-                hpar = pd.concat(
-                    [
-                        hpar,
-                        pd.DataFrame.from_dict(
-                            {
-                                "subpop": modinf.subpop_struct.subpop_names,
-                                "quantity": ["duration"] * len(modinf.subpop_struct.subpop_names),
-                                "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names),
-                                "value": durations[0] * np.ones(len(modinf.subpop_struct.subpop_names)),
-                            }
-                        ),
-                    ],
-                    axis=0,
+                hpar = pd.DataFrame(
+                    data={
+                        "subpop": modinf.subpop_struct.subpop_names,
+                        "quantity": subpop_names_len * ["duration"],
+                        "outcome": subpop_names_len * [new_comp],
+                        "value": durations[0] * np.ones(subpop_names_len),
+                    }
                 )
-
+                hpar_list.append(hpar)
                 if npi is not None:
                     # import matplotlib.pyplot as plt
                     # plt.imshow(durations)
@@ -506,7 +494,12 @@ def compute_all_multioutcomes(
             all_data[new_comp] = sum_outcome
             df_p = dataframe_from_array(sum_outcome, modinf.subpop_struct.subpop_names, dates, new_comp)
             outcomes = pd.merge(outcomes, df_p)
-
+    # Concat our hpar dataframes
+    hpar = (
+        pd.concat(hpar_list)
+        if hpar_list
+        else pd.DataFrame(columns=["subpop", "quantity", "outcome", "value"])
+    )
     return outcomes, hpar
 
 

diff --git a/flepimop/gempyor_pkg/src/gempyor/utils.py b/flepimop/gempyor_pkg/src/gempyor/utils.py
@@ -9,8 +9,6 @@
 import time
 from typing import List, Dict, Literal
 
-import boto3
-from botocore.exceptions import ClientError
 import confuse
 import numpy as np
 import numpy.typing as npt
@@ -347,7 +345,7 @@ def as_random_distribution(self):
 
 
 def list_filenames(
-    folder: str | bytes | os.PathLike = ".", 
+    folder: str | bytes | os.PathLike = ".",
     filters: str | list[str] = [],
 ) -> list[str]:
     """Return the list of all filenames and paths in the provided folder.
@@ -636,12 +634,19 @@ def download_file_from_s3(name_map: Dict[str, str]) -> None:
         >>> download_file_from_s3(name_map)
         # This will raise a ValueError indicating the invalid S3 URI format.
     """
+    try:
+        import boto3
+        from botocore.exceptions import ClientError
+    except ModuleNotFoundError:
+        raise ModuleNotFoundError((
+            "No module named 'boto3', which is required for "
+            "gempyor.utils.download_file_from_s3. Please install the aws target."
+        ))
     s3 = boto3.client("s3")
     first_output_filename = next(iter(name_map.values()))
     output_dir = os.path.dirname(first_output_filename)
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
-
     for s3_uri in name_map:
         try:
             if s3_uri.startswith("s3://"):
diff --git a/flepimop/gempyor_pkg/setup.cfg b/flepimop/gempyor_pkg/setup.cfg
@@ -42,7 +42,9 @@ install_requires =
 test =
     pytest
     mock
+aws =
+    boto3
+    botocore
 [options.entry_points]
 console_scripts =