From 9e1bbcecfc1c09ecbe81675d001fc9e8af59fa39 Mon Sep 17 00:00:00 2001
From: Timothy Willard <timothywillard22@gmail.com>
Date: Fri, 28 Jun 2024 08:39:46 -0400
Subject: [PATCH 1/2] Reduced multiple pd.concat calls into one

Consolidated the multiple pd.concat calls that build the hpar DataFrame
in compute_all_multioutcomes into a single call. This addresses the
pandas FutureWarning raised when concatenating an empty DataFrame and
is slightly more performant.
---
 flepimop/gempyor_pkg/src/gempyor/outcomes.py | 71 +++++++++-----------
 1 file changed, 32 insertions(+), 39 deletions(-)

diff --git a/flepimop/gempyor_pkg/src/gempyor/outcomes.py b/flepimop/gempyor_pkg/src/gempyor/outcomes.py
index 8d81df565..ae35b5d95 100644
--- a/flepimop/gempyor_pkg/src/gempyor/outcomes.py
+++ b/flepimop/gempyor_pkg/src/gempyor/outcomes.py
@@ -307,7 +307,7 @@ def compute_all_multioutcomes(
     bypass_seir_xr: xr.Dataset = None,
 ):
     """Compute delay frame based on temporally varying input. We load the seir sim corresponding to sim_id to write"""
-    hpar = pd.DataFrame(columns=["subpop", "quantity", "outcome", "value"])
+    hpar_list = []
     all_data = {}
     dates = pd.date_range(modinf.ti, modinf.tf, freq="D")
 
@@ -381,29 +381,24 @@ def compute_all_multioutcomes(
             probabilities = np.repeat(probabilities[:, np.newaxis], len(dates), axis=1).T  # duplicate in time
             delays = np.repeat(delays[:, np.newaxis], len(dates), axis=1).T  # duplicate in time
             delays = np.round(delays).astype(int)
-            # write hpar before NPI
-            hpar = pd.concat(
-                [
-                    hpar,
-                    pd.DataFrame.from_dict(
-                        {
-                            "subpop": modinf.subpop_struct.subpop_names,
-                            "quantity": ["probability"] * len(modinf.subpop_struct.subpop_names),
-                            "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names),
-                            "value": probabilities[0] * np.ones(len(modinf.subpop_struct.subpop_names)),
-                        }
-                    ),
-                    pd.DataFrame.from_dict(
-                        {
-                            "subpop": modinf.subpop_struct.subpop_names,
-                            "quantity": ["delay"] * len(modinf.subpop_struct.subpop_names),
-                            "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names),
-                            "value": delays[0] * np.ones(len(modinf.subpop_struct.subpop_names)),
-                        }
+            # Write hpar before NPI
+            subpop_names_len = len(modinf.subpop_struct.subpop_names)
+            hpar = pd.DataFrame(
+                {
+                    "subpop": 2 * modinf.subpop_struct.subpop_names,
+                    "quantity": (subpop_names_len * ["probability"])
+                        + (subpop_names_len * ["delay"]),
+                    "outcome": 2 * subpop_names_len * [new_comp],
+                    "value": np.concatenate(
+                        (
+                            probabilities[0] * np.ones(subpop_names_len),
+                            delays[0] * np.ones(subpop_names_len),
+                        )
                     ),
-                ],
-                axis=0,
-            )
+                }
+            ).set_index(pd.Index(2 * list(range(0, subpop_names_len))))
+            hpar_list.append(hpar)
+            # Now tackle NPI
             if npi is not None:
                 delays = NPI.reduce_parameter(
                     parameter=delays,
@@ -444,22 +439,15 @@ def compute_all_multioutcomes(
                     )  # one draw per subpop
                 durations = np.repeat(durations[:, np.newaxis], len(dates), axis=1).T  # duplicate in time
                 durations = np.round(durations).astype(int)
-
-                hpar = pd.concat(
-                    [
-                        hpar,
-                        pd.DataFrame.from_dict(
-                            {
-                                "subpop": modinf.subpop_struct.subpop_names,
-                                "quantity": ["duration"] * len(modinf.subpop_struct.subpop_names),
-                                "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names),
-                                "value": durations[0] * np.ones(len(modinf.subpop_struct.subpop_names)),
-                            }
-                        ),
-                    ],
-                    axis=0,
+                hpar = pd.DataFrame(
+                    data={
+                        "subpop": modinf.subpop_struct.subpop_names,
+                        "quantity": subpop_names_len * ["duration"],
+                        "outcome": subpop_names_len * [new_comp],
+                        "value": durations[0] * np.ones(subpop_names_len),
+                    }
                 )
-
+                hpar_list.append(hpar)
                 if npi is not None:
                     # import matplotlib.pyplot as plt
                     # plt.imshow(durations)
@@ -506,7 +494,12 @@ def compute_all_multioutcomes(
             all_data[new_comp] = sum_outcome
             df_p = dataframe_from_array(sum_outcome, modinf.subpop_struct.subpop_names, dates, new_comp)
             outcomes = pd.merge(outcomes, df_p)
-
+    # Concat our hpar dataframes
+    hpar = (
+        pd.concat(hpar_list)
+        if hpar_list
+        else pd.DataFrame(columns=["subpop", "quantity", "outcome", "value"])
+    )
     return outcomes, hpar
 
 

From 88d6559d360d212d8749de8a2c0383282bbd8de4 Mon Sep 17 00:00:00 2001
From: Timothy Willard <timothywillard22@gmail.com>
Date: Thu, 11 Jul 2024 08:21:55 -0400
Subject: [PATCH 2/2] Remove unneeded `set_index`

The `set_index` call preserved the prior behavior of producing a
repeated index 0,1,...,N-1,0,1,...,N-1 (N being the number of subpops).
Without it the DataFrame gets the default index 0,1,...,2N-1. The index
does not get used, so this is a harmless breaking change.
---
 flepimop/gempyor_pkg/src/gempyor/outcomes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flepimop/gempyor_pkg/src/gempyor/outcomes.py b/flepimop/gempyor_pkg/src/gempyor/outcomes.py
index ae35b5d95..5563f4d85 100644
--- a/flepimop/gempyor_pkg/src/gempyor/outcomes.py
+++ b/flepimop/gempyor_pkg/src/gempyor/outcomes.py
@@ -396,7 +396,7 @@ def compute_all_multioutcomes(
                         )
                     ),
                 }
-            ).set_index(pd.Index(2 * list(range(0, subpop_names_len))))
+            )
             hpar_list.append(hpar)
             # Now tackle NPI
             if npi is not None: