From 9e1bbcecfc1c09ecbe81675d001fc9e8af59fa39 Mon Sep 17 00:00:00 2001 From: Timothy Willard Date: Fri, 28 Jun 2024 08:39:46 -0400 Subject: [PATCH 1/2] Reduced multiple pd.concat calls into one Consolidated multiple pd.concat calls into one in compute_all_multioutcomes when building the hpar df. Addresses the pandas FutureWarning about concatenating an empty df and is slightly more performant. --- flepimop/gempyor_pkg/src/gempyor/outcomes.py | 71 +++++++++----------- 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/outcomes.py b/flepimop/gempyor_pkg/src/gempyor/outcomes.py index 8d81df565..ae35b5d95 100644 --- a/flepimop/gempyor_pkg/src/gempyor/outcomes.py +++ b/flepimop/gempyor_pkg/src/gempyor/outcomes.py @@ -307,7 +307,7 @@ def compute_all_multioutcomes( bypass_seir_xr: xr.Dataset = None, ): """Compute delay frame based on temporally varying input. We load the seir sim corresponding to sim_id to write""" - hpar = pd.DataFrame(columns=["subpop", "quantity", "outcome", "value"]) + hpar_list = [] all_data = {} dates = pd.date_range(modinf.ti, modinf.tf, freq="D") @@ -381,29 +381,24 @@ def compute_all_multioutcomes( probabilities = np.repeat(probabilities[:, np.newaxis], len(dates), axis=1).T # duplicate in time delays = np.repeat(delays[:, np.newaxis], len(dates), axis=1).T # duplicate in time delays = np.round(delays).astype(int) - # write hpar before NPI - hpar = pd.concat( - [ - hpar, - pd.DataFrame.from_dict( - { - "subpop": modinf.subpop_struct.subpop_names, - "quantity": ["probability"] * len(modinf.subpop_struct.subpop_names), - "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names), - "value": probabilities[0] * np.ones(len(modinf.subpop_struct.subpop_names)), - } - ), - pd.DataFrame.from_dict( - { - "subpop": modinf.subpop_struct.subpop_names, - "quantity": ["delay"] * len(modinf.subpop_struct.subpop_names), - "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names), - "value": delays[0] * 
np.ones(len(modinf.subpop_struct.subpop_names)), - } + # Write hpar before NPI + subpop_names_len = len(modinf.subpop_struct.subpop_names) + hpar = pd.DataFrame( + { + "subpop": 2 * modinf.subpop_struct.subpop_names, + "quantity": (subpop_names_len * ["probability"]) + + (subpop_names_len * ["delay"]), + "outcome": 2 * subpop_names_len * [new_comp], + "value": np.concatenate( + ( + probabilities[0] * np.ones(subpop_names_len), + delays[0] * np.ones(subpop_names_len), + ) ), - ], - axis=0, - ) + } + ).set_index(pd.Index(2 * list(range(0, subpop_names_len)))) + hpar_list.append(hpar) + # Now tackle NPI if npi is not None: delays = NPI.reduce_parameter( parameter=delays, @@ -444,22 +439,15 @@ def compute_all_multioutcomes( ) # one draw per subpop durations = np.repeat(durations[:, np.newaxis], len(dates), axis=1).T # duplicate in time durations = np.round(durations).astype(int) - - hpar = pd.concat( - [ - hpar, - pd.DataFrame.from_dict( - { - "subpop": modinf.subpop_struct.subpop_names, - "quantity": ["duration"] * len(modinf.subpop_struct.subpop_names), - "outcome": [new_comp] * len(modinf.subpop_struct.subpop_names), - "value": durations[0] * np.ones(len(modinf.subpop_struct.subpop_names)), - } - ), - ], - axis=0, + hpar = pd.DataFrame( + data={ + "subpop": modinf.subpop_struct.subpop_names, + "quantity": subpop_names_len * ["duration"], + "outcome": subpop_names_len * [new_comp], + "value": durations[0] * np.ones(subpop_names_len), + } ) - + hpar_list.append(hpar) if npi is not None: # import matplotlib.pyplot as plt # plt.imshow(durations) @@ -506,7 +494,12 @@ def compute_all_multioutcomes( all_data[new_comp] = sum_outcome df_p = dataframe_from_array(sum_outcome, modinf.subpop_struct.subpop_names, dates, new_comp) outcomes = pd.merge(outcomes, df_p) - + # Concat our hpar dataframes + hpar = ( + pd.concat(hpar_list) + if hpar_list + else pd.DataFrame(columns=["subpop", "quantity", "outcome", "value"]) + ) return outcomes, hpar From 
88d6559d360d212d8749de8a2c0383282bbd8de4 Mon Sep 17 00:00:00 2001 From: Timothy Willard Date: Thu, 11 Jul 2024 08:21:55 -0400 Subject: [PATCH 2/2] Remove unneeded `set_index` The `set_index` call maintained the prior behavior of creating an index like 0,1,...,N-1,0,1,...,N-1 (N = number of subpops). Now the index goes 0,1,...,2N-1. This index does not get used, so it is a harmless breaking change. --- flepimop/gempyor_pkg/src/gempyor/outcomes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/outcomes.py b/flepimop/gempyor_pkg/src/gempyor/outcomes.py index ae35b5d95..5563f4d85 100644 --- a/flepimop/gempyor_pkg/src/gempyor/outcomes.py +++ b/flepimop/gempyor_pkg/src/gempyor/outcomes.py @@ -396,7 +396,7 @@ def compute_all_multioutcomes( ) ), } - ).set_index(pd.Index(2 * list(range(0, subpop_names_len)))) + ) hpar_list.append(hpar) # Now tackle NPI if npi is not None: