From 1eee5275b8afa5fd1dad62abbfb9bdf137c71193 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:43:02 -0500 Subject: [PATCH 1/6] Minor styling edits to `seeding.py` * Added explicit exports, * Reordered imports, * Added general structure comments, and * Refactored all lines to be less than 92 characters. --- flepimop/gempyor_pkg/src/gempyor/seeding.py | 44 +++++++++++++++------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/seeding.py b/flepimop/gempyor_pkg/src/gempyor/seeding.py index bbfeaa07f..760da66bb 100644 --- a/flepimop/gempyor_pkg/src/gempyor/seeding.py +++ b/flepimop/gempyor_pkg/src/gempyor/seeding.py @@ -1,20 +1,27 @@ -from typing import Dict, TYPE_CHECKING +# Exports +__all__ = ("Seeding", "SeedingFactory") + +# Imports +import logging + +import confuse +import numba as nb import numpy as np import pandas as pd -import confuse -import logging + from .simulation_component import SimulationComponent from . import utils -import numba as nb -import os + +# Globals logger = logging.getLogger(__name__) ## TODO: ideally here path_prefix should not be used and all files loaded from modinf +# Internal functionality def _DataFrame2NumbaDict(df, amounts, modinf) -> nb.typed.Dict: if not df["date"].is_monotonic_increasing: raise ValueError("The `df` given is not sorted by the 'date' column.") @@ -40,7 +47,8 @@ def _DataFrame2NumbaDict(df, amounts, modinf) -> nb.typed.Dict: for idx, (row_index, row) in enumerate(df.iterrows()): if row["subpop"] not in modinf.subpop_struct.subpop_names: logging.debug( - f"Invalid subpop '{row['subpop']}' in row {row_index + 1} of seeding::lambda_file. Not found in geodata... Skipping" + f"Invalid subpop '{row['subpop']}' in row {row_index + 1} of " + "seeding::lambda_file. Not found in geodata... 
Skipping" ) elif (row["date"].date() - modinf.ti).days >= 0: if (row["date"].date() - modinf.ti).days < len(nb_seed_perday): @@ -55,19 +63,24 @@ def _DataFrame2NumbaDict(df, amounts, modinf) -> nb.typed.Dict: } seeding_dict["seeding_sources"][idx] = modinf.compartments.get_comp_idx( source_dict, - error_info=f"(seeding source at idx={idx}, row_index={row_index}, row=>>{row}<<)", + error_info=( + f"(seeding source at idx={idx}, " + f"row_index={row_index}, row=>>{row}<<)" + ), ) seeding_dict["seeding_destinations"][idx] = ( modinf.compartments.get_comp_idx( destination_dict, - error_info=f"(seeding destination at idx={idx}, row_index={row_index}, row=>>{row}<<)", + error_info=( + f"(seeding destination at idx={idx}, " + f"row_index={row_index}, row=>>{row}<<)" + ), ) ) seeding_dict["seeding_subpops"][idx] = ( modinf.subpop_struct.subpop_names.index(row["subpop"]) ) seeding_amounts[idx] = amounts[idx] - # id_seed+=1 else: n_seeding_ignored_after += 1 else: @@ -75,11 +88,13 @@ def _DataFrame2NumbaDict(df, amounts, modinf) -> nb.typed.Dict: if n_seeding_ignored_before > 0: logging.critical( - f"Seeding ignored {n_seeding_ignored_before} rows because they were before the start of the simulation." + f"Seeding ignored {n_seeding_ignored_before} rows " + "because they were before the start of the simulation." ) if n_seeding_ignored_after > 0: logging.critical( - f"Seeding ignored {n_seeding_ignored_after} rows because they were after the end of the simulation." + f"Seeding ignored {n_seeding_ignored_after} rows " + "because they were after the end of the simulation." 
) day_start_idx = np.zeros(modinf.n_days + 1, dtype=np.int64) @@ -89,6 +104,7 @@ def _DataFrame2NumbaDict(df, amounts, modinf) -> nb.typed.Dict: return seeding_dict, seeding_amounts +# Exported functionality class Seeding(SimulationComponent): def __init__(self, config: confuse.ConfigView, path_prefix: str = "."): self.seeding_config = config @@ -109,7 +125,8 @@ def get_from_config(self, sim_id: int, modinf) -> nb.typed.Dict: dupes = seeding[seeding.duplicated(["subpop", "date"])].index + 1 if not dupes.empty: raise ValueError( - f"There are repeating subpop-date in rows '{dupes.tolist()}' of `seeding::lambda_file`." + f"There are repeating subpop-date in rows '{dupes.tolist()}' " + "of `seeding::lambda_file`." ) elif method == "FolderDraw": seeding = pd.read_csv( @@ -154,7 +171,8 @@ def get_from_config(self, sim_id: int, modinf) -> nb.typed.Dict: amounts = np.random.poisson(seeding["amount"]) elif method == "NegativeBinomialDistributed": raise ValueError( - "Seeding method 'NegativeBinomialDistributed' is not supported by flepiMoP anymore." + "Seeding method 'NegativeBinomialDistributed' " + "is not supported by flepiMoP anymore." ) elif method == "FolderDraw" or method == "FromFile": amounts = seeding["amount"] From 2d6885615482b9489d9b756053350685489b7181 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 9 Dec 2024 17:15:39 -0500 Subject: [PATCH 2/6] Remove `modinf` arg from `_DataFrame2NumbaDict` Removed the `modinf` arg from the `_DataFrame2NumbaDict` internal utility and replaced it with the individual attributes from the `modinf` object that are needed. 
--- flepimop/gempyor_pkg/src/gempyor/seeding.py | 69 ++++++++++++++------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/seeding.py b/flepimop/gempyor_pkg/src/gempyor/seeding.py index 760da66bb..4dbaa70b1 100644 --- a/flepimop/gempyor_pkg/src/gempyor/seeding.py +++ b/flepimop/gempyor_pkg/src/gempyor/seeding.py @@ -3,14 +3,18 @@ # Imports +from datetime import date import logging import confuse import numba as nb import numpy as np +import numpy.typing as npt import pandas as pd +from .compartments import Compartments from .simulation_component import SimulationComponent +from .subpopulation_structure import SubpopulationStructure from . import utils @@ -22,13 +26,18 @@ # Internal functionality -def _DataFrame2NumbaDict(df, amounts, modinf) -> nb.typed.Dict: +def _DataFrame2NumbaDict( + df: pd.DataFrame, + amounts: list[float], + compartments: Compartments, + subpop_struct: SubpopulationStructure, + n_days: int, + ti: date, +) -> tuple[nb.typed.Dict, npt.NDArray[np.number]]: if not df["date"].is_monotonic_increasing: raise ValueError("The `df` given is not sorted by the 'date' column.") - cmp_grp_names = [ - col for col in modinf.compartments.compartments.columns if col != "name" - ] + cmp_grp_names = [col for col in compartments.compartments.columns if col != "name"] seeding_dict: nb.typed.Dict = nb.typed.Dict.empty( key_type=nb.types.unicode_type, value_type=nb.types.int64[:], @@ -38,22 +47,22 @@ def _DataFrame2NumbaDict(df, amounts, modinf) -> nb.typed.Dict: seeding_dict["seeding_subpops"] = np.zeros(len(amounts), dtype=np.int64) seeding_amounts = np.zeros(len(amounts), dtype=np.float64) - nb_seed_perday = np.zeros(modinf.n_days, dtype=np.int64) + nb_seed_perday = np.zeros(n_days, dtype=np.int64) n_seeding_ignored_before = 0 n_seeding_ignored_after = 0 # id_seed = 0 for idx, (row_index, row) in enumerate(df.iterrows()): - if row["subpop"] not in modinf.subpop_struct.subpop_names: + if row["subpop"] not in 
subpop_struct.subpop_names: logging.debug( f"Invalid subpop '{row['subpop']}' in row {row_index + 1} of " "seeding::lambda_file. Not found in geodata... Skipping" ) - elif (row["date"].date() - modinf.ti).days >= 0: - if (row["date"].date() - modinf.ti).days < len(nb_seed_perday): - nb_seed_perday[(row["date"].date() - modinf.ti).days] = ( - nb_seed_perday[(row["date"].date() - modinf.ti).days] + 1 + elif (row["date"].date() - ti).days >= 0: + if (row["date"].date() - ti).days < len(nb_seed_perday): + nb_seed_perday[(row["date"].date() - ti).days] = ( + nb_seed_perday[(row["date"].date() - ti).days] + 1 ) source_dict = { grp_name: row[f"source_{grp_name}"] for grp_name in cmp_grp_names @@ -61,24 +70,22 @@ def _DataFrame2NumbaDict(df, amounts, modinf) -> nb.typed.Dict: destination_dict = { grp_name: row[f"destination_{grp_name}"] for grp_name in cmp_grp_names } - seeding_dict["seeding_sources"][idx] = modinf.compartments.get_comp_idx( + seeding_dict["seeding_sources"][idx] = compartments.get_comp_idx( source_dict, error_info=( f"(seeding source at idx={idx}, " f"row_index={row_index}, row=>>{row}<<)" ), ) - seeding_dict["seeding_destinations"][idx] = ( - modinf.compartments.get_comp_idx( - destination_dict, - error_info=( - f"(seeding destination at idx={idx}, " - f"row_index={row_index}, row=>>{row}<<)" - ), - ) + seeding_dict["seeding_destinations"][idx] = compartments.get_comp_idx( + destination_dict, + error_info=( + f"(seeding destination at idx={idx}, " + f"row_index={row_index}, row=>>{row}<<)" + ), ) - seeding_dict["seeding_subpops"][idx] = ( - modinf.subpop_struct.subpop_names.index(row["subpop"]) + seeding_dict["seeding_subpops"][idx] = subpop_struct.subpop_names.index( + row["subpop"] ) seeding_amounts[idx] = amounts[idx] else: @@ -97,7 +104,7 @@ def _DataFrame2NumbaDict(df, amounts, modinf) -> nb.typed.Dict: "because they were after the end of the simulation." 
) - day_start_idx = np.zeros(modinf.n_days + 1, dtype=np.int64) + day_start_idx = np.zeros(n_days + 1, dtype=np.int64) day_start_idx[1:] = np.cumsum(nb_seed_perday) seeding_dict["day_start_idx"] = day_start_idx @@ -149,7 +156,14 @@ def get_from_config(self, sim_id: int, modinf) -> nb.typed.Dict: ) elif method == "NoSeeding": seeding = pd.DataFrame(columns=["date", "subpop"]) - return _DataFrame2NumbaDict(df=seeding, amounts=[], modinf=modinf) + return _DataFrame2NumbaDict( + seeding, + [], + modinf.compartments, + modinf.subpop_struct, + modinf.n_days, + modinf.ti, + ) else: raise ValueError(f"Unknown seeding method given, '{method}'.") @@ -177,7 +191,14 @@ def get_from_config(self, sim_id: int, modinf) -> nb.typed.Dict: elif method == "FolderDraw" or method == "FromFile": amounts = seeding["amount"] - return _DataFrame2NumbaDict(df=seeding, amounts=amounts, modinf=modinf) + return _DataFrame2NumbaDict( + seeding, + amounts, + modinf.compartments, + modinf.subpop_struct, + modinf.n_days, + modinf.ti, + ) def get_from_file(self, sim_id: int, modinf) -> nb.typed.Dict: """only difference with draw seeding is that the sim_id is now sim_id2load""" From a12150c6127cea452fee05c1ee42957745eceb3d Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 10 Dec 2024 09:16:21 -0500 Subject: [PATCH 3/6] Add `ModelInfo.get_seeding_data` Untangled the `Seeding` class from the `ModelInfo` class by adding a method to `ModelInfo` that interfaces with the `Seeding` class. 
--- flepimop/gempyor_pkg/src/gempyor/inference.py | 11 ++-- .../gempyor_pkg/src/gempyor/model_info.py | 56 +++++++++++-------- flepimop/gempyor_pkg/src/gempyor/seeding.py | 42 ++++++-------- flepimop/gempyor_pkg/src/gempyor/seir.py | 8 +-- .../gempyor_pkg/tests/seir/interface.ipynb | 2 +- .../gempyor_pkg/tests/seir/test_seeding.py | 13 ++++- flepimop/gempyor_pkg/tests/seir/test_seir.py | 18 +++--- 7 files changed, 79 insertions(+), 71 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/inference.py b/flepimop/gempyor_pkg/src/gempyor/inference.py index 525ce5cd6..28c7e5f30 100644 --- a/flepimop/gempyor_pkg/src/gempyor/inference.py +++ b/flepimop/gempyor_pkg/src/gempyor/inference.py @@ -164,7 +164,7 @@ def get_static_arguments(modinf: model_info.ModelInfo): ) initial_conditions = modinf.initial_conditions.get_from_config(sim_id=0, modinf=modinf) - seeding_data, seeding_amounts = modinf.seeding.get_from_config(sim_id=0, modinf=modinf) + seeding_data, seeding_amounts = modinf.get_seeding_data(0) # reduce them parameters = modinf.parameters.parameters_reduce(p_draw, npi_seir) @@ -672,20 +672,17 @@ def one_simulation( self.lastsim_parsed_parameters = parsed_parameters with Timer("onerun_SEIR.seeding"): + seeding_data, seeding_amounts = self.modinf.get_seeding_data( + sim_id2load if load_ID else sim_id2write + ) if load_ID: initial_conditions = self.modinf.initial_conditions.get_from_file( sim_id2load, modinf=self.modinf ) - seeding_data, seeding_amounts = self.modinf.seeding.get_from_file( - sim_id2load, modinf=self.modinf - ) else: initial_conditions = self.modinf.initial_conditions.get_from_config( sim_id2write, modinf=self.modinf ) - seeding_data, seeding_amounts = self.modinf.seeding.get_from_config( - sim_id2write, modinf=self.modinf - ) self.lastsim_seeding_data = seeding_data self.lastsim_seeding_amounts = seeding_amounts self.lastsim_initial_conditions = initial_conditions diff --git a/flepimop/gempyor_pkg/src/gempyor/model_info.py 
b/flepimop/gempyor_pkg/src/gempyor/model_info.py index 2c0f88efa..d5e040f33 100644 --- a/flepimop/gempyor_pkg/src/gempyor/model_info.py +++ b/flepimop/gempyor_pkg/src/gempyor/model_info.py @@ -1,5 +1,14 @@ +import datetime +import logging +import os +import pathlib + +import confuse +import numba as nb +import numpy as np +import numpy.typing as npt import pandas as pd -import datetime, os, logging, pathlib, confuse + from . import ( seeding, subpopulation_structure, @@ -296,30 +305,15 @@ def get_output_filename(self, ftype: str, sim_id: int, extension_override: str = def get_filename( self, ftype: str, sim_id: int, input: bool, extension_override: str = "" ): - """return a CSP formated filename.""" - - if extension_override: # empty strings are Falsy - extension = extension_override - else: # Constructed like this because in some test, extension is not defined - extension = self.extension - - if input: - run_id = self.in_run_id - prefix = self.in_prefix - else: - run_id = self.out_run_id - prefix = self.out_prefix - - fn = self.path_prefix / file_paths.create_file_name( - run_id=run_id, - prefix=prefix, - index=sim_id + self.first_sim_index - 1, + return self.path_prefix / file_paths.create_file_name( + self.in_run_id if input else self.out_run_id, + self.in_prefix if input else self.out_prefix, + sim_id + self.first_sim_index - 1, + ftype, + extension=extension_override if extension_override else self.extension, inference_filepath_suffix=self.inference_filepath_suffix, inference_filename_prefix=self.inference_filename_prefix, - ftype=ftype, - extension=extension, ) - return fn def get_setup_name(self): return self.setup_name @@ -359,3 +353,21 @@ def write_simID( df=df, ) return fname + + def get_seeding_data(self, sim_id: int) -> tuple[nb.typed.Dict, npt.NDArray[np.number]]: + return self.seeding.get_from_config( + self.compartments, + self.subpop_struct, + self.n_days, + self.ti, + self.tf, + ( + None + if self.seeding_config is None + else 
self.get_input_filename( + ftype=self.seeding_config["seeding_file_type"].get(), + sim_id=sim_id, + extension_override="csv", + ) + ), + ) diff --git a/flepimop/gempyor_pkg/src/gempyor/seeding.py b/flepimop/gempyor_pkg/src/gempyor/seeding.py index 4dbaa70b1..737e26aab 100644 --- a/flepimop/gempyor_pkg/src/gempyor/seeding.py +++ b/flepimop/gempyor_pkg/src/gempyor/seeding.py @@ -5,6 +5,7 @@ # Imports from datetime import date import logging +from typing import Any import confuse import numba as nb @@ -117,7 +118,15 @@ def __init__(self, config: confuse.ConfigView, path_prefix: str = "."): self.seeding_config = config self.path_prefix = path_prefix - def get_from_config(self, sim_id: int, modinf) -> nb.typed.Dict: + def get_from_config( + self, + compartments: Compartments, + subpop_struct: SubpopulationStructure, + n_days: int, + ti: date, + tf: date, + input_filename: str | None, + ) -> tuple[nb.typed.Dict, npt.NDArray[np.number]]: method = "NoSeeding" if self.seeding_config is not None and "method" in self.seeding_config.keys(): method = self.seeding_config["method"].as_str() @@ -137,12 +146,7 @@ def get_from_config(self, sim_id: int, modinf) -> nb.typed.Dict: ) elif method == "FolderDraw": seeding = pd.read_csv( - self.path_prefix - / modinf.get_input_filename( - ftype=modinf.seeding_config["seeding_file_type"].get(), - sim_id=sim_id, - extension_override="csv", - ), + self.path_prefix / input_filename, converters={"subpop": lambda x: str(x)}, parse_dates=["date"], skipinitialspace=True, @@ -157,12 +161,7 @@ def get_from_config(self, sim_id: int, modinf) -> nb.typed.Dict: elif method == "NoSeeding": seeding = pd.DataFrame(columns=["date", "subpop"]) return _DataFrame2NumbaDict( - seeding, - [], - modinf.compartments, - modinf.subpop_struct, - modinf.n_days, - modinf.ti, + seeding, [], compartments, subpop_struct, n_days, ti ) else: raise ValueError(f"Unknown seeding method given, '{method}'.") @@ -171,9 +170,7 @@ def get_from_config(self, sim_id: int, modinf) -> 
nb.typed.Dict: # print(seeding.shape) seeding = seeding.sort_values(by="date", axis="index").reset_index() # print(seeding) - mask = (seeding["date"].dt.date > modinf.ti) & ( - seeding["date"].dt.date <= modinf.tf - ) + mask = (seeding["date"].dt.date > ti) & (seeding["date"].dt.date <= tf) seeding = seeding.loc[mask].reset_index() # print(seeding.shape) # print(seeding) @@ -192,17 +189,14 @@ def get_from_config(self, sim_id: int, modinf) -> nb.typed.Dict: amounts = seeding["amount"] return _DataFrame2NumbaDict( - seeding, - amounts, - modinf.compartments, - modinf.subpop_struct, - modinf.n_days, - modinf.ti, + seeding, amounts, compartments, subpop_struct, n_days, ti ) - def get_from_file(self, sim_id: int, modinf) -> nb.typed.Dict: + def get_from_file( + self, *args: Any, **kwargs: Any + ) -> tuple[nb.typed.Dict, npt.NDArray[np.number]]: """only difference with draw seeding is that the sim_id is now sim_id2load""" - return self.get_from_config(sim_id=sim_id, modinf=modinf) + return self.get_from_config(*args, **kwargs) def SeedingFactory(config: confuse.ConfigView, path_prefix: str = "."): diff --git a/flepimop/gempyor_pkg/src/gempyor/seir.py b/flepimop/gempyor_pkg/src/gempyor/seir.py index 2492a3825..72a96bbe9 100644 --- a/flepimop/gempyor_pkg/src/gempyor/seir.py +++ b/flepimop/gempyor_pkg/src/gempyor/seir.py @@ -277,16 +277,12 @@ def onerun_SEIR( initial_conditions = modinf.initial_conditions.get_from_file( sim_id2load, modinf=modinf ) - seeding_data, seeding_amounts = modinf.seeding.get_from_file( - sim_id2load, modinf=modinf - ) + seeding_data, seeding_amounts = modinf.get_seeding_data(sim_id2load) else: initial_conditions = modinf.initial_conditions.get_from_config( sim_id2write, modinf=modinf ) - seeding_data, seeding_amounts = modinf.seeding.get_from_config( - sim_id2write, modinf=modinf - ) + seeding_data, seeding_amounts = modinf.get_seeding_data(sim_id2write) with Timer("onerun_SEIR.parameters"): # Draw or load parameters diff --git 
a/flepimop/gempyor_pkg/tests/seir/interface.ipynb b/flepimop/gempyor_pkg/tests/seir/interface.ipynb index 57bbea87d..6c14cce47 100644 --- a/flepimop/gempyor_pkg/tests/seir/interface.ipynb +++ b/flepimop/gempyor_pkg/tests/seir/interface.ipynb @@ -169,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "63618148-81db-4fe0-9395-7f21878c1372", "metadata": { "execution": { diff --git a/flepimop/gempyor_pkg/tests/seir/test_seeding.py b/flepimop/gempyor_pkg/tests/seir/test_seeding.py index 52d23e0e4..19bd7ad30 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_seeding.py +++ b/flepimop/gempyor_pkg/tests/seir/test_seeding.py @@ -39,5 +39,16 @@ def test_Seeding_draw_success(self): sic = seeding.SeedingFactory(config=s.seeding_config) s.seeding_config["method"] = "NoSeeding" - seeding_result = sic.get_from_config(sim_id=100, modinf=s) + seeding_result = sic.get_from_config( + s.compartments, + s.subpop_struct, + s.n_days, + s.ti, + s.tf, + s.get_input_filename( + ftype=s.seeding_config["seeding_file_type"].get(), + sim_id=0, + extension_override="csv", + ), + ) print(seeding_result) diff --git a/flepimop/gempyor_pkg/tests/seir/test_seir.py b/flepimop/gempyor_pkg/tests/seir/test_seir.py index 16bbd390c..8fc7ef024 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_seir.py +++ b/flepimop/gempyor_pkg/tests/seir/test_seir.py @@ -73,7 +73,7 @@ def test_constant_population_legacy_integration(): ) integration_method = "legacy" - seeding_data, seeding_amounts = modinf.seeding.get_from_file(sim_id=100, modinf=modinf) + seeding_data, seeding_amounts = modinf.get_seeding_data(100) initial_conditions = modinf.initial_conditions.get_from_config( sim_id=100, modinf=modinf ) @@ -152,9 +152,7 @@ def test_constant_population_rk4jit_integration_fail(): ) modinf.seir_config["integration"]["method"] = "rk4.jit" - seeding_data, seeding_amounts = modinf.seeding.get_from_file( - sim_id=100, modinf=modinf - ) + seeding_data, seeding_amounts = 
modinf.get_seeding_data(100) initial_conditions = modinf.initial_conditions.get_from_config( sim_id=100, modinf=modinf ) @@ -231,7 +229,7 @@ def test_constant_population_rk4jit_integration(): # s.integration_method = "rk4.jit" assert modinf.seir_config["integration"]["method"].get() == "rk4" - seeding_data, seeding_amounts = modinf.seeding.get_from_file(sim_id=100, modinf=modinf) + seeding_data, seeding_amounts = modinf.get_seeding_data(100) initial_conditions = modinf.initial_conditions.get_from_config( sim_id=100, modinf=modinf ) @@ -306,7 +304,7 @@ def test_steps_SEIR_nb_simple_spread_with_txt_matrices(): out_prefix=prefix, ) - seeding_data, seeding_amounts = modinf.seeding.get_from_file(sim_id=100, modinf=modinf) + seeding_data, seeding_amounts = modinf.get_seeding_data(100) initial_conditions = modinf.initial_conditions.get_from_config( sim_id=100, modinf=modinf ) @@ -415,7 +413,7 @@ def test_steps_SEIR_nb_simple_spread_with_csv_matrices(): out_prefix=prefix, ) - seeding_data, seeding_amounts = modinf.seeding.get_from_file(sim_id=100, modinf=modinf) + seeding_data, seeding_amounts = modinf.get_seeding_data(100) initial_conditions = modinf.initial_conditions.get_from_config( sim_id=100, modinf=modinf ) @@ -492,7 +490,7 @@ def test_steps_SEIR_no_spread(): out_prefix=prefix, ) - seeding_data, seeding_amounts = modinf.seeding.get_from_file(sim_id=100, modinf=modinf) + seeding_data, seeding_amounts = modinf.get_seeding_data(100) initial_conditions = modinf.initial_conditions.get_from_config( sim_id=100, modinf=modinf ) @@ -769,7 +767,7 @@ def test_parallel_compartments_with_vacc(): out_prefix=prefix, ) - seeding_data, seeding_amounts = modinf.seeding.get_from_file(sim_id=100, modinf=modinf) + seeding_data, seeding_amounts = modinf.get_seeding_data(100) initial_conditions = modinf.initial_conditions.get_from_config( sim_id=100, modinf=modinf ) @@ -863,7 +861,7 @@ def test_parallel_compartments_no_vacc(): out_prefix=prefix, ) - seeding_data, seeding_amounts = 
modinf.seeding.get_from_file(sim_id=100, modinf=modinf) + seeding_data, seeding_amounts = modinf.get_seeding_data(100) initial_conditions = modinf.initial_conditions.get_from_config( sim_id=100, modinf=modinf ) From 8ba5f0c8dc5c0415e054f77bbf6e644111220794 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 10 Dec 2024 09:23:04 -0500 Subject: [PATCH 4/6] Update other references to `Seeding.get_from_*` Updated remaining references in inactive code, notebooks and the `gempyor.dev` module. --- flepimop/gempyor_pkg/docs/integration_benchmark.ipynb | 4 ++-- flepimop/gempyor_pkg/docs/integration_doc.ipynb | 4 ++-- flepimop/gempyor_pkg/docs/interface.ipynb | 9 ++------- flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py | 2 +- flepimop/gempyor_pkg/tests/seir/interface.ipynb | 7 +------ 5 files changed, 8 insertions(+), 18 deletions(-) diff --git a/flepimop/gempyor_pkg/docs/integration_benchmark.ipynb b/flepimop/gempyor_pkg/docs/integration_benchmark.ipynb index ce2880787..01513fb98 100644 --- a/flepimop/gempyor_pkg/docs/integration_benchmark.ipynb +++ b/flepimop/gempyor_pkg/docs/integration_benchmark.ipynb @@ -410,7 +410,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "07bfd952-5c42-4704-81cc-5de0c917c0ab", "metadata": { "execution": { @@ -450,7 +450,7 @@ "\n", "with Timer(\"onerun_SEIR.seeding\"):\n", " initial_conditions = s.initial_conditions.get_from_config(sim_id, modinf=s)\n", - " seeding_data, seeding_amounts = s.seeding.get_from_config(sim_id, modinf=s)\n", + " seeding_data, seeding_amounts = s.get_seeding_data(sim_id)\n", "\n", "mobility_subpop_indices = s.mobility.indices\n", "mobility_data_indices = s.mobility.indptr\n", diff --git a/flepimop/gempyor_pkg/docs/integration_doc.ipynb b/flepimop/gempyor_pkg/docs/integration_doc.ipynb index cfbf2e5d9..763ae9d37 100644 --- a/flepimop/gempyor_pkg/docs/integration_doc.ipynb +++ b/flepimop/gempyor_pkg/docs/integration_doc.ipynb 
@@ -70,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "96230106-73e3-4681-b562-6a7269513375", "metadata": { "execution": { @@ -109,7 +109,7 @@ "\n", "\n", "initial_conditions = gempyor_inference.s.initial_conditions.get_from_config(sim_id2write, modinf=gempyor_inference.s)\n", - "seeding_data, seeding_amounts = gempyor_inference.s.seeding.get_from_config(sim_id2write, modinf=gempyor_inference.s)\n", + "seeding_data, seeding_amounts = gempyor_inference.s.get_seeding_data(sim_id2write)\n", "\n", "\n", "p_draw = gempyor_inference.s.parameters.parameters_quick_draw(\n", diff --git a/flepimop/gempyor_pkg/docs/interface.ipynb b/flepimop/gempyor_pkg/docs/interface.ipynb index 1b0de2e43..64c0850dd 100644 --- a/flepimop/gempyor_pkg/docs/interface.ipynb +++ b/flepimop/gempyor_pkg/docs/interface.ipynb @@ -169,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "63618148-81db-4fe0-9395-7f21878c1372", "metadata": { "execution": { @@ -254,20 +254,15 @@ "\n", "### Run every time:\n", "with Timer(\"onerun_SEIR.seeding\"):\n", + " seeding_data, seeding_amounts = gempyor_inference.s.get_seeding_data(sim_id2load if load_ID else sim_id2write)\n", " if load_ID:\n", " initial_conditions = gempyor_inference.s.initial_conditions.get_from_file(\n", " sim_id2load, modinf=gempyor_inference.s\n", " )\n", - " seeding_data, seeding_amounts = gempyor_inference.s.seeding.get_from_file(\n", - " sim_id2load, modinf=gempyor_inference.s\n", - " )\n", " else:\n", " initial_conditions = gempyor_inference.s.initial_conditions.get_from_config(\n", " sim_id2write, modinf=gempyor_inference.s\n", " )\n", - " seeding_data, seeding_amounts = gempyor_inference.s.seeding.get_from_config(\n", - " sim_id2write, modinf=gempyor_inference.s\n", - " )\n", "\n", "with Timer(\"SEIR.parameters\"):\n", " # Draw or load parameters\n", diff --git a/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py 
b/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py index 09745f44a..9a2219471 100644 --- a/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py +++ b/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py @@ -35,7 +35,7 @@ out_prefix=prefix, ) -seeding_data = modinf.seeding.get_from_config(sim_id=100, modinf=modinf) +seeding_data = modinf.get_seeding_data(100) initial_conditions = modinf.initial_conditions.get_from_config(sim_id=100, modinf=modinf) mobility_subpop_indices = modinf.mobility.indices diff --git a/flepimop/gempyor_pkg/tests/seir/interface.ipynb b/flepimop/gempyor_pkg/tests/seir/interface.ipynb index 6c14cce47..b331bd9e7 100644 --- a/flepimop/gempyor_pkg/tests/seir/interface.ipynb +++ b/flepimop/gempyor_pkg/tests/seir/interface.ipynb @@ -250,16 +250,11 @@ "\n", "### Run every time:\n", "with Timer(\"onerun_SEIR.seeding\"):\n", + " seeding_data, seeding_amounts = gempyor_inference.s.get_seeding_data(sim_id2load if load_ID else sim_id2write)\n", " if load_ID:\n", " initial_conditions = gempyor_inference.s.initial_conditions.get_from_file(sim_id2load, modinf=gempyor_inference.s)\n", - " seeding_data, seeding_amounts = gempyor_inference.s.seeding.get_from_file(\n", - " sim_id2load, modinf=gempyor_inference.s\n", - " )\n", " else:\n", " initial_conditions = gempyor_inference.s.initial_conditions.get_from_config(sim_id2write, modinf=gempyor_inference.s)\n", - " seeding_data, seeding_amounts = gempyor_inference.s.seeding.get_from_config(\n", - " sim_id2write, modinf=gempyor_inference.s\n", - " )\n", "\n", "with Timer(\"SEIR.parameters\"):\n", " # Draw or load parameters\n", From e31cac881c85504612c23a69e098a0d86dc8cb23 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:23:06 -0500 Subject: [PATCH 5/6] Add documentation to the `Seeding` class --- flepimop/gempyor_pkg/src/gempyor/seeding.py | 78 +++++++++++++++++---- 1 file changed, 66 insertions(+), 12 deletions(-) diff --git 
a/flepimop/gempyor_pkg/src/gempyor/seeding.py b/flepimop/gempyor_pkg/src/gempyor/seeding.py index 737e26aab..a06763b4a 100644 --- a/flepimop/gempyor_pkg/src/gempyor/seeding.py +++ b/flepimop/gempyor_pkg/src/gempyor/seeding.py @@ -6,6 +6,7 @@ from datetime import date import logging from typing import Any +import warnings import confuse import numba as nb @@ -23,9 +24,6 @@ logger = logging.getLogger(__name__) -## TODO: ideally here path_prefix should not be used and all files loaded from modinf - - # Internal functionality def _DataFrame2NumbaDict( df: pd.DataFrame, @@ -114,7 +112,22 @@ def _DataFrame2NumbaDict( # Exported functionality class Seeding(SimulationComponent): + """ + Class to handle the seeding of the simulation. + + Attributes: + seeding_config: The configuration for the seeding. + path_prefix: The path prefix to use when reading files. + """ + def __init__(self, config: confuse.ConfigView, path_prefix: str = "."): + """ + Initialize a seeding instance. + + Args: + config: The configuration for the seeding. + path_prefix: The path prefix to use when reading files. + """ self.seeding_config = config self.path_prefix = path_prefix @@ -127,6 +140,27 @@ def get_from_config( tf: date, input_filename: str | None, ) -> tuple[nb.typed.Dict, npt.NDArray[np.number]]: + """ + Get seeding data from the configuration. + + Args: + compartments: The compartments for the simulation. + subpop_struct: The subpopulation structure for the simulation. + n_days: The number of days in the simulation. + ti: The start date of the simulation. + tf: The end date of the simulation. + input_filename: The input filename to use for seeding data. Only used if + the seeding method is 'FolderDraw'. + + Returns: + A tuple containing the seeding data as a Numba dictionary and the seeding + amounts as a Numpy array. The seeding data is a dictionary with the + following keys: + - "seeding_sources": The source compartments for the seeding. 
+ - "seeding_destinations": The destination compartments for the seeding. + - "seeding_subpops": The subpopulations for the seeding. + - "day_start_idx": The start index for each day in the seeding data. + """ method = "NoSeeding" if self.seeding_config is not None and "method" in self.seeding_config.keys(): method = self.seeding_config["method"].as_str() @@ -166,16 +200,10 @@ def get_from_config( else: raise ValueError(f"Unknown seeding method given, '{method}'.") - # Sorting by date is very important here for the seeding format necessary !!!! - # print(seeding.shape) + # Sorting by date is important for the seeding format seeding = seeding.sort_values(by="date", axis="index").reset_index() - # print(seeding) mask = (seeding["date"].dt.date > ti) & (seeding["date"].dt.date <= tf) seeding = seeding.loc[mask].reset_index() - # print(seeding.shape) - # print(seeding) - - # TODO: print. amounts = np.zeros(len(seeding)) if method == "PoissonDistributed": @@ -195,11 +223,37 @@ def get_from_config( def get_from_file( self, *args: Any, **kwargs: Any ) -> tuple[nb.typed.Dict, npt.NDArray[np.number]]: - """only difference with draw seeding is that the sim_id is now sim_id2load""" + """ + This method is deprecated. Use `get_from_config` instead. + + Args: + *args: Positional arguments to pass to `get_from_config`. + **kwargs: Keyword arguments to pass to `get_from_config`. + + Returns: + The result of `get_from_config`. + """ + warnings.warn( + "The 'get_from_file' method is deprecated. Use 'get_from_config' instead.", + DeprecationWarning, + ) return self.get_from_config(*args, **kwargs) -def SeedingFactory(config: confuse.ConfigView, path_prefix: str = "."): +def SeedingFactory(config: confuse.ConfigView, path_prefix: str = ".") -> Seeding: + """ + Create a Seeding instance based on the given configuration. + + This function will use the given configuration to either lookup a plugin class for + the seeding instance or fallback to the default Seeding class. 
+ + Args: + config: The configuration for the seeding. + path_prefix: The path prefix to use when reading files. + + Returns: + A Seeding instance. + """ if config is not None and "method" in config.keys(): if config["method"].as_str() == "plugin": klass = utils.search_and_import_plugins_class( From 692865fa2659b94dfb072634b555b7369e22775f Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:24:36 -0500 Subject: [PATCH 6/6] Document `ModelInfo.get_seeding_data` --- flepimop/gempyor_pkg/src/gempyor/model_info.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/flepimop/gempyor_pkg/src/gempyor/model_info.py b/flepimop/gempyor_pkg/src/gempyor/model_info.py index d5e040f33..459598ab6 100644 --- a/flepimop/gempyor_pkg/src/gempyor/model_info.py +++ b/flepimop/gempyor_pkg/src/gempyor/model_info.py @@ -355,6 +355,18 @@ def write_simID( return fname def get_seeding_data(self, sim_id: int) -> tuple[nb.typed.Dict, npt.NDArray[np.number]]: + """ + Pull the seeding data for the info represented by this model info instance. + + Args: + sim_id: The simulation ID to pull seeding data for. + + Returns: + A tuple containing the seeding data dictionary and the seeding data array. + + See Also: + `gempyor.seeding.Seeding.get_from_config` + """ return self.seeding.get_from_config( self.compartments, self.subpop_struct,