From 281407b65055a03254a26c7e08929ef4db1cacaa Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 28 Mar 2024 14:13:12 -0400 Subject: [PATCH 1/4] sep seeding and ic --- .../src/gempyor/initial_conditions.py | 214 ++++++++++++++++++ flepimop/gempyor_pkg/src/gempyor/seeding.py | 155 +++++++++++++ .../src/gempyor/simulation_component.py | 12 + 3 files changed, 381 insertions(+) create mode 100644 flepimop/gempyor_pkg/src/gempyor/initial_conditions.py create mode 100644 flepimop/gempyor_pkg/src/gempyor/seeding.py create mode 100644 flepimop/gempyor_pkg/src/gempyor/simulation_component.py diff --git a/flepimop/gempyor_pkg/src/gempyor/initial_conditions.py b/flepimop/gempyor_pkg/src/gempyor/initial_conditions.py new file mode 100644 index 000000000..ed473523d --- /dev/null +++ b/flepimop/gempyor_pkg/src/gempyor/initial_conditions.py @@ -0,0 +1,214 @@ +from typing import Dict + +import numpy as np +import pandas as pd +from numba.typed import Dict +import confuse +import logging +from simulation_component import SimulationComponent +from . import utils +from .utils import read_df + +logger = logging.getLogger(__name__) + + +class InitialConditions(SimulationComponent): + def __init__(self, config: confuse.ConfigView): + self.initial_conditions_config = config + + def get_from_config(self, sim_id: int, setup) -> np.ndarray: + method = "Default" + if self.initial_conditions_config is not None and "method" in self.initial_conditions_config.keys(): + method = self.initial_conditions_config["method"].as_str() + + if method == "Default": + ## JK : This could be specified in the config + y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) + y0[0, :] = setup.subpop_pop + return y0 # we finish here: no rest and not proportionallity applies + + allow_missing_subpops = False + allow_missing_compartments = False + if "allow_missing_subpops" in self.initial_conditions_config.keys(): + if self.initial_conditions_config["allow_missing_subpops"].get(): + allow_missing_subpops = True + if "allow_missing_compartments" in self.initial_conditions_config.keys(): + if self.initial_conditions_config["allow_missing_compartments"].get(): + allow_missing_compartments = True + + # Places to allocate the rest of the population + rests = [] + + if method == "SetInitialConditions" or method == "SetInitialConditionsFolderDraw": + # TODO Think about - Does not support the new way of doing compartment indexing + if method == "SetInitialConditionsFolderDraw": + ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"], sim_id=sim_id) + else: + ic_df = read_df( + self.initial_conditions_config["initial_conditions_file"].get(), + ) + + y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) + for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): # + if pl in list(ic_df["subpop"]): + states_pl = ic_df[ic_df["subpop"] == pl] + for comp_idx, comp_name in setup.compartments.compartments["name"].items(): + if "mc_name" in states_pl.columns: + ic_df_compartment_val = states_pl[states_pl["mc_name"] == comp_name]["amount"] + else: + filters = setup.compartments.compartments.iloc[comp_idx].drop("name") + ic_df_compartment_val = states_pl.copy() + for mc_name, mc_value in filters.items(): + ic_df_compartment_val = ic_df_compartment_val[ic_df_compartment_val["mc_" + mc_name] == mc_value][ + "amount" + ] + if len(ic_df_compartment_val) > 1: + raise ValueError( + f"ERROR: Several ({len(ic_df_compartment_val)}) rows are matches for compartment {comp_name} in init file: filters returned {ic_df_compartment_val}" + ) + elif ic_df_compartment_val.empty: + if allow_missing_compartments: + ic_df_compartment_val = 0.0 + else: + raise ValueError( + f"Initial Conditions: Could not set compartment {comp_name} (id: {comp_idx}) in subpop {pl} (id: {pl_idx}). The data from the init file is {states_pl}. \n \ + Use 'allow_missing_compartments' to default to 0 for compartments without initial conditions" + ) + if "rest" in str(ic_df_compartment_val).strip().lower(): + rests.append([comp_idx, pl_idx]) + else: + if isinstance(ic_df_compartment_val, pd.Series): # it can also be float if we allow allow_missing_compartments + ic_df_compartment_val = float(ic_df_compartment_val.iloc[0]) + y0[comp_idx, pl_idx] = float(ic_df_compartment_val) + elif allow_missing_subpops: + logger.critical( + f"No initial conditions for for subpop {pl}, assuming everyone (n={setup.subpop_pop[pl_idx]}) in the first metacompartment ({setup.compartments.compartments['name'].iloc[0]})" + ) + if "proportional" in self.initial_conditions_config.keys(): + if self.initial_conditions_config["proportional"].get(): + y0[0, pl_idx] = 1.0 + else: + y0[0, pl_idx] = setup.subpop_pop[pl_idx] + else: + y0[0, pl_idx] = setup.subpop_pop[pl_idx] + else: + raise ValueError( + f"subpop {pl} does not exist in initial_conditions::states_file. You can set allow_missing_subpops=TRUE to bypass this error" + ) + elif method == "InitialConditionsFolderDraw" or method == "FromFile": + if method == "InitialConditionsFolderDraw": + ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"].get(), sim_id=sim_id) + elif method == "FromFile": + ic_df = read_df( + self.initial_conditions_config["initial_conditions_file"].get(), + ) + + # annoying conversion because sometime the parquet columns get attributed a timezone... + ic_df["date"] = pd.to_datetime(ic_df["date"], utc=True) # force date to be UTC + ic_df["date"] = ic_df["date"].dt.date + ic_df["date"] = ic_df["date"].astype(str) + + ic_df = ic_df[(ic_df["date"] == str(setup.ti)) & (ic_df["mc_value_type"] == "prevalence")] + if ic_df.empty: + raise ValueError( + f"There is no entry for initial time ti in the provided initial_conditions::states_file." + ) + y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) + + for comp_idx, comp_name in setup.compartments.compartments["name"].items(): + # rely on all the mc's instead of mc_name to avoid errors due to e.g order. + # before: only + # ic_df_compartment = ic_df[ic_df["mc_name"] == comp_name] + filters = setup.compartments.compartments.iloc[comp_idx].drop("name") + ic_df_compartment = ic_df.copy() + for mc_name, mc_value in filters.items(): + ic_df_compartment = ic_df_compartment[ic_df_compartment["mc_" + mc_name] == mc_value] + + if len(ic_df_compartment) > 1: + # ic_df_compartment = ic_df_compartment.iloc[0] + raise ValueError( + f"ERROR: Several ({len(ic_df_compartment)}) rows are matches for compartment {mc_name} in init file: filter {filters} returned {ic_df_compartment}" + ) + elif ic_df_compartment.empty: + if allow_missing_compartments: + ic_df_compartment = pd.DataFrame(0, columns=ic_df_compartment.columns, index=[0]) + else: + raise ValueError( + f"Initial Conditions: Could not set compartment {comp_name} (id: {comp_idx}) in subpop {pl} (id: {pl_idx}). The data from the init file is {ic_df_compartment[pl]}." + ) + elif ic_df_compartment["mc_name"].iloc[0] != comp_name: + print( + f"WARNING: init file mc_name {ic_df_compartment['mc_name'].iloc[0]} does not match compartment mc_name {comp_name}" + ) + + for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): + if pl in ic_df.columns: + y0[comp_idx, pl_idx] = float(ic_df_compartment[pl].iloc[0]) + elif allow_missing_subpops: + logger.critical( + f"No initial conditions for for subpop {pl}, assuming everyone (n={setup.subpop_pop[pl_idx]}) in the first metacompartments ({setup.compartments.compartments['name'].iloc[0]})" + ) + if "proportion" in self.initial_conditions_config.keys(): + if self.initial_conditions_config["proportion"].get(): + y0[0, pl_idx] = 1.0 + y0[0, pl_idx] = setup.subpop_pop[pl_idx] + else: + raise ValueError( + f"subpop {pl} does not exist in initial_conditions::states_file. You can set allow_missing_subpops=TRUE to bypass this error" + ) + else: + raise NotImplementedError(f"unknown initial conditions method [got: {method}]") + + # rest + if rests: # not empty + for comp_idx, pl_idx in rests: + total = setup.subpop_pop[pl_idx] + if "proportional" in self.initial_conditions_config.keys(): + if self.initial_conditions_config["proportional"].get(): + total = 1.0 + y0[comp_idx, pl_idx] = total - y0[:, pl_idx].sum() + + if "proportional" in self.initial_conditions_config.keys(): + if self.initial_conditions_config["proportional"].get(): + y0 = y0 * setup.subpop_pop + + # check that the inputed values sums to the subpop population: + error = False + for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): + n_y0 = y0[:, pl_idx].sum() + n_pop = setup.subpop_pop[pl_idx] + if abs(n_y0 - n_pop) > 1: + error = True + print( + f"ERROR: subpop_names {pl} (idx: pl_idx) has a population from initial condition of {n_y0} while population from geodata is {n_pop} (absolute difference should be < 1, here is {abs(n_y0-n_pop)})" + ) + ignore_population_checks = False + if "ignore_population_checks" in self.initial_conditions_config.keys(): + if self.initial_conditions_config["ignore_population_checks"].get(): + ignore_population_checks = True + if error and not ignore_population_checks: + raise ValueError( + """ geodata and initial condition do not agree on population size (see messages above). Use ignore_population_checks: True to ignore""" + ) + elif error and ignore_population_checks: + print( + """ Ignoring the previous population mismatch errors because you added flag 'ignore_population_checks'. This is dangerous""" + ) + return y0 + + def get_from_file(self, sim_id: int, setup) -> np.ndarray: + return self.get_from_config(sim_id=sim_id, setup=setup) + +# TODO: rename config to initial_conditions_config as it shadows the global config + + +def InitialConditionsFactory(config: confuse.ConfigView): + if config is not None and "method" in config.keys(): + if config["method"].as_str() == "plugin": + klass = utils.search_and_import_plugins_class( + plugin_file_path=config["plugin_file_path"].as_str(), + class_name="InitialConditions", + config=config + ) + return klass + return InitialConditions(config) diff --git a/flepimop/gempyor_pkg/src/gempyor/seeding.py b/flepimop/gempyor_pkg/src/gempyor/seeding.py new file mode 100644 index 000000000..258510e96 --- /dev/null +++ b/flepimop/gempyor_pkg/src/gempyor/seeding.py @@ -0,0 +1,155 @@ +from typing import Dict + +import numpy as np +import pandas as pd +import confuse +import logging +from simulation_component import SimulationComponent +from . import utils +import numba as nb + +logger = logging.getLogger(__name__) + + +def _DataFrame2NumbaDict(df, amounts, setup) -> nb.typed.Dict: + if not df["date"].is_monotonic_increasing: + raise ValueError("_DataFrame2NumbaDict got an unsorted dataframe, exposing itself to non-sense") + + cmp_grp_names = [col for col in setup.compartments.compartments.columns if col != "name"] + seeding_dict: nb.typed.Dict = nb.typed.Dict.empty( + key_type=nb.types.unicode_type, + value_type=nb.types.int64[:], + ) + seeding_dict["seeding_sources"] = np.zeros(len(amounts), dtype=np.int64) + seeding_dict["seeding_destinations"] = np.zeros(len(amounts), dtype=np.int64) + seeding_dict["seeding_subpops"] = np.zeros(len(amounts), dtype=np.int64) + seeding_amounts = np.zeros(len(amounts), dtype=np.float64) + + nb_seed_perday = np.zeros(setup.n_days, dtype=np.int64) + + n_seeding_ignored_before = 0 + n_seeding_ignored_after = 0 + + # id_seed = 0 + for idx, (row_index, row) in enumerate(df.iterrows()): + if row["subpop"] not in setup.subpop_struct.subpop_names: + logging.debug( + f"Invalid subpop '{row['subpop']}' in row {row_index + 1} of seeding::lambda_file. Not found in geodata... Skipping" + ) + elif (row["date"].date() - setup.ti).days >= 0: + if (row["date"].date() - setup.ti).days < len(nb_seed_perday): + nb_seed_perday[(row["date"].date() - setup.ti).days] = ( + nb_seed_perday[(row["date"].date() - setup.ti).days] + 1 + ) + source_dict = {grp_name: row[f"source_{grp_name}"] for grp_name in cmp_grp_names} + destination_dict = {grp_name: row[f"destination_{grp_name}"] for grp_name in cmp_grp_names} + seeding_dict["seeding_sources"][idx] = setup.compartments.get_comp_idx( + source_dict, error_info=f"(seeding source at idx={idx}, row_index={row_index}, row=>>{row}<<)" + ) + seeding_dict["seeding_destinations"][idx] = setup.compartments.get_comp_idx( + destination_dict, + error_info=f"(seeding destination at idx={idx}, row_index={row_index}, row=>>{row}<<)", + ) + seeding_dict["seeding_subpops"][idx] = setup.subpop_struct.subpop_names.index(row["subpop"]) + seeding_amounts[idx] = amounts[idx] + # id_seed+=1 + else: + n_seeding_ignored_after += 1 + else: + n_seeding_ignored_before += 1 + + if n_seeding_ignored_before > 0: + logging.critical( + f"Seeding ignored {n_seeding_ignored_before} rows because they were before the start of the simulation." + ) + if n_seeding_ignored_after > 0: + logging.critical( + f"Seeding ignored {n_seeding_ignored_after} rows because they were after the end of the simulation." + ) + + day_start_idx = np.zeros(setup.n_days + 1, dtype=np.int64) + day_start_idx[1:] = np.cumsum(nb_seed_perday) + seeding_dict["day_start_idx"] = day_start_idx + + return seeding_dict, seeding_amounts + +class Seeding(SimulationComponent): + def __init__(self, config: confuse.ConfigView): + self.seeding_config = config + + def get_from_config(self, sim_id: int, setup) -> nb.typed.Dict: + method = "NoSeeding" + if self.seeding_config is not None and "method" in self.seeding_config.keys(): + method = self.seeding_config["method"].as_str() + + if method == "NegativeBinomialDistributed" or method == "PoissonDistributed": + seeding = pd.read_csv( + self.seeding_config["lambda_file"].as_str(), + converters={"subpop": lambda x: str(x)}, + parse_dates=["date"], + skipinitialspace=True, + ) + dupes = seeding[seeding.duplicated(["subpop", "date"])].index + 1 + if not dupes.empty: + raise ValueError(f"Repeated subpop-date in rows {dupes.tolist()} of seeding::lambda_file.") + elif method == "FolderDraw": + seeding = pd.read_csv( + setup.get_input_filename( + ftype=setup.seeding_config["seeding_file_type"].get(), + sim_id=sim_id, + extension_override="csv", + ), + converters={"subpop": lambda x: str(x)}, + parse_dates=["date"], + skipinitialspace=True, + ) + elif method == "FromFile": + seeding = pd.read_csv( + self.seeding_config["seeding_file"].get(), + converters={"subpop": lambda x: str(x)}, + parse_dates=["date"], + skipinitialspace=True, + ) + elif method == "NoSeeding": + seeding = pd.DataFrame(columns=["date", "subpop"]) + return _DataFrame2NumbaDict(df=seeding, amounts=[], setup=setup) + else: + raise NotImplementedError(f"unknown seeding method [got: {method}]") + + # Sorting by date is very important here for the seeding format necessary !!!! + # print(seeding.shape) + seeding = seeding.sort_values(by="date", axis="index").reset_index() + # print(seeding) + mask = (seeding["date"].dt.date > setup.ti) & (seeding["date"].dt.date <= setup.tf) + seeding = seeding.loc[mask].reset_index() + # print(seeding.shape) + # print(seeding) + + # TODO: print. + + amounts = np.zeros(len(seeding)) + if method == "PoissonDistributed": + amounts = np.random.poisson(seeding["amount"]) + elif method == "NegativeBinomialDistributed": + raise ValueError("Seeding method 'NegativeBinomialDistributed' is not supported by flepiMoP anymore.") + amounts = np.random.negative_binomial(n=5, p=5 / (seeding["amount"] + 5)) + elif method == "FolderDraw" or method == "FromFile": + amounts = seeding["amount"] + + return _DataFrame2NumbaDict(df=seeding, amounts=amounts, setup=setup) + + def get_from_file(self, sim_id: int, setup) -> nb.typed.Dict: + """only difference with draw seeding is that the sim_id is now sim_id2load""" + return self.get_from_config(sim_id=sim_id, setup=setup) + + +def SeedingFactory(config: confuse.ConfigView): + if config is not None and "method" in config.keys(): + if config["method"].as_str() == "plugin": + klass = utils.search_and_import_plugins_class( + plugin_file_path=config["plugin_file_path"].as_str(), + class_name="Seeding", + config=config + ) + return klass + return Seeding(config) diff --git a/flepimop/gempyor_pkg/src/gempyor/simulation_component.py b/flepimop/gempyor_pkg/src/gempyor/simulation_component.py new file mode 100644 index 000000000..2c2d181b9 --- /dev/null +++ b/flepimop/gempyor_pkg/src/gempyor/simulation_component.py @@ -0,0 +1,12 @@ +class SimulationComponent: + def __init__(self, config: confuse.ConfigView): + raise NotImplementedError("This method should be overridden in subclasses.") + + def get_from_file(self, sim_id: int, setup) -> np.ndarray: + raise NotImplementedError("This method should be overridden in subclasses.") + + def get_from_config(self, sim_id: int, setup) -> np.ndarray: + raise NotImplementedError("This method should be overridden in subclasses.") + + def write_to_file(self, sim_id: int, setup): + raise NotImplementedError("This method should be overridden in subclasses.") \ No newline at end of file From 7710987fc49420066fd1eba23fbc256718209bcc Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 28 Mar 2024 14:47:18 -0400 Subject: [PATCH 2/4] sunset the usage of seeding_ic --- flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py | 4 ++-- flepimop/gempyor_pkg/src/gempyor/model_info.py | 6 +++--- flepimop/gempyor_pkg/tests/seir/test_ic.py | 8 ++++---- flepimop/gempyor_pkg/tests/seir/test_seeding.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py b/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py index ebb29e1ed..500a2467c 100644 --- a/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py +++ b/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py @@ -35,8 +35,8 @@ out_prefix=prefix, ) -seeding_data = modinf.seedingAndIC.draw_seeding(sim_id=100, setup=modinf) -initial_conditions = modinf.seedingAndIC.draw_ic(sim_id=100, setup=modinf) +seeding_data = modinf.seeding.get_from_config(sim_id=100, setup=modinf) +initial_conditions = modinf.initial_conditions.get_from_config(sim_id=100, setup=modinf) mobility_subpop_indices = modinf.mobility.indices mobility_data_indices = modinf.mobility.indptr diff --git a/flepimop/gempyor_pkg/src/gempyor/model_info.py b/flepimop/gempyor_pkg/src/gempyor/model_info.py index 88141bbc1..6e4b019e5 100644 --- a/flepimop/gempyor_pkg/src/gempyor/model_info.py +++ b/flepimop/gempyor_pkg/src/gempyor/model_info.py @@ -1,6 +1,6 @@ import pandas as pd import datetime, os, logging, pathlib -from . import seeding_ic, subpopulation_structure, parameters, compartments, file_paths +from . import seeding, subpopulation_structure, parameters, compartments, file_paths, initial_conditions from .utils import read_df, write_df logger = logging.getLogger(__name__) @@ -115,8 +115,8 @@ def __init__( tf=self.tf, subpop_names=self.subpop_struct.subpop_names, ) - self.seeding = seeding_ic.SeedingFactory(config = self.seeding_config) - self.initial_conditions = seeding_ic.InitialConditionsFactory(config = self.initial_conditions_config) + self.seeding = seeding.SeedingFactory(config = self.seeding_config) + self.initial_conditions = initial_conditions.InitialConditionsFactory(config = self.initial_conditions_config) # really ugly references to the config globally here. if config["compartments"].exists() and self.seir_config is not None: self.compartments = compartments.Compartments( diff --git a/flepimop/gempyor_pkg/tests/seir/test_ic.py b/flepimop/gempyor_pkg/tests/seir/test_ic.py index 214ee5712..16f4abb30 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_ic.py +++ b/flepimop/gempyor_pkg/tests/seir/test_ic.py @@ -1,6 +1,6 @@ import os import pytest -from gempyor import seeding_ic, model_info +from gempyor import seeding, model_info, initial_conditions from gempyor.utils import config DATA_DIR = os.path.dirname(__file__) + "/data" @@ -21,7 +21,7 @@ def test_IC_success(self): outcome_modifiers_scenario=None, write_csv=False, ) - sic = seeding_ic.InitialConditionsFactory(config=s.initial_conditions_config) + sic = initial_conditions.InitialConditionsFactory(config=s.initial_conditions_config) assert sic.initial_conditions_config == s.initial_conditions_config def test_IC_allow_missing_node_compartments_success(self): @@ -40,7 +40,7 @@ def test_IC_allow_missing_node_compartments_success(self): s.initial_conditions_config["allow_missing_nodes"] = True s.initial_conditions_config["allow_missing_compartments"] = True - sic = seeding_ic.InitialConditionsFactory(config=s.initial_conditions_config) + sic = initial_conditions.InitialConditionsFactory(config=s.initial_conditions_config) assert sic.initial_conditions_config == s.initial_conditions_config initial_conditions = sic.get_from_config(sim_id=100, setup=s) @@ -61,6 +61,6 @@ def test_IC_IC_notImplemented_fail(self): write_csv=False, ) s.initial_conditions_config["method"] = "unknown" - sic = seeding_ic.InitialConditionsFactory(config=s.initial_conditions_config) + sic = initial_conditions.InitialConditionsFactory(config=s.initial_conditions_config) sic.get_from_config(sim_id=100, setup=s) diff --git a/flepimop/gempyor_pkg/tests/seir/test_seeding.py b/flepimop/gempyor_pkg/tests/seir/test_seeding.py index 34552ecce..b7a157012 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_seeding.py +++ b/flepimop/gempyor_pkg/tests/seir/test_seeding.py @@ -1,5 +1,5 @@ import os -from gempyor import seeding_ic, model_info +from gempyor import seeding, model_info from gempyor.utils import config DATA_DIR = os.path.dirname(__file__) + "/data" @@ -20,7 +20,7 @@ def test_Seeding_success(self): outcome_modifiers_scenario=None, write_csv=False, ) - sic = seeding_ic.SeedingFactory(config=s.seeding_config) + sic = seeding.SeedingFactory(config=s.seeding_config) assert sic.seeding_config == s.seeding_config def test_Seeding_draw_success(self): @@ -36,7 +36,7 @@ def test_Seeding_draw_success(self): outcome_modifiers_scenario=None, write_csv=False, ) - sic = seeding_ic.SeedingFactory(config=s.seeding_config) + sic = seeding.SeedingFactory(config=s.seeding_config) s.seeding_config["method"] = "NoSeeding" seeding = sic.get_from_config(sim_id=100, setup=s) From 1d07afcec2e1affc9a89f7012c61391e8d321e46 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Fri, 12 Apr 2024 09:07:57 -0400 Subject: [PATCH 3/4] update seeding- and initial_conditions- related tests --- .../src/gempyor/initial_conditions.py | 2 +- flepimop/gempyor_pkg/src/gempyor/seeding.py | 2 +- .../gempyor_pkg/src/gempyor/seeding_ic.py | 634 ------------------ .../src/gempyor/simulation_component.py | 4 + flepimop/gempyor_pkg/tests/seir/test_ic.py | 4 +- .../gempyor_pkg/tests/seir/test_seeding.py | 4 +- .../gempyor_pkg/tests/seir/test_seeding_ic.py | 106 --- 7 files changed, 9 insertions(+), 747 deletions(-) delete mode 100644 flepimop/gempyor_pkg/src/gempyor/seeding_ic.py delete mode 100644 flepimop/gempyor_pkg/tests/seir/test_seeding_ic.py diff --git a/flepimop/gempyor_pkg/src/gempyor/initial_conditions.py b/flepimop/gempyor_pkg/src/gempyor/initial_conditions.py index ed473523d..952e78fa8 100644 --- a/flepimop/gempyor_pkg/src/gempyor/initial_conditions.py +++ b/flepimop/gempyor_pkg/src/gempyor/initial_conditions.py @@ -5,7 +5,7 @@ from numba.typed import Dict import confuse import logging -from simulation_component import SimulationComponent +from .simulation_component import SimulationComponent from . import utils from .utils import read_df diff --git a/flepimop/gempyor_pkg/src/gempyor/seeding.py b/flepimop/gempyor_pkg/src/gempyor/seeding.py index 258510e96..e917f10c2 100644 --- a/flepimop/gempyor_pkg/src/gempyor/seeding.py +++ b/flepimop/gempyor_pkg/src/gempyor/seeding.py @@ -4,7 +4,7 @@ import pandas as pd import confuse import logging -from simulation_component import SimulationComponent +from .simulation_component import SimulationComponent from . import utils import numba as nb diff --git a/flepimop/gempyor_pkg/src/gempyor/seeding_ic.py b/flepimop/gempyor_pkg/src/gempyor/seeding_ic.py deleted file mode 100644 index fa2f3d162..000000000 --- a/flepimop/gempyor_pkg/src/gempyor/seeding_ic.py +++ /dev/null @@ -1,634 +0,0 @@ -import pathlib -from typing import Dict, Any, Union - -import numpy as np -import pandas as pd -import pyarrow.parquet as pq -from numba.typed import Dict -from . import file_paths -import confuse -import logging -from . import compartments -from . import utils -import numba as nb -from .utils import read_df - -logger = logging.getLogger(__name__) - - -def _DataFrame2NumbaDict(df, amounts, setup) -> nb.typed.Dict: - if not df["date"].is_monotonic_increasing: - raise ValueError("_DataFrame2NumbaDict got an unsorted dataframe, exposing itself to non-sense") - - cmp_grp_names = [col for col in setup.compartments.compartments.columns if col != "name"] - seeding_dict: nb.typed.Dict = nb.typed.Dict.empty( - key_type=nb.types.unicode_type, - value_type=nb.types.int64[:], - ) - seeding_dict["seeding_sources"] = np.zeros(len(amounts), dtype=np.int64) - seeding_dict["seeding_destinations"] = np.zeros(len(amounts), dtype=np.int64) - seeding_dict["seeding_subpops"] = np.zeros(len(amounts), dtype=np.int64) - seeding_amounts = np.zeros(len(amounts), dtype=np.float64) - - nb_seed_perday = np.zeros(setup.n_days, dtype=np.int64) - - n_seeding_ignored_before = 0 - n_seeding_ignored_after = 0 - - # id_seed = 0 - for idx, (row_index, row) in enumerate(df.iterrows()): - if row["subpop"] not in setup.subpop_struct.subpop_names: - logging.debug( - f"Invalid subpop '{row['subpop']}' in row {row_index + 1} of seeding::lambda_file. Not found in geodata... Skipping" - ) - elif (row["date"].date() - setup.ti).days >= 0: - if (row["date"].date() - setup.ti).days < len(nb_seed_perday): - nb_seed_perday[(row["date"].date() - setup.ti).days] = ( - nb_seed_perday[(row["date"].date() - setup.ti).days] + 1 - ) - source_dict = {grp_name: row[f"source_{grp_name}"] for grp_name in cmp_grp_names} - destination_dict = {grp_name: row[f"destination_{grp_name}"] for grp_name in cmp_grp_names} - seeding_dict["seeding_sources"][idx] = setup.compartments.get_comp_idx( - source_dict, error_info=f"(seeding source at idx={idx}, row_index={row_index}, row=>>{row}<<)" - ) - seeding_dict["seeding_destinations"][idx] = setup.compartments.get_comp_idx( - destination_dict, - error_info=f"(seeding destination at idx={idx}, row_index={row_index}, row=>>{row}<<)", - ) - seeding_dict["seeding_subpops"][idx] = setup.subpop_struct.subpop_names.index(row["subpop"]) - seeding_amounts[idx] = amounts[idx] - # id_seed+=1 - else: - n_seeding_ignored_after += 1 - else: - n_seeding_ignored_before += 1 - - if n_seeding_ignored_before > 0: - logging.critical( - f"Seeding ignored {n_seeding_ignored_before} rows because they were before the start of the simulation." - ) - if n_seeding_ignored_after > 0: - logging.critical( - f"Seeding ignored {n_seeding_ignored_after} rows because they were after the end of the simulation." - ) - - day_start_idx = np.zeros(setup.n_days + 1, dtype=np.int64) - day_start_idx[1:] = np.cumsum(nb_seed_perday) - seeding_dict["day_start_idx"] = day_start_idx - - return seeding_dict, seeding_amounts - - -class SeedingAndIC: - def __init__( - self, - seeding_config: confuse.ConfigView, - initial_conditions_config: confuse.ConfigView, - ): - self.seeding_config = seeding_config - self.initial_conditions_config = initial_conditions_config - - def draw_ic(self, sim_id: int, setup) -> np.ndarray: - method = "Default" - if self.initial_conditions_config is not None and "method" in self.initial_conditions_config.keys(): - method = self.initial_conditions_config["method"].as_str() - - if method == "Default": - ## JK : This could be specified in the config - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) - y0[0, :] = setup.subpop_pop - return y0 # we finish here: no rest and not proportionallity applies - - allow_missing_subpops = False - allow_missing_compartments = False - if "allow_missing_subpops" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["allow_missing_subpops"].get(): - allow_missing_subpops = True - if "allow_missing_compartments" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["allow_missing_compartments"].get(): - allow_missing_compartments = True - - # Places to allocate the rest of the population - rests = [] - - if method == "SetInitialConditions" or method == "SetInitialConditionsFolderDraw": - # TODO Think about - Does not support the new way of doing compartment indexing - if method == "SetInitialConditionsFolderDraw": - ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"], sim_id=sim_id) - else: - ic_df = read_df( - self.initial_conditions_config["initial_conditions_file"].get(), - ) - - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) - for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): # - if pl in list(ic_df["subpop"]): - states_pl = ic_df[ic_df["subpop"] == pl] - for comp_idx, comp_name in setup.compartments.compartments["name"].items(): - if "mc_name" in states_pl.columns: - ic_df_compartment_val = states_pl[states_pl["mc_name"] == comp_name]["amount"] - else: - filters = setup.compartments.compartments.iloc[comp_idx].drop("name") - ic_df_compartment = states_pl.copy() - for mc_name, mc_value in filters.items(): - ic_df_compartment = ic_df_compartment[ic_df_compartment["mc_" + mc_name] == mc_value][ - "amount" - ] - if len(ic_df_compartment_val) > 1: - raise ValueError( - f"ERROR: Several ({len(ic_df_compartment_val)}) rows are matches for compartment {comp_name} in init file: filters returned {ic_df_compartment_val}" - ) - elif ic_df_compartment_val.empty: - if allow_missing_compartments: - ic_df_compartment_val = 0.0 - else: - raise ValueError( - f"Initial Conditions: Could not set compartment {comp_name} (id: {comp_idx}) in subpop {pl} (id: {pl_idx}). The data from the init file is {states_pl}. \n \ - Use 'allow_missing_compartments' to default to 0 for compartments without initial conditions" - ) - if "rest" in str(ic_df_compartment_val).strip().lower(): - rests.append([comp_idx, pl_idx]) - else: - y0[comp_idx, pl_idx] = float(ic_df_compartment_val) - elif allow_missing_subpops: - logger.critical( - f"No initial conditions for for subpop {pl}, assuming everyone (n={setup.subpop_pop[pl_idx]}) in the first metacompartment ({setup.compartments.compartments['name'].iloc[0]})" - ) - if "proportional" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportional"].get(): - y0[0, pl_idx] = 1.0 - else: - y0[0, pl_idx] = setup.subpop_pop[pl_idx] - else: - y0[0, pl_idx] = setup.subpop_pop[pl_idx] - else: - raise ValueError( - f"subpop {pl} does not exist in initial_conditions::states_file. You can set allow_missing_subpops=TRUE to bypass this error" - ) - elif method == "InitialConditionsFolderDraw" or method == "FromFile": - if method == "InitialConditionsFolderDraw": - ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"].get(), sim_id=sim_id) - elif method == "FromFile": - ic_df = read_df( - self.initial_conditions_config["initial_conditions_file"].get(), - ) - - # annoying conversion because sometime the parquet columns get attributed a timezone... - ic_df["date"] = pd.to_datetime(ic_df["date"], utc=True) # force date to be UTC - ic_df["date"] = ic_df["date"].dt.date - ic_df["date"] = ic_df["date"].astype(str) - - ic_df = ic_df[(ic_df["date"] == str(setup.ti)) & (ic_df["mc_value_type"] == "prevalence")] - if ic_df.empty: - raise ValueError( - f"There is no entry for initial time ti in the provided initial_conditions::states_file." - ) - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) - - for comp_idx, comp_name in setup.compartments.compartments["name"].items(): - # rely on all the mc's instead of mc_name to avoid errors due to e.g order. - # before: only - # ic_df_compartment = ic_df[ic_df["mc_name"] == comp_name] - filters = setup.compartments.compartments.iloc[comp_idx].drop("name") - ic_df_compartment = ic_df.copy() - for mc_name, mc_value in filters.items(): - ic_df_compartment = ic_df_compartment[ic_df_compartment["mc_" + mc_name] == mc_value] - - if len(ic_df_compartment) > 1: - # ic_df_compartment = ic_df_compartment.iloc[0] - raise ValueError( - f"ERROR: Several ({len(ic_df_compartment)}) rows are matches for compartment {mc_name} in init file: filter {filters} returned {ic_df_compartment}" - ) - elif ic_df_compartment.empty: - if allow_missing_compartments: - ic_df_compartment = pd.DataFrame(0, columns=ic_df_compartment.columns, index=[0]) - else: - raise ValueError( - f"Initial Conditions: Could not set compartment {comp_name} (id: {comp_idx}) in subpop {pl} (id: {pl_idx}). The data from the init file is {ic_df_compartment[pl]}." - ) - elif ic_df_compartment["mc_name"].iloc[0] != comp_name: - print( - f"WARNING: init file mc_name {ic_df_compartment['mc_name'].iloc[0]} does not match compartment mc_name {comp_name}" - ) - - for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): - if pl in ic_df.columns: - y0[comp_idx, pl_idx] = float(ic_df_compartment[pl]) - elif allow_missing_subpops: - logger.critical( - f"No initial conditions for for subpop {pl}, assuming everyone (n={setup.subpop_pop[pl_idx]}) in the first metacompartments ({setup.compartments.compartments['name'].iloc[0]})" - ) - if "proportion" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportion"].get(): - y0[0, pl_idx] = 1.0 - y0[0, pl_idx] = setup.subpop_pop[pl_idx] - else: - raise ValueError( - f"subpop {pl} does not exist in initial_conditions::states_file. You can set allow_missing_subpops=TRUE to bypass this error" - ) - else: - raise NotImplementedError(f"unknown initial conditions method [got: {method}]") - - # rest - if rests: # not empty - for comp_idx, pl_idx in rests: - total = setup.subpop_pop[pl_idx] - if "proportional" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportional"].get(): - total = 1.0 - y0[comp_idx, pl_idx] = total - y0[:, pl_idx].sum() - - if "proportional" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportional"].get(): - y0 = y0 * setup.subpop_pop[pl_idx] - - # check that the inputed values sums to the subpop population: - error = False - for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): - n_y0 = y0[:, pl_idx].sum() - n_pop = setup.subpop_pop[pl_idx] - if abs(n_y0 - n_pop) > 1: - error = True - print( - f"ERROR: subpop_names {pl} (idx: pl_idx) has a population from initial condition of {n_y0} while population from geodata is {n_pop} (absolute difference should be < 1, here is {abs(n_y0-n_pop)})" - ) - ignore_population_checks = False - if "ignore_population_checks" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["ignore_population_checks"].get(): - ignore_population_checks = True - if error and not ignore_population_checks: - raise ValueError( - f""" geodata and initial condition do not agree on population size (see messages above). Use ignore_population_checks: True to ignore""" - ) - elif error and ignore_population_checks: - print( - """ Ignoring the previous population mismatch errors because you added flag 'ignore_population_checks'. This is dangerous""" - ) - return y0 - - def draw_seeding(self, sim_id: int, setup) -> nb.typed.Dict: - method = "NoSeeding" - if self.seeding_config is not None and "method" in self.seeding_config.keys(): - method = self.seeding_config["method"].as_str() - - if method == "NegativeBinomialDistributed" or method == "PoissonDistributed": - seeding = pd.read_csv( - self.seeding_config["lambda_file"].as_str(), - converters={"subpop": lambda x: str(x)}, - parse_dates=["date"], - skipinitialspace=True, - ) - dupes = seeding[seeding.duplicated(["subpop", "date"])].index + 1 - if not dupes.empty: - raise ValueError(f"Repeated subpop-date in rows {dupes.tolist()} of seeding::lambda_file.") - elif method == "FolderDraw": - seeding = pd.read_csv( - setup.get_input_filename( - ftype=setup.seeding_config["seeding_file_type"].get(), - sim_id=sim_id, - extension_override="csv", - ), - converters={"subpop": lambda x: str(x)}, - parse_dates=["date"], - skipinitialspace=True, - ) - elif method == "FromFile": - seeding = pd.read_csv( - self.seeding_config["seeding_file"].get(), - converters={"subpop": lambda x: str(x)}, - parse_dates=["date"], - skipinitialspace=True, - ) - elif method == "NoSeeding": - seeding = pd.DataFrame(columns=["date", "subpop"]) - return _DataFrame2NumbaDict(df=seeding, amounts=[], setup=setup) - else: - raise NotImplementedError(f"unknown seeding method [got: {method}]") - - # Sorting by date is very important here for the seeding format necessary !!!! - # print(seeding.shape) - seeding = seeding.sort_values(by="date", axis="index").reset_index() - # print(seeding) - mask = (seeding["date"].dt.date > setup.ti) & (seeding["date"].dt.date <= setup.tf) - seeding = seeding.loc[mask].reset_index() - # print(seeding.shape) - # print(seeding) - - # TODO: print. - - amounts = np.zeros(len(seeding)) - if method == "PoissonDistributed": - amounts = np.random.poisson(seeding["amount"]) - elif method == "NegativeBinomialDistributed": - raise ValueError("Seeding method 'NegativeBinomialDistributed' is not supported by flepiMoP anymore.") - amounts = np.random.negative_binomial(n=5, p=5 / (seeding["amount"] + 5)) - elif method == "FolderDraw" or method == "FromFile": - amounts = seeding["amount"] - - return _DataFrame2NumbaDict(df=seeding, amounts=amounts, setup=setup) - - def load_seeding(self, sim_id: int, setup) -> nb.typed.Dict: - """only difference with draw seeding is that the sim_id is now sim_id2load""" - return self.draw_seeding(sim_id=sim_id, setup=setup) - - def load_ic(self, sim_id: int, setup) -> nb.typed.Dict: - return self.draw_ic(sim_id=sim_id, setup=setup) - - # Write seeding used to file - def seeding_write(self, seeding, fname, extension): - raise NotImplementedError(f"It is not yet possible to write the seeding to a file") - - -class SimulationComponent: - def __init__(self, config: confuse.ConfigView): - raise NotImplementedError("This method should be overridden in subclasses.") - - def get_from_file(self, sim_id: int, setup) -> np.ndarray: - raise NotImplementedError("This method should be overridden in subclasses.") - - def get_from_config(self, sim_id: int, setup) -> np.ndarray: - raise NotImplementedError("This method should be overridden in subclasses.") - - def write_to_file(self, sim_id: int, setup): - raise NotImplementedError("This method should be overridden in subclasses.") - - -class Seeding(SimulationComponent): - def __init__(self, config: confuse.ConfigView): - self.seeding_config = config - - def get_from_config(self, sim_id: int, setup) -> nb.typed.Dict: - method = "NoSeeding" - if self.seeding_config is not None and "method" in self.seeding_config.keys(): - method = self.seeding_config["method"].as_str() - - if method == "NegativeBinomialDistributed" or method == "PoissonDistributed": - seeding = pd.read_csv( - self.seeding_config["lambda_file"].as_str(), - converters={"subpop": lambda x: str(x)}, - parse_dates=["date"], - skipinitialspace=True, - ) - dupes = seeding[seeding.duplicated(["subpop", "date"])].index + 1 - if not dupes.empty: - raise ValueError(f"Repeated subpop-date in rows {dupes.tolist()} of seeding::lambda_file.") - elif method == "FolderDraw": - seeding = pd.read_csv( - setup.get_input_filename( - ftype=setup.seeding_config["seeding_file_type"].get(), - sim_id=sim_id, - extension_override="csv", - ), - converters={"subpop": lambda x: str(x)}, - parse_dates=["date"], - skipinitialspace=True, - ) - elif method == "FromFile": - seeding = pd.read_csv( - self.seeding_config["seeding_file"].get(), - converters={"subpop": lambda x: str(x)}, - parse_dates=["date"], - skipinitialspace=True, - ) - elif method == "NoSeeding": - seeding = pd.DataFrame(columns=["date", "subpop"]) - return _DataFrame2NumbaDict(df=seeding, amounts=[], setup=setup) - else: - raise NotImplementedError(f"unknown seeding method [got: {method}]") - - # Sorting by date is very important here for the seeding format necessary !!!! - # print(seeding.shape) - seeding = seeding.sort_values(by="date", axis="index").reset_index() - # print(seeding) - mask = (seeding["date"].dt.date > setup.ti) & (seeding["date"].dt.date <= setup.tf) - seeding = seeding.loc[mask].reset_index() - # print(seeding.shape) - # print(seeding) - - # TODO: print. - - amounts = np.zeros(len(seeding)) - if method == "PoissonDistributed": - amounts = np.random.poisson(seeding["amount"]) - elif method == "NegativeBinomialDistributed": - raise ValueError("Seeding method 'NegativeBinomialDistributed' is not supported by flepiMoP anymore.") - amounts = np.random.negative_binomial(n=5, p=5 / (seeding["amount"] + 5)) - elif method == "FolderDraw" or method == "FromFile": - amounts = seeding["amount"] - - return _DataFrame2NumbaDict(df=seeding, amounts=amounts, setup=setup) - - def get_from_file(self, sim_id: int, setup) -> nb.typed.Dict: - """only difference with draw seeding is that the sim_id is now sim_id2load""" - return self.get_from_config(sim_id=sim_id, setup=setup) - -class InitialConditions(SimulationComponent): - def __init__(self, config: confuse.ConfigView): - self.initial_conditions_config = config - - def get_from_config(self, sim_id: int, setup) -> np.ndarray: - method = "Default" - if self.initial_conditions_config is not None and "method" in self.initial_conditions_config.keys(): - method = self.initial_conditions_config["method"].as_str() - - if method == "Default": - ## JK : This could be specified in the config - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) - y0[0, :] = setup.subpop_pop - return y0 # we finish here: no rest and not proportionallity applies - - allow_missing_subpops = False - allow_missing_compartments = False - if "allow_missing_subpops" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["allow_missing_subpops"].get(): - allow_missing_subpops = True - if "allow_missing_compartments" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["allow_missing_compartments"].get(): - allow_missing_compartments = True - - # Places to allocate the rest of the population - rests = [] - - if method == "SetInitialConditions" or method == "SetInitialConditionsFolderDraw": - # TODO Think about - Does not support the new way of doing compartment indexing - if method == "SetInitialConditionsFolderDraw": - ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"], sim_id=sim_id) - else: - ic_df = read_df( - self.initial_conditions_config["initial_conditions_file"].get(), - ) - - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) - for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): # - if pl in list(ic_df["subpop"]): - states_pl = ic_df[ic_df["subpop"] == pl] - for comp_idx, comp_name in setup.compartments.compartments["name"].items(): - if "mc_name" in states_pl.columns: - ic_df_compartment_val = states_pl[states_pl["mc_name"] == comp_name]["amount"] - else: - filters = setup.compartments.compartments.iloc[comp_idx].drop("name") - ic_df_compartment_val = states_pl.copy() - for mc_name, mc_value in filters.items(): - ic_df_compartment_val = ic_df_compartment_val[ic_df_compartment_val["mc_" + mc_name] == mc_value][ - "amount" - ] - if len(ic_df_compartment_val) > 1: - raise ValueError( - f"ERROR: Several ({len(ic_df_compartment_val)}) rows are matches for compartment {comp_name} in init file: filters returned {ic_df_compartment_val}" - ) - elif ic_df_compartment_val.empty: - if allow_missing_compartments: - ic_df_compartment_val = 0.0 - else: - raise ValueError( - f"Initial Conditions: Could not set compartment {comp_name} (id: {comp_idx}) in subpop {pl} (id: {pl_idx}). The data from the init file is {states_pl}. \n \ - Use 'allow_missing_compartments' to default to 0 for compartments without initial conditions" - ) - if "rest" in str(ic_df_compartment_val).strip().lower(): - rests.append([comp_idx, pl_idx]) - else: - if isinstance(ic_df_compartment_val, pd.Series): # it can also be float if we allow allow_missing_compartments - ic_df_compartment_val = float(ic_df_compartment_val.iloc[0]) - y0[comp_idx, pl_idx] = float(ic_df_compartment_val) - elif allow_missing_subpops: - logger.critical( - f"No initial conditions for for subpop {pl}, assuming everyone (n={setup.subpop_pop[pl_idx]}) in the first metacompartment ({setup.compartments.compartments['name'].iloc[0]})" - ) - if "proportional" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportional"].get(): - y0[0, pl_idx] = 1.0 - else: - y0[0, pl_idx] = setup.subpop_pop[pl_idx] - else: - y0[0, pl_idx] = setup.subpop_pop[pl_idx] - else: - raise ValueError( - f"subpop {pl} does not exist in initial_conditions::states_file. You can set allow_missing_subpops=TRUE to bypass this error" - ) - elif method == "InitialConditionsFolderDraw" or method == "FromFile": - if method == "InitialConditionsFolderDraw": - ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"].get(), sim_id=sim_id) - elif method == "FromFile": - ic_df = read_df( - self.initial_conditions_config["initial_conditions_file"].get(), - ) - - # annoying conversion because sometime the parquet columns get attributed a timezone... - ic_df["date"] = pd.to_datetime(ic_df["date"], utc=True) # force date to be UTC - ic_df["date"] = ic_df["date"].dt.date - ic_df["date"] = ic_df["date"].astype(str) - - ic_df = ic_df[(ic_df["date"] == str(setup.ti)) & (ic_df["mc_value_type"] == "prevalence")] - if ic_df.empty: - raise ValueError( - f"There is no entry for initial time ti in the provided initial_conditions::states_file." - ) - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) - - for comp_idx, comp_name in setup.compartments.compartments["name"].items(): - # rely on all the mc's instead of mc_name to avoid errors due to e.g order. - # before: only - # ic_df_compartment = ic_df[ic_df["mc_name"] == comp_name] - filters = setup.compartments.compartments.iloc[comp_idx].drop("name") - ic_df_compartment = ic_df.copy() - for mc_name, mc_value in filters.items(): - ic_df_compartment = ic_df_compartment[ic_df_compartment["mc_" + mc_name] == mc_value] - - if len(ic_df_compartment) > 1: - # ic_df_compartment = ic_df_compartment.iloc[0] - raise ValueError( - f"ERROR: Several ({len(ic_df_compartment)}) rows are matches for compartment {mc_name} in init file: filter {filters} returned {ic_df_compartment}" - ) - elif ic_df_compartment.empty: - if allow_missing_compartments: - ic_df_compartment = pd.DataFrame(0, columns=ic_df_compartment.columns, index=[0]) - else: - raise ValueError( - f"Initial Conditions: Could not set compartment {comp_name} (id: {comp_idx}) in subpop {pl} (id: {pl_idx}). The data from the init file is {ic_df_compartment[pl]}." - ) - elif ic_df_compartment["mc_name"].iloc[0] != comp_name: - print( - f"WARNING: init file mc_name {ic_df_compartment['mc_name'].iloc[0]} does not match compartment mc_name {comp_name}" - ) - - for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): - if pl in ic_df.columns: - y0[comp_idx, pl_idx] = float(ic_df_compartment[pl].iloc[0]) - elif allow_missing_subpops: - logger.critical( - f"No initial conditions for for subpop {pl}, assuming everyone (n={setup.subpop_pop[pl_idx]}) in the first metacompartments ({setup.compartments.compartments['name'].iloc[0]})" - ) - if "proportion" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportion"].get(): - y0[0, pl_idx] = 1.0 - y0[0, pl_idx] = setup.subpop_pop[pl_idx] - else: - raise ValueError( - f"subpop {pl} does not exist in initial_conditions::states_file. You can set allow_missing_subpops=TRUE to bypass this error" - ) - else: - raise NotImplementedError(f"unknown initial conditions method [got: {method}]") - - # rest - if rests: # not empty - for comp_idx, pl_idx in rests: - total = setup.subpop_pop[pl_idx] - if "proportional" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportional"].get(): - total = 1.0 - y0[comp_idx, pl_idx] = total - y0[:, pl_idx].sum() - - if "proportional" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportional"].get(): - y0 = y0 * setup.subpop_pop - - # check that the inputed values sums to the subpop population: - error = False - for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): - n_y0 = y0[:, pl_idx].sum() - n_pop = setup.subpop_pop[pl_idx] - if abs(n_y0 - n_pop) > 1: - error = True - print( - f"ERROR: subpop_names {pl} (idx: pl_idx) has a population from initial condition of {n_y0} while population from geodata is {n_pop} (absolute difference should be < 1, here is {abs(n_y0-n_pop)})" - ) - ignore_population_checks = False - if "ignore_population_checks" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["ignore_population_checks"].get(): - ignore_population_checks = True - if error and not ignore_population_checks: - raise ValueError( - f""" geodata and initial condition do not agree on population size (see messages above). Use ignore_population_checks: True to ignore""" - ) - elif error and ignore_population_checks: - print( - """ Ignoring the previous population mismatch errors because you added flag 'ignore_population_checks'. This is dangerous""" - ) - return y0 - - def get_from_file(self, sim_id: int, setup) -> np.ndarray: - return self.get_from_config(sim_id=sim_id, setup=setup) - -# TODO: rename config to initial_conditions_config as it shadows the global config - -def InitialConditionsFactory(config: confuse.ConfigView): - if config is not None and "method" in config.keys(): - if config["method"].as_str() == "plugin": - klass = utils.search_and_import_plugins_class( - plugin_file_path=config["plugin_file_path"].as_str(), - class_name="InitialConditions", - config=config - ) - return klass - return InitialConditions(config) - -def SeedingFactory(config: confuse.ConfigView): - if config is not None and "method" in config.keys(): - if config["method"].as_str() == "plugin": - klass = utils.search_and_import_plugins_class( - plugin_file_path=config["plugin_file_path"].as_str(), - class_name="Seeding", - config=config - ) - return klass - return Seeding(config) \ No newline at end of file diff --git a/flepimop/gempyor_pkg/src/gempyor/simulation_component.py b/flepimop/gempyor_pkg/src/gempyor/simulation_component.py index 2c2d181b9..8fb26453e 100644 --- a/flepimop/gempyor_pkg/src/gempyor/simulation_component.py +++ b/flepimop/gempyor_pkg/src/gempyor/simulation_component.py @@ -1,3 +1,7 @@ +import confuse +import numpy as np + + class SimulationComponent: def __init__(self, config: confuse.ConfigView): raise NotImplementedError("This method should be overridden in subclasses.") diff --git a/flepimop/gempyor_pkg/tests/seir/test_ic.py b/flepimop/gempyor_pkg/tests/seir/test_ic.py index 16f4abb30..3cfcc369a 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_ic.py +++ b/flepimop/gempyor_pkg/tests/seir/test_ic.py @@ -41,10 +41,8 @@ def test_IC_allow_missing_node_compartments_success(self): s.initial_conditions_config["allow_missing_nodes"] = True s.initial_conditions_config["allow_missing_compartments"] = True sic = initial_conditions.InitialConditionsFactory(config=s.initial_conditions_config) - assert sic.initial_conditions_config == s.initial_conditions_config + sic.get_from_config(sim_id=100, setup=s) - initial_conditions = sic.get_from_config(sim_id=100, setup=s) - print(initial_conditions) def test_IC_IC_notImplemented_fail(self): with pytest.raises(NotImplementedError, match=r".*unknown.*initial.*conditions.*"): diff --git a/flepimop/gempyor_pkg/tests/seir/test_seeding.py b/flepimop/gempyor_pkg/tests/seir/test_seeding.py index b7a157012..f3a2e4d4a 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_seeding.py +++ b/flepimop/gempyor_pkg/tests/seir/test_seeding.py @@ -39,5 +39,5 @@ def test_Seeding_draw_success(self): sic = seeding.SeedingFactory(config=s.seeding_config) s.seeding_config["method"] = "NoSeeding" - seeding = sic.get_from_config(sim_id=100, setup=s) - print(seeding) + seeding_result = sic.get_from_config(sim_id=100, setup=s) + print(seeding_result) diff --git a/flepimop/gempyor_pkg/tests/seir/test_seeding_ic.py b/flepimop/gempyor_pkg/tests/seir/test_seeding_ic.py deleted file mode 100644 index eaf28a144..000000000 --- a/flepimop/gempyor_pkg/tests/seir/test_seeding_ic.py +++ /dev/null @@ -1,106 +0,0 @@ -import numpy as np -import os -import pytest -import warnings -import shutil - -import pathlib -import pyarrow as pa -import pyarrow.parquet as pq - -from gempyor import seir, NPI, file_paths, seeding_ic, model_info - -from gempyor.utils import config - -DATA_DIR = os.path.dirname(__file__) + "/data" -os.chdir(os.path.dirname(__file__)) - - -class TestSeedingAndIC: - def test_SeedingAndIC_success(self): - config.clear() - config.read(user=False) - config.set_file(f"{DATA_DIR}/config.yml") - - s = model_info.ModelInfo( - config=config, - setup_name="test_seeding and ic", - nslots=1, - seir_modifiers_scenario=None, - outcome_modifiers_scenario=None, - write_csv=False, - ) - sic = seeding_ic.SeedingAndIC( - seeding_config=s.seeding_config, initial_conditions_config=s.initial_conditions_config - ) - assert sic.seeding_config == s.seeding_config - assert sic.initial_conditions_config == s.initial_conditions_config - - def test_SeedingAndIC_allow_missing_node_compartments_success(self): - config.clear() - config.read(user=False) - config.set_file(f"{DATA_DIR}/config.yml") - - s = model_info.ModelInfo( - config=config, - setup_name="test_seeding and ic", - nslots=1, - seir_modifiers_scenario=None, - outcome_modifiers_scenario=None, - write_csv=False, - ) - - s.initial_conditions_config["allow_missing_nodes"] = True - s.initial_conditions_config["allow_missing_compartments"] = True - sic = seeding_ic.SeedingAndIC( - seeding_config=s.seeding_config, initial_conditions_config=s.initial_conditions_config - ) - - initial_conditions = sic.draw_ic(sim_id=100, setup=s) - - # print(initial_conditions) - # integration_method = "legacy" - - def test_SeedingAndIC_IC_notImplemented_fail(self): - with pytest.raises(NotImplementedError, match=r".*unknown.*initial.*conditions.*"): - config.clear() - config.read(user=False) - config.set_file(f"{DATA_DIR}/config.yml") - - s = model_info.ModelInfo( - config=config, - setup_name="test_seeding and ic", - nslots=1, - seir_modifiers_scenario=None, - outcome_modifiers_scenario=None, - write_csv=False, - ) - s.initial_conditions_config["method"] = "unknown" - sic = seeding_ic.SeedingAndIC( - seeding_config=s.seeding_config, initial_conditions_config=s.initial_conditions_config - ) - - sic.draw_ic(sim_id=100, setup=s) - - def test_SeedingAndIC_draw_seeding_success(self): - config.clear() - config.read(user=False) - config.set_file(f"{DATA_DIR}/config.yml") - - s = model_info.ModelInfo( - config=config, - setup_name="test_seeding and ic", - nslots=1, - seir_modifiers_scenario=None, - outcome_modifiers_scenario=None, - write_csv=False, - ) - sic = seeding_ic.SeedingAndIC( - seeding_config=s.seeding_config, initial_conditions_config=s.initial_conditions_config - ) - s.seeding_config["method"] = "NoSeeding" - - seeding = sic.draw_seeding(sim_id=100, setup=s) - print(seeding) - - # print(initial_conditions) From 48ad666700db8ecccaf94292727beeec54b8d056 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Mon, 15 Apr 2024 13:59:02 -0400 Subject: [PATCH 4/4] add Error and remove unreachable code --- flepimop/gempyor_pkg/src/gempyor/seeding.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/seeding.py b/flepimop/gempyor_pkg/src/gempyor/seeding.py index e917f10c2..9c3b8e770 100644 --- a/flepimop/gempyor_pkg/src/gempyor/seeding.py +++ b/flepimop/gempyor_pkg/src/gempyor/seeding.py @@ -132,9 +132,10 @@ def get_from_config(self, sim_id: int, setup) -> nb.typed.Dict: amounts = np.random.poisson(seeding["amount"]) elif method == "NegativeBinomialDistributed": raise ValueError("Seeding method 'NegativeBinomialDistributed' is not supported by flepiMoP anymore.") - amounts = np.random.negative_binomial(n=5, p=5 / (seeding["amount"] + 5)) elif method == "FolderDraw" or method == "FromFile": amounts = seeding["amount"] + else: + raise ValueError(f"Unknown seeding method: {method}") return _DataFrame2NumbaDict(df=seeding, amounts=amounts, setup=setup)