From 1d07afcec2e1affc9a89f7012c61391e8d321e46 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Fri, 12 Apr 2024 09:07:57 -0400 Subject: [PATCH] update seeding- and initial_conditions- related tests --- .../src/gempyor/initial_conditions.py | 2 +- flepimop/gempyor_pkg/src/gempyor/seeding.py | 2 +- .../gempyor_pkg/src/gempyor/seeding_ic.py | 634 ------------------ .../src/gempyor/simulation_component.py | 4 + flepimop/gempyor_pkg/tests/seir/test_ic.py | 4 +- .../gempyor_pkg/tests/seir/test_seeding.py | 4 +- .../gempyor_pkg/tests/seir/test_seeding_ic.py | 106 --- 7 files changed, 9 insertions(+), 747 deletions(-) delete mode 100644 flepimop/gempyor_pkg/src/gempyor/seeding_ic.py delete mode 100644 flepimop/gempyor_pkg/tests/seir/test_seeding_ic.py diff --git a/flepimop/gempyor_pkg/src/gempyor/initial_conditions.py b/flepimop/gempyor_pkg/src/gempyor/initial_conditions.py index ed473523d..952e78fa8 100644 --- a/flepimop/gempyor_pkg/src/gempyor/initial_conditions.py +++ b/flepimop/gempyor_pkg/src/gempyor/initial_conditions.py @@ -5,7 +5,7 @@ from numba.typed import Dict import confuse import logging -from simulation_component import SimulationComponent +from .simulation_component import SimulationComponent from . import utils from .utils import read_df diff --git a/flepimop/gempyor_pkg/src/gempyor/seeding.py b/flepimop/gempyor_pkg/src/gempyor/seeding.py index 258510e96..e917f10c2 100644 --- a/flepimop/gempyor_pkg/src/gempyor/seeding.py +++ b/flepimop/gempyor_pkg/src/gempyor/seeding.py @@ -4,7 +4,7 @@ import pandas as pd import confuse import logging -from simulation_component import SimulationComponent +from .simulation_component import SimulationComponent from . import utils import numba as nb diff --git a/flepimop/gempyor_pkg/src/gempyor/seeding_ic.py b/flepimop/gempyor_pkg/src/gempyor/seeding_ic.py deleted file mode 100644 index fa2f3d162..000000000 --- a/flepimop/gempyor_pkg/src/gempyor/seeding_ic.py +++ /dev/null @@ -1,634 +0,0 @@ -import pathlib -from typing import Dict, Any, Union - -import numpy as np -import pandas as pd -import pyarrow.parquet as pq -from numba.typed import Dict -from . import file_paths -import confuse -import logging -from . import compartments -from . import utils -import numba as nb -from .utils import read_df - -logger = logging.getLogger(__name__) - - -def _DataFrame2NumbaDict(df, amounts, setup) -> nb.typed.Dict: - if not df["date"].is_monotonic_increasing: - raise ValueError("_DataFrame2NumbaDict got an unsorted dataframe, exposing itself to non-sense") - - cmp_grp_names = [col for col in setup.compartments.compartments.columns if col != "name"] - seeding_dict: nb.typed.Dict = nb.typed.Dict.empty( - key_type=nb.types.unicode_type, - value_type=nb.types.int64[:], - ) - seeding_dict["seeding_sources"] = np.zeros(len(amounts), dtype=np.int64) - seeding_dict["seeding_destinations"] = np.zeros(len(amounts), dtype=np.int64) - seeding_dict["seeding_subpops"] = np.zeros(len(amounts), dtype=np.int64) - seeding_amounts = np.zeros(len(amounts), dtype=np.float64) - - nb_seed_perday = np.zeros(setup.n_days, dtype=np.int64) - - n_seeding_ignored_before = 0 - n_seeding_ignored_after = 0 - - # id_seed = 0 - for idx, (row_index, row) in enumerate(df.iterrows()): - if row["subpop"] not in setup.subpop_struct.subpop_names: - logging.debug( - f"Invalid subpop '{row['subpop']}' in row {row_index + 1} of seeding::lambda_file. Not found in geodata... Skipping" - ) - elif (row["date"].date() - setup.ti).days >= 0: - if (row["date"].date() - setup.ti).days < len(nb_seed_perday): - nb_seed_perday[(row["date"].date() - setup.ti).days] = ( - nb_seed_perday[(row["date"].date() - setup.ti).days] + 1 - ) - source_dict = {grp_name: row[f"source_{grp_name}"] for grp_name in cmp_grp_names} - destination_dict = {grp_name: row[f"destination_{grp_name}"] for grp_name in cmp_grp_names} - seeding_dict["seeding_sources"][idx] = setup.compartments.get_comp_idx( - source_dict, error_info=f"(seeding source at idx={idx}, row_index={row_index}, row=>>{row}<<)" - ) - seeding_dict["seeding_destinations"][idx] = setup.compartments.get_comp_idx( - destination_dict, - error_info=f"(seeding destination at idx={idx}, row_index={row_index}, row=>>{row}<<)", - ) - seeding_dict["seeding_subpops"][idx] = setup.subpop_struct.subpop_names.index(row["subpop"]) - seeding_amounts[idx] = amounts[idx] - # id_seed+=1 - else: - n_seeding_ignored_after += 1 - else: - n_seeding_ignored_before += 1 - - if n_seeding_ignored_before > 0: - logging.critical( - f"Seeding ignored {n_seeding_ignored_before} rows because they were before the start of the simulation." - ) - if n_seeding_ignored_after > 0: - logging.critical( - f"Seeding ignored {n_seeding_ignored_after} rows because they were after the end of the simulation." - ) - - day_start_idx = np.zeros(setup.n_days + 1, dtype=np.int64) - day_start_idx[1:] = np.cumsum(nb_seed_perday) - seeding_dict["day_start_idx"] = day_start_idx - - return seeding_dict, seeding_amounts - - -class SeedingAndIC: - def __init__( - self, - seeding_config: confuse.ConfigView, - initial_conditions_config: confuse.ConfigView, - ): - self.seeding_config = seeding_config - self.initial_conditions_config = initial_conditions_config - - def draw_ic(self, sim_id: int, setup) -> np.ndarray: - method = "Default" - if self.initial_conditions_config is not None and "method" in self.initial_conditions_config.keys(): - method = self.initial_conditions_config["method"].as_str() - - if method == "Default": - ## JK : This could be specified in the config - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) - y0[0, :] = setup.subpop_pop - return y0 # we finish here: no rest and not proportionallity applies - - allow_missing_subpops = False - allow_missing_compartments = False - if "allow_missing_subpops" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["allow_missing_subpops"].get(): - allow_missing_subpops = True - if "allow_missing_compartments" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["allow_missing_compartments"].get(): - allow_missing_compartments = True - - # Places to allocate the rest of the population - rests = [] - - if method == "SetInitialConditions" or method == "SetInitialConditionsFolderDraw": - # TODO Think about - Does not support the new way of doing compartment indexing - if method == "SetInitialConditionsFolderDraw": - ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"], sim_id=sim_id) - else: - ic_df = read_df( - self.initial_conditions_config["initial_conditions_file"].get(), - ) - - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) - for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): # - if pl in list(ic_df["subpop"]): - states_pl = ic_df[ic_df["subpop"] == pl] - for comp_idx, comp_name in setup.compartments.compartments["name"].items(): - if "mc_name" in states_pl.columns: - ic_df_compartment_val = states_pl[states_pl["mc_name"] == comp_name]["amount"] - else: - filters = setup.compartments.compartments.iloc[comp_idx].drop("name") - ic_df_compartment = states_pl.copy() - for mc_name, mc_value in filters.items(): - ic_df_compartment = ic_df_compartment[ic_df_compartment["mc_" + mc_name] == mc_value][ - "amount" - ] - if len(ic_df_compartment_val) > 1: - raise ValueError( - f"ERROR: Several ({len(ic_df_compartment_val)}) rows are matches for compartment {comp_name} in init file: filters returned {ic_df_compartment_val}" - ) - elif ic_df_compartment_val.empty: - if allow_missing_compartments: - ic_df_compartment_val = 0.0 - else: - raise ValueError( - f"Initial Conditions: Could not set compartment {comp_name} (id: {comp_idx}) in subpop {pl} (id: {pl_idx}). The data from the init file is {states_pl}. \n \ - Use 'allow_missing_compartments' to default to 0 for compartments without initial conditions" - ) - if "rest" in str(ic_df_compartment_val).strip().lower(): - rests.append([comp_idx, pl_idx]) - else: - y0[comp_idx, pl_idx] = float(ic_df_compartment_val) - elif allow_missing_subpops: - logger.critical( - f"No initial conditions for for subpop {pl}, assuming everyone (n={setup.subpop_pop[pl_idx]}) in the first metacompartment ({setup.compartments.compartments['name'].iloc[0]})" - ) - if "proportional" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportional"].get(): - y0[0, pl_idx] = 1.0 - else: - y0[0, pl_idx] = setup.subpop_pop[pl_idx] - else: - y0[0, pl_idx] = setup.subpop_pop[pl_idx] - else: - raise ValueError( - f"subpop {pl} does not exist in initial_conditions::states_file. You can set allow_missing_subpops=TRUE to bypass this error" - ) - elif method == "InitialConditionsFolderDraw" or method == "FromFile": - if method == "InitialConditionsFolderDraw": - ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"].get(), sim_id=sim_id) - elif method == "FromFile": - ic_df = read_df( - self.initial_conditions_config["initial_conditions_file"].get(), - ) - - # annoying conversion because sometime the parquet columns get attributed a timezone... - ic_df["date"] = pd.to_datetime(ic_df["date"], utc=True) # force date to be UTC - ic_df["date"] = ic_df["date"].dt.date - ic_df["date"] = ic_df["date"].astype(str) - - ic_df = ic_df[(ic_df["date"] == str(setup.ti)) & (ic_df["mc_value_type"] == "prevalence")] - if ic_df.empty: - raise ValueError( - f"There is no entry for initial time ti in the provided initial_conditions::states_file." - ) - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) - - for comp_idx, comp_name in setup.compartments.compartments["name"].items(): - # rely on all the mc's instead of mc_name to avoid errors due to e.g order. - # before: only - # ic_df_compartment = ic_df[ic_df["mc_name"] == comp_name] - filters = setup.compartments.compartments.iloc[comp_idx].drop("name") - ic_df_compartment = ic_df.copy() - for mc_name, mc_value in filters.items(): - ic_df_compartment = ic_df_compartment[ic_df_compartment["mc_" + mc_name] == mc_value] - - if len(ic_df_compartment) > 1: - # ic_df_compartment = ic_df_compartment.iloc[0] - raise ValueError( - f"ERROR: Several ({len(ic_df_compartment)}) rows are matches for compartment {mc_name} in init file: filter {filters} returned {ic_df_compartment}" - ) - elif ic_df_compartment.empty: - if allow_missing_compartments: - ic_df_compartment = pd.DataFrame(0, columns=ic_df_compartment.columns, index=[0]) - else: - raise ValueError( - f"Initial Conditions: Could not set compartment {comp_name} (id: {comp_idx}) in subpop {pl} (id: {pl_idx}). The data from the init file is {ic_df_compartment[pl]}." - ) - elif ic_df_compartment["mc_name"].iloc[0] != comp_name: - print( - f"WARNING: init file mc_name {ic_df_compartment['mc_name'].iloc[0]} does not match compartment mc_name {comp_name}" - ) - - for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): - if pl in ic_df.columns: - y0[comp_idx, pl_idx] = float(ic_df_compartment[pl]) - elif allow_missing_subpops: - logger.critical( - f"No initial conditions for for subpop {pl}, assuming everyone (n={setup.subpop_pop[pl_idx]}) in the first metacompartments ({setup.compartments.compartments['name'].iloc[0]})" - ) - if "proportion" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportion"].get(): - y0[0, pl_idx] = 1.0 - y0[0, pl_idx] = setup.subpop_pop[pl_idx] - else: - raise ValueError( - f"subpop {pl} does not exist in initial_conditions::states_file. You can set allow_missing_subpops=TRUE to bypass this error" - ) - else: - raise NotImplementedError(f"unknown initial conditions method [got: {method}]") - - # rest - if rests: # not empty - for comp_idx, pl_idx in rests: - total = setup.subpop_pop[pl_idx] - if "proportional" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportional"].get(): - total = 1.0 - y0[comp_idx, pl_idx] = total - y0[:, pl_idx].sum() - - if "proportional" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportional"].get(): - y0 = y0 * setup.subpop_pop[pl_idx] - - # check that the inputed values sums to the subpop population: - error = False - for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): - n_y0 = y0[:, pl_idx].sum() - n_pop = setup.subpop_pop[pl_idx] - if abs(n_y0 - n_pop) > 1: - error = True - print( - f"ERROR: subpop_names {pl} (idx: pl_idx) has a population from initial condition of {n_y0} while population from geodata is {n_pop} (absolute difference should be < 1, here is {abs(n_y0-n_pop)})" - ) - ignore_population_checks = False - if "ignore_population_checks" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["ignore_population_checks"].get(): - ignore_population_checks = True - if error and not ignore_population_checks: - raise ValueError( - f""" geodata and initial condition do not agree on population size (see messages above). Use ignore_population_checks: True to ignore""" - ) - elif error and ignore_population_checks: - print( - """ Ignoring the previous population mismatch errors because you added flag 'ignore_population_checks'. This is dangerous""" - ) - return y0 - - def draw_seeding(self, sim_id: int, setup) -> nb.typed.Dict: - method = "NoSeeding" - if self.seeding_config is not None and "method" in self.seeding_config.keys(): - method = self.seeding_config["method"].as_str() - - if method == "NegativeBinomialDistributed" or method == "PoissonDistributed": - seeding = pd.read_csv( - self.seeding_config["lambda_file"].as_str(), - converters={"subpop": lambda x: str(x)}, - parse_dates=["date"], - skipinitialspace=True, - ) - dupes = seeding[seeding.duplicated(["subpop", "date"])].index + 1 - if not dupes.empty: - raise ValueError(f"Repeated subpop-date in rows {dupes.tolist()} of seeding::lambda_file.") - elif method == "FolderDraw": - seeding = pd.read_csv( - setup.get_input_filename( - ftype=setup.seeding_config["seeding_file_type"].get(), - sim_id=sim_id, - extension_override="csv", - ), - converters={"subpop": lambda x: str(x)}, - parse_dates=["date"], - skipinitialspace=True, - ) - elif method == "FromFile": - seeding = pd.read_csv( - self.seeding_config["seeding_file"].get(), - converters={"subpop": lambda x: str(x)}, - parse_dates=["date"], - skipinitialspace=True, - ) - elif method == "NoSeeding": - seeding = pd.DataFrame(columns=["date", "subpop"]) - return _DataFrame2NumbaDict(df=seeding, amounts=[], setup=setup) - else: - raise NotImplementedError(f"unknown seeding method [got: {method}]") - - # Sorting by date is very important here for the seeding format necessary !!!! - # print(seeding.shape) - seeding = seeding.sort_values(by="date", axis="index").reset_index() - # print(seeding) - mask = (seeding["date"].dt.date > setup.ti) & (seeding["date"].dt.date <= setup.tf) - seeding = seeding.loc[mask].reset_index() - # print(seeding.shape) - # print(seeding) - - # TODO: print. - - amounts = np.zeros(len(seeding)) - if method == "PoissonDistributed": - amounts = np.random.poisson(seeding["amount"]) - elif method == "NegativeBinomialDistributed": - raise ValueError("Seeding method 'NegativeBinomialDistributed' is not supported by flepiMoP anymore.") - amounts = np.random.negative_binomial(n=5, p=5 / (seeding["amount"] + 5)) - elif method == "FolderDraw" or method == "FromFile": - amounts = seeding["amount"] - - return _DataFrame2NumbaDict(df=seeding, amounts=amounts, setup=setup) - - def load_seeding(self, sim_id: int, setup) -> nb.typed.Dict: - """only difference with draw seeding is that the sim_id is now sim_id2load""" - return self.draw_seeding(sim_id=sim_id, setup=setup) - - def load_ic(self, sim_id: int, setup) -> nb.typed.Dict: - return self.draw_ic(sim_id=sim_id, setup=setup) - - # Write seeding used to file - def seeding_write(self, seeding, fname, extension): - raise NotImplementedError(f"It is not yet possible to write the seeding to a file") - - -class SimulationComponent: - def __init__(self, config: confuse.ConfigView): - raise NotImplementedError("This method should be overridden in subclasses.") - - def get_from_file(self, sim_id: int, setup) -> np.ndarray: - raise NotImplementedError("This method should be overridden in subclasses.") - - def get_from_config(self, sim_id: int, setup) -> np.ndarray: - raise NotImplementedError("This method should be overridden in subclasses.") - - def write_to_file(self, sim_id: int, setup): - raise NotImplementedError("This method should be overridden in subclasses.") - - -class Seeding(SimulationComponent): - def __init__(self, config: confuse.ConfigView): - self.seeding_config = config - - def get_from_config(self, sim_id: int, setup) -> nb.typed.Dict: - method = "NoSeeding" - if self.seeding_config is not None and "method" in self.seeding_config.keys(): - method = self.seeding_config["method"].as_str() - - if method == "NegativeBinomialDistributed" or method == "PoissonDistributed": - seeding = pd.read_csv( - self.seeding_config["lambda_file"].as_str(), - converters={"subpop": lambda x: str(x)}, - parse_dates=["date"], - skipinitialspace=True, - ) - dupes = seeding[seeding.duplicated(["subpop", "date"])].index + 1 - if not dupes.empty: - raise ValueError(f"Repeated subpop-date in rows {dupes.tolist()} of seeding::lambda_file.") - elif method == "FolderDraw": - seeding = pd.read_csv( - setup.get_input_filename( - ftype=setup.seeding_config["seeding_file_type"].get(), - sim_id=sim_id, - extension_override="csv", - ), - converters={"subpop": lambda x: str(x)}, - parse_dates=["date"], - skipinitialspace=True, - ) - elif method == "FromFile": - seeding = pd.read_csv( - self.seeding_config["seeding_file"].get(), - converters={"subpop": lambda x: str(x)}, - parse_dates=["date"], - skipinitialspace=True, - ) - elif method == "NoSeeding": - seeding = pd.DataFrame(columns=["date", "subpop"]) - return _DataFrame2NumbaDict(df=seeding, amounts=[], setup=setup) - else: - raise NotImplementedError(f"unknown seeding method [got: {method}]") - - # Sorting by date is very important here for the seeding format necessary !!!! - # print(seeding.shape) - seeding = seeding.sort_values(by="date", axis="index").reset_index() - # print(seeding) - mask = (seeding["date"].dt.date > setup.ti) & (seeding["date"].dt.date <= setup.tf) - seeding = seeding.loc[mask].reset_index() - # print(seeding.shape) - # print(seeding) - - # TODO: print. - - amounts = np.zeros(len(seeding)) - if method == "PoissonDistributed": - amounts = np.random.poisson(seeding["amount"]) - elif method == "NegativeBinomialDistributed": - raise ValueError("Seeding method 'NegativeBinomialDistributed' is not supported by flepiMoP anymore.") - amounts = np.random.negative_binomial(n=5, p=5 / (seeding["amount"] + 5)) - elif method == "FolderDraw" or method == "FromFile": - amounts = seeding["amount"] - - return _DataFrame2NumbaDict(df=seeding, amounts=amounts, setup=setup) - - def get_from_file(self, sim_id: int, setup) -> nb.typed.Dict: - """only difference with draw seeding is that the sim_id is now sim_id2load""" - return self.get_from_config(sim_id=sim_id, setup=setup) - -class InitialConditions(SimulationComponent): - def __init__(self, config: confuse.ConfigView): - self.initial_conditions_config = config - - def get_from_config(self, sim_id: int, setup) -> np.ndarray: - method = "Default" - if self.initial_conditions_config is not None and "method" in self.initial_conditions_config.keys(): - method = self.initial_conditions_config["method"].as_str() - - if method == "Default": - ## JK : This could be specified in the config - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) - y0[0, :] = setup.subpop_pop - return y0 # we finish here: no rest and not proportionallity applies - - allow_missing_subpops = False - allow_missing_compartments = False - if "allow_missing_subpops" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["allow_missing_subpops"].get(): - allow_missing_subpops = True - if "allow_missing_compartments" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["allow_missing_compartments"].get(): - allow_missing_compartments = True - - # Places to allocate the rest of the population - rests = [] - - if method == "SetInitialConditions" or method == "SetInitialConditionsFolderDraw": - # TODO Think about - Does not support the new way of doing compartment indexing - if method == "SetInitialConditionsFolderDraw": - ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"], sim_id=sim_id) - else: - ic_df = read_df( - self.initial_conditions_config["initial_conditions_file"].get(), - ) - - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) - for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): # - if pl in list(ic_df["subpop"]): - states_pl = ic_df[ic_df["subpop"] == pl] - for comp_idx, comp_name in setup.compartments.compartments["name"].items(): - if "mc_name" in states_pl.columns: - ic_df_compartment_val = states_pl[states_pl["mc_name"] == comp_name]["amount"] - else: - filters = setup.compartments.compartments.iloc[comp_idx].drop("name") - ic_df_compartment_val = states_pl.copy() - for mc_name, mc_value in filters.items(): - ic_df_compartment_val = ic_df_compartment_val[ic_df_compartment_val["mc_" + mc_name] == mc_value][ - "amount" - ] - if len(ic_df_compartment_val) > 1: - raise ValueError( - f"ERROR: Several ({len(ic_df_compartment_val)}) rows are matches for compartment {comp_name} in init file: filters returned {ic_df_compartment_val}" - ) - elif ic_df_compartment_val.empty: - if allow_missing_compartments: - ic_df_compartment_val = 0.0 - else: - raise ValueError( - f"Initial Conditions: Could not set compartment {comp_name} (id: {comp_idx}) in subpop {pl} (id: {pl_idx}). The data from the init file is {states_pl}. \n \ - Use 'allow_missing_compartments' to default to 0 for compartments without initial conditions" - ) - if "rest" in str(ic_df_compartment_val).strip().lower(): - rests.append([comp_idx, pl_idx]) - else: - if isinstance(ic_df_compartment_val, pd.Series): # it can also be float if we allow allow_missing_compartments - ic_df_compartment_val = float(ic_df_compartment_val.iloc[0]) - y0[comp_idx, pl_idx] = float(ic_df_compartment_val) - elif allow_missing_subpops: - logger.critical( - f"No initial conditions for for subpop {pl}, assuming everyone (n={setup.subpop_pop[pl_idx]}) in the first metacompartment ({setup.compartments.compartments['name'].iloc[0]})" - ) - if "proportional" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportional"].get(): - y0[0, pl_idx] = 1.0 - else: - y0[0, pl_idx] = setup.subpop_pop[pl_idx] - else: - y0[0, pl_idx] = setup.subpop_pop[pl_idx] - else: - raise ValueError( - f"subpop {pl} does not exist in initial_conditions::states_file. You can set allow_missing_subpops=TRUE to bypass this error" - ) - elif method == "InitialConditionsFolderDraw" or method == "FromFile": - if method == "InitialConditionsFolderDraw": - ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"].get(), sim_id=sim_id) - elif method == "FromFile": - ic_df = read_df( - self.initial_conditions_config["initial_conditions_file"].get(), - ) - - # annoying conversion because sometime the parquet columns get attributed a timezone... - ic_df["date"] = pd.to_datetime(ic_df["date"], utc=True) # force date to be UTC - ic_df["date"] = ic_df["date"].dt.date - ic_df["date"] = ic_df["date"].astype(str) - - ic_df = ic_df[(ic_df["date"] == str(setup.ti)) & (ic_df["mc_value_type"] == "prevalence")] - if ic_df.empty: - raise ValueError( - f"There is no entry for initial time ti in the provided initial_conditions::states_file." - ) - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) - - for comp_idx, comp_name in setup.compartments.compartments["name"].items(): - # rely on all the mc's instead of mc_name to avoid errors due to e.g order. - # before: only - # ic_df_compartment = ic_df[ic_df["mc_name"] == comp_name] - filters = setup.compartments.compartments.iloc[comp_idx].drop("name") - ic_df_compartment = ic_df.copy() - for mc_name, mc_value in filters.items(): - ic_df_compartment = ic_df_compartment[ic_df_compartment["mc_" + mc_name] == mc_value] - - if len(ic_df_compartment) > 1: - # ic_df_compartment = ic_df_compartment.iloc[0] - raise ValueError( - f"ERROR: Several ({len(ic_df_compartment)}) rows are matches for compartment {mc_name} in init file: filter {filters} returned {ic_df_compartment}" - ) - elif ic_df_compartment.empty: - if allow_missing_compartments: - ic_df_compartment = pd.DataFrame(0, columns=ic_df_compartment.columns, index=[0]) - else: - raise ValueError( - f"Initial Conditions: Could not set compartment {comp_name} (id: {comp_idx}) in subpop {pl} (id: {pl_idx}). The data from the init file is {ic_df_compartment[pl]}." - ) - elif ic_df_compartment["mc_name"].iloc[0] != comp_name: - print( - f"WARNING: init file mc_name {ic_df_compartment['mc_name'].iloc[0]} does not match compartment mc_name {comp_name}" - ) - - for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): - if pl in ic_df.columns: - y0[comp_idx, pl_idx] = float(ic_df_compartment[pl].iloc[0]) - elif allow_missing_subpops: - logger.critical( - f"No initial conditions for for subpop {pl}, assuming everyone (n={setup.subpop_pop[pl_idx]}) in the first metacompartments ({setup.compartments.compartments['name'].iloc[0]})" - ) - if "proportion" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportion"].get(): - y0[0, pl_idx] = 1.0 - y0[0, pl_idx] = setup.subpop_pop[pl_idx] - else: - raise ValueError( - f"subpop {pl} does not exist in initial_conditions::states_file. You can set allow_missing_subpops=TRUE to bypass this error" - ) - else: - raise NotImplementedError(f"unknown initial conditions method [got: {method}]") - - # rest - if rests: # not empty - for comp_idx, pl_idx in rests: - total = setup.subpop_pop[pl_idx] - if "proportional" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportional"].get(): - total = 1.0 - y0[comp_idx, pl_idx] = total - y0[:, pl_idx].sum() - - if "proportional" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["proportional"].get(): - y0 = y0 * setup.subpop_pop - - # check that the inputed values sums to the subpop population: - error = False - for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): - n_y0 = y0[:, pl_idx].sum() - n_pop = setup.subpop_pop[pl_idx] - if abs(n_y0 - n_pop) > 1: - error = True - print( - f"ERROR: subpop_names {pl} (idx: pl_idx) has a population from initial condition of {n_y0} while population from geodata is {n_pop} (absolute difference should be < 1, here is {abs(n_y0-n_pop)})" - ) - ignore_population_checks = False - if "ignore_population_checks" in self.initial_conditions_config.keys(): - if self.initial_conditions_config["ignore_population_checks"].get(): - ignore_population_checks = True - if error and not ignore_population_checks: - raise ValueError( - f""" geodata and initial condition do not agree on population size (see messages above). Use ignore_population_checks: True to ignore""" - ) - elif error and ignore_population_checks: - print( - """ Ignoring the previous population mismatch errors because you added flag 'ignore_population_checks'. This is dangerous""" - ) - return y0 - - def get_from_file(self, sim_id: int, setup) -> np.ndarray: - return self.get_from_config(sim_id=sim_id, setup=setup) - -# TODO: rename config to initial_conditions_config as it shadows the global config - -def InitialConditionsFactory(config: confuse.ConfigView): - if config is not None and "method" in config.keys(): - if config["method"].as_str() == "plugin": - klass = utils.search_and_import_plugins_class( - plugin_file_path=config["plugin_file_path"].as_str(), - class_name="InitialConditions", - config=config - ) - return klass - return InitialConditions(config) - -def SeedingFactory(config: confuse.ConfigView): - if config is not None and "method" in config.keys(): - if config["method"].as_str() == "plugin": - klass = utils.search_and_import_plugins_class( - plugin_file_path=config["plugin_file_path"].as_str(), - class_name="Seeding", - config=config - ) - return klass - return Seeding(config) \ No newline at end of file diff --git a/flepimop/gempyor_pkg/src/gempyor/simulation_component.py b/flepimop/gempyor_pkg/src/gempyor/simulation_component.py index 2c2d181b9..8fb26453e 100644 --- a/flepimop/gempyor_pkg/src/gempyor/simulation_component.py +++ b/flepimop/gempyor_pkg/src/gempyor/simulation_component.py @@ -1,3 +1,7 @@ +import confuse +import numpy as np + + class SimulationComponent: def __init__(self, config: confuse.ConfigView): raise NotImplementedError("This method should be overridden in subclasses.") diff --git a/flepimop/gempyor_pkg/tests/seir/test_ic.py b/flepimop/gempyor_pkg/tests/seir/test_ic.py index 16f4abb30..3cfcc369a 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_ic.py +++ b/flepimop/gempyor_pkg/tests/seir/test_ic.py @@ -41,10 +41,8 @@ def test_IC_allow_missing_node_compartments_success(self): s.initial_conditions_config["allow_missing_nodes"] = True s.initial_conditions_config["allow_missing_compartments"] = True sic = initial_conditions.InitialConditionsFactory(config=s.initial_conditions_config) - assert sic.initial_conditions_config == s.initial_conditions_config + sic.get_from_config(sim_id=100, setup=s) - initial_conditions = sic.get_from_config(sim_id=100, setup=s) - print(initial_conditions) def test_IC_IC_notImplemented_fail(self): with pytest.raises(NotImplementedError, match=r".*unknown.*initial.*conditions.*"): diff --git a/flepimop/gempyor_pkg/tests/seir/test_seeding.py b/flepimop/gempyor_pkg/tests/seir/test_seeding.py index b7a157012..f3a2e4d4a 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_seeding.py +++ b/flepimop/gempyor_pkg/tests/seir/test_seeding.py @@ -39,5 +39,5 @@ def test_Seeding_draw_success(self): sic = seeding.SeedingFactory(config=s.seeding_config) s.seeding_config["method"] = "NoSeeding" - seeding = sic.get_from_config(sim_id=100, setup=s) - print(seeding) + seeding_result = sic.get_from_config(sim_id=100, setup=s) + print(seeding_result) diff --git a/flepimop/gempyor_pkg/tests/seir/test_seeding_ic.py b/flepimop/gempyor_pkg/tests/seir/test_seeding_ic.py deleted file mode 100644 index eaf28a144..000000000 --- a/flepimop/gempyor_pkg/tests/seir/test_seeding_ic.py +++ /dev/null @@ -1,106 +0,0 @@ -import numpy as np -import os -import pytest -import warnings -import shutil - -import pathlib -import pyarrow as pa -import pyarrow.parquet as pq - -from gempyor import seir, NPI, file_paths, seeding_ic, model_info - -from gempyor.utils import config - -DATA_DIR = os.path.dirname(__file__) + "/data" -os.chdir(os.path.dirname(__file__)) - - -class TestSeedingAndIC: - def test_SeedingAndIC_success(self): - config.clear() - config.read(user=False) - config.set_file(f"{DATA_DIR}/config.yml") - - s = model_info.ModelInfo( - config=config, - setup_name="test_seeding and ic", - nslots=1, - seir_modifiers_scenario=None, - outcome_modifiers_scenario=None, - write_csv=False, - ) - sic = seeding_ic.SeedingAndIC( - seeding_config=s.seeding_config, initial_conditions_config=s.initial_conditions_config - ) - assert sic.seeding_config == s.seeding_config - assert sic.initial_conditions_config == s.initial_conditions_config - - def test_SeedingAndIC_allow_missing_node_compartments_success(self): - config.clear() - config.read(user=False) - config.set_file(f"{DATA_DIR}/config.yml") - - s = model_info.ModelInfo( - config=config, - setup_name="test_seeding and ic", - nslots=1, - seir_modifiers_scenario=None, - outcome_modifiers_scenario=None, - write_csv=False, - ) - - s.initial_conditions_config["allow_missing_nodes"] = True - s.initial_conditions_config["allow_missing_compartments"] = True - sic = seeding_ic.SeedingAndIC( - seeding_config=s.seeding_config, initial_conditions_config=s.initial_conditions_config - ) - - initial_conditions = sic.draw_ic(sim_id=100, setup=s) - - # print(initial_conditions) - # integration_method = "legacy" - - def test_SeedingAndIC_IC_notImplemented_fail(self): - with pytest.raises(NotImplementedError, match=r".*unknown.*initial.*conditions.*"): - config.clear() - config.read(user=False) - config.set_file(f"{DATA_DIR}/config.yml") - - s = model_info.ModelInfo( - config=config, - setup_name="test_seeding and ic", - nslots=1, - seir_modifiers_scenario=None, - outcome_modifiers_scenario=None, - write_csv=False, - ) - s.initial_conditions_config["method"] = "unknown" - sic = seeding_ic.SeedingAndIC( - seeding_config=s.seeding_config, initial_conditions_config=s.initial_conditions_config - ) - - sic.draw_ic(sim_id=100, setup=s) - - def test_SeedingAndIC_draw_seeding_success(self): - config.clear() - config.read(user=False) - config.set_file(f"{DATA_DIR}/config.yml") - - s = model_info.ModelInfo( - config=config, - setup_name="test_seeding and ic", - nslots=1, - seir_modifiers_scenario=None, - outcome_modifiers_scenario=None, - write_csv=False, - ) - sic = seeding_ic.SeedingAndIC( - seeding_config=s.seeding_config, initial_conditions_config=s.initial_conditions_config - ) - s.seeding_config["method"] = "NoSeeding" - - seeding = sic.draw_seeding(sim_id=100, setup=s) - print(seeding) - - # print(initial_conditions)