From 55143ada467d2a6196d0d1cba1615685f7d60d4e Mon Sep 17 00:00:00 2001 From: Joseph Lemaitre Date: Tue, 12 Sep 2023 13:41:25 +0200 Subject: [PATCH] Subpopulation structure in its own file --- .../docs/integration_benchmark.ipynb | 2 +- .../gempyor_pkg/src/gempyor/dev/dev_seir.py | 2 +- flepimop/gempyor_pkg/src/gempyor/interface.py | 8 +- flepimop/gempyor_pkg/src/gempyor/setup.py | 95 +------------------ .../src/gempyor/simulate_outcome.py | 2 +- .../gempyor_pkg/src/gempyor/simulate_seir.py | 2 +- .../tests/seir/test_compartments.py | 4 +- .../gempyor_pkg/tests/seir/test_new_seir.py | 4 +- .../gempyor_pkg/tests/seir/test_parameters.py | 8 +- flepimop/gempyor_pkg/tests/seir/test_seir.py | 24 ++--- flepimop/gempyor_pkg/tests/seir/test_setup.py | 12 +-- 11 files changed, 35 insertions(+), 128 deletions(-) diff --git a/flepimop/gempyor_pkg/docs/integration_benchmark.ipynb b/flepimop/gempyor_pkg/docs/integration_benchmark.ipynb index 03db95331..e392401cd 100644 --- a/flepimop/gempyor_pkg/docs/integration_benchmark.ipynb +++ b/flepimop/gempyor_pkg/docs/integration_benchmark.ipynb @@ -200,7 +200,7 @@ "\n", "s = setup.Setup(\n", " setup_name=config[\"name\"].get() + \"_\" + str(npi_scenario),\n", - " spatial_setup=setup.SubpopulationStructure(\n", + " spatial_setup=subpopulation_structure.SubpopulationStructure(\n", " setup_name=config[\"setup_name\"].get(),\n", " geodata_file=spatial_base_path / spatial_config[\"geodata\"].get(),\n", " mobility_file=spatial_base_path / spatial_config[\"mobility\"].get(),\n", diff --git a/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py b/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py index c5339ec34..3781497bb 100644 --- a/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py +++ b/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py @@ -20,7 +20,7 @@ config.read(user=False) config.set_file(f"{DATA_DIR}/config.yml") -ss = setup.SubpopulationStructure( +ss = subpopulation_structure.SubpopulationStructure( setup_name="test_seir", 
geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", diff --git a/flepimop/gempyor_pkg/src/gempyor/interface.py b/flepimop/gempyor_pkg/src/gempyor/interface.py index 698b261c8..181044b69 100644 --- a/flepimop/gempyor_pkg/src/gempyor/interface.py +++ b/flepimop/gempyor_pkg/src/gempyor/interface.py @@ -10,7 +10,7 @@ import pathlib -from . import seir, setup, file_paths +from . import seir, setup, file_paths, subpopulation_structure from . import outcomes from .utils import config, Timer, read_df, profile import numpy as np @@ -80,7 +80,7 @@ def __init__( write_parquet = True self.s = setup.Setup( setup_name=config["name"].get() + "_" + str(npi_scenario), - spatial_setup=setup.SubpopulationStructure( + spatial_setup=subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get() @@ -118,7 +118,7 @@ def __init__( f""" gempyor >> prefix: {in_prefix};""" # ti: {s.ti}; tf: {s.tf}; ) - self.already_built = False # whether we have already build the costly objects that need just one build. + self.already_built = False # whether we have already build the costly objects that need just one build def update_prefix(self, new_prefix, new_out_prefix=None): self.s.in_prefix = new_prefix @@ -374,7 +374,7 @@ def get_seir_parameter_reduced( parameters = self.s.parameters.parameters_reduce(p_draw, npi_seir) full_df = pd.DataFrame() - for i, subpop in enumerate(self.s.subpop_struct.subpop_names): + for i, subpop in enumerate(self.s.spatset.subpop_names): a = pd.DataFrame( parameters[:, :, i].T, columns=self.s.parameters.pnames, diff --git a/flepimop/gempyor_pkg/src/gempyor/setup.py b/flepimop/gempyor_pkg/src/gempyor/setup.py index 9fc7a9576..7d5ea48c2 100644 --- a/flepimop/gempyor_pkg/src/gempyor/setup.py +++ b/flepimop/gempyor_pkg/src/gempyor/setup.py @@ -11,6 +11,7 @@ from . import compartments from . 
import parameters from . import seeding_ic +from .subpopulation_structure import SubpopulationStructure from .utils import config, read_df, write_df from . import file_paths import logging @@ -238,97 +239,3 @@ def write_simID( df=df, ) return fname - - -class SubpopulationStructure: - def __init__(self, *, setup_name, geodata_file, mobility_file, popnodes_key, subpop_names_key): - self.setup_name = setup_name - self.data = pd.read_csv( - geodata_file, converters={subpop_names_key: lambda x: str(x).strip()}, skipinitialspace=True - ) # subpops and populations, strip whitespaces - self.nnodes = len(self.data) # K = # of locations - - # popnodes_key is the name of the column in geodata_file with populations - if popnodes_key not in self.data: - raise ValueError( - f"popnodes_key: {popnodes_key} does not correspond to a column in geodata: {self.data.columns}" - ) - self.popnodes = self.data[popnodes_key].to_numpy() # population - if len(np.argwhere(self.popnodes == 0)): - raise ValueError( - f"There are {len(np.argwhere(self.popnodes == 0))} nodes with population zero, this is not supported." - ) - - # subpop_names_key is the name of the column in geodata_file with subpops - if subpop_names_key not in self.data: - raise ValueError(f"subpop_names_key: {subpop_names_key} does not correspond to a column in geodata.") - self.subpop_names = self.data[subpop_names_key].tolist() - if len(self.subpop_names) != len(set(self.subpop_names)): - raise ValueError(f"There are duplicate subpop_names in geodata.") - - if mobility_file is not None: - mobility_file = pathlib.Path(mobility_file) - if mobility_file.suffix == ".txt": - print("Mobility files as matrices are not recommended. 
Please switch soon to long form csv files.") - self.mobility = scipy.sparse.csr_matrix( - np.loadtxt(mobility_file), dtype=int - ) # K x K matrix of people moving - # Validate mobility data - if self.mobility.shape != (self.nnodes, self.nnodes): - raise ValueError( - f"mobility data must have dimensions of length of geodata ({self.nnodes}, {self.nnodes}). Actual: {self.mobility.shape}" - ) - - elif mobility_file.suffix == ".csv": - mobility_data = pd.read_csv(mobility_file, converters={"ori": str, "dest": str}, skipinitialspace=True) - nn_dict = {v: k for k, v in enumerate(self.subpop_names)} - mobility_data["ori_idx"] = mobility_data["ori"].apply(nn_dict.__getitem__) - mobility_data["dest_idx"] = mobility_data["dest"].apply(nn_dict.__getitem__) - if any(mobility_data["ori_idx"] == mobility_data["dest_idx"]): - raise ValueError( - f"Mobility fluxes with same origin and destination in long form matrix. This is not supported" - ) - - self.mobility = scipy.sparse.coo_matrix( - (mobility_data.amount, (mobility_data.ori_idx, mobility_data.dest_idx)), - shape=(self.nnodes, self.nnodes), - dtype=int, - ).tocsr() - - elif mobility_file.suffix == ".npz": - self.mobility = scipy.sparse.load_npz(mobility_file).astype(int) - # Validate mobility data - if self.mobility.shape != (self.nnodes, self.nnodes): - raise ValueError( - f"mobility data must have dimensions of length of geodata ({self.nnodes}, {self.nnodes}). Actual: {self.mobility.shape}" - ) - else: - raise ValueError( - f"Mobility data must either be a .csv file in longform (recommended) or a .txt matrix file. 
Got {mobility_file}" - ) - - # Make sure mobility values <= the population of src node - tmp = (self.mobility.T - self.popnodes).T - tmp[tmp < 0] = 0 - if tmp.any(): - rows, cols, values = scipy.sparse.find(tmp) - errmsg = "" - for r, c, v in zip(rows, cols, values): - errmsg += f"\n({r}, {c}) = {self.mobility[r, c]} > population of '{self.subpop_names[r]}' = {self.popnodes[r]}" - raise ValueError( - f"The following entries in the mobility data exceed the source node populations in geodata:{errmsg}" - ) - - tmp = self.popnodes - np.squeeze(np.asarray(self.mobility.sum(axis=1))) - tmp[tmp > 0] = 0 - if tmp.any(): - (row,) = np.where(tmp) - errmsg = "" - for r in row: - errmsg += f"\n sum accross row {r} exceed population of node '{self.subpop_names[r]}' ({self.popnodes[r]}), by {-tmp[r]}" - raise ValueError( - f"The following rows in the mobility data exceed the source node populations in geodata:{errmsg}" - ) - else: - logging.critical("No mobility matrix specified -- assuming no one moves") - self.mobility = scipy.sparse.csr_matrix(np.zeros((self.nnodes, self.nnodes)), dtype=int) diff --git a/flepimop/gempyor_pkg/src/gempyor/simulate_outcome.py b/flepimop/gempyor_pkg/src/gempyor/simulate_outcome.py index 1bdb7ee5d..41f8f4c75 100755 --- a/flepimop/gempyor_pkg/src/gempyor/simulate_outcome.py +++ b/flepimop/gempyor_pkg/src/gempyor/simulate_outcome.py @@ -197,7 +197,7 @@ def simulate( nslots = config["nslots"].as_number() print(f"Simulations to be run: {nslots}") - spatial_setup = setup.SubpopulationStructure( + spatial_setup = subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get() diff --git a/flepimop/gempyor_pkg/src/gempyor/simulate_seir.py b/flepimop/gempyor_pkg/src/gempyor/simulate_seir.py index 364371260..5995bde77 100755 --- a/flepimop/gempyor_pkg/src/gempyor/simulate_seir.py +++ 
b/flepimop/gempyor_pkg/src/gempyor/simulate_seir.py @@ -249,7 +249,7 @@ def simulate( if not nslots: nslots = config["nslots"].as_number() - spatial_setup = setup.SubpopulationStructure( + spatial_setup = subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get() diff --git a/flepimop/gempyor_pkg/tests/seir/test_compartments.py b/flepimop/gempyor_pkg/tests/seir/test_compartments.py index 3415baa96..4a2f86d61 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_compartments.py +++ b/flepimop/gempyor_pkg/tests/seir/test_compartments.py @@ -10,7 +10,7 @@ import pyarrow.parquet as pq import filecmp -from gempyor import compartments, seir, NPI, file_paths, setup +from gempyor import compartments, seir, NPI, file_paths, setup, subpopulation_structure from gempyor.utils import config @@ -65,7 +65,7 @@ def test_Setup_has_compartments_component(): config.read(user=False) config.set_file(f"{DATA_DIR}/config.yml") - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( setup_name="test_values", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", diff --git a/flepimop/gempyor_pkg/tests/seir/test_new_seir.py b/flepimop/gempyor_pkg/tests/seir/test_new_seir.py index 87e676e8f..f6880b71a 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_new_seir.py +++ b/flepimop/gempyor_pkg/tests/seir/test_new_seir.py @@ -8,7 +8,7 @@ import pyarrow.parquet as pq from functools import reduce -from gempyor import setup, seir, NPI, file_paths, compartments +from gempyor import setup, seir, NPI, file_paths, compartments, subpopulation_structure from gempyor.utils import config @@ -19,7 +19,7 @@ def test_constant_population(): config.set_file(f"{DATA_DIR}/config.yml") - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( setup_name="test_seir", 
geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", diff --git a/flepimop/gempyor_pkg/tests/seir/test_parameters.py b/flepimop/gempyor_pkg/tests/seir/test_parameters.py index d8c782232..c10ce34bd 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_parameters.py +++ b/flepimop/gempyor_pkg/tests/seir/test_parameters.py @@ -10,7 +10,7 @@ import pyarrow.parquet as pq import filecmp -from gempyor import setup, seir, NPI, file_paths, parameters +from gempyor import setup, seir, NPI, file_paths, parameters, subpopulation_structure from gempyor.utils import config, write_df, read_df @@ -23,7 +23,7 @@ def test_parameters_from_config_plus_read_write(): config.read(user=False) config.set_file(f"{DATA_DIR}/config_compartmental_model_format.yml") # Would be better to build a setup - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", @@ -91,7 +91,7 @@ def test_parameters_quick_draw_old(): config.read(user=False) config.set_file(f"{DATA_DIR}/config.yml") - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", @@ -163,7 +163,7 @@ def test_parameters_from_timeserie_file(): config.clear() config.read(user=False) config.set_file(f"{DATA_DIR}/config_compartmental_model_format.yml") - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", diff --git a/flepimop/gempyor_pkg/tests/seir/test_seir.py b/flepimop/gempyor_pkg/tests/seir/test_seir.py index 6ae8e2d34..c3bf7c1c8 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_seir.py +++ b/flepimop/gempyor_pkg/tests/seir/test_seir.py @@ -8,7 +8,7 @@ import pyarrow as pa import pyarrow.parquet as 
pq -from gempyor import setup, seir, NPI, file_paths +from gempyor import setup, seir, NPI, file_paths, subpopulation_structure from gempyor.utils import config @@ -20,7 +20,7 @@ def test_check_values(): os.chdir(os.path.dirname(__file__)) config.set_file(f"{DATA_DIR}/config.yml") - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( setup_name="test_values", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", @@ -73,7 +73,7 @@ def test_check_values(): def test_constant_population_legacy_integration(): config.set_file(f"{DATA_DIR}/config.yml") - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", @@ -149,7 +149,7 @@ def test_steps_SEIR_nb_simple_spread_with_txt_matrices(): print("test mobility with txt matrices") config.set_file(f"{DATA_DIR}/config.yml") - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", @@ -234,7 +234,7 @@ def test_steps_SEIR_nb_simple_spread_with_csv_matrices(): config.set_file(f"{DATA_DIR}/config.yml") print("test mobility with csv matrices") - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.csv", @@ -304,7 +304,7 @@ def test_steps_SEIR_no_spread(): print("test mobility with no spread") config.set_file(f"{DATA_DIR}/config.yml") - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", @@ -405,7 +405,7 @@ def test_continuation_resume(): spatial_base_path = pathlib.Path(config["data_path"].get()) s = setup.Setup( 
setup_name=config["name"].get() + "_" + str(npi_scenario), - spatial_setup=setup.SubpopulationStructure( + spatial_setup=subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get(), @@ -455,7 +455,7 @@ def test_continuation_resume(): spatial_base_path = pathlib.Path(config["data_path"].get()) s = setup.Setup( setup_name=config["name"].get() + "_" + str(npi_scenario), - spatial_setup=setup.SubpopulationStructure( + spatial_setup=subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get(), @@ -523,7 +523,7 @@ def test_inference_resume(): spatial_base_path = pathlib.Path(config["data_path"].get()) s = setup.Setup( setup_name=config["name"].get() + "_" + str(npi_scenario), - spatial_setup=setup.SubpopulationStructure( + spatial_setup=subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get(), @@ -568,7 +568,7 @@ def test_inference_resume(): spatial_base_path = pathlib.Path(config["data_path"].get()) s = setup.Setup( setup_name=config["name"].get() + "_" + str(npi_scenario), - spatial_setup=setup.SubpopulationStructure( + spatial_setup=subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get(), @@ -616,7 +616,7 @@ def test_parallel_compartments_with_vacc(): os.chdir(os.path.dirname(__file__)) config.set_file(f"{DATA_DIR}/config_parallel.yml") - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( 
setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", @@ -710,7 +710,7 @@ def test_parallel_compartments_no_vacc(): os.chdir(os.path.dirname(__file__)) config.set_file(f"{DATA_DIR}/config_parallel.yml") - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", diff --git a/flepimop/gempyor_pkg/tests/seir/test_setup.py b/flepimop/gempyor_pkg/tests/seir/test_setup.py index f9bca47e4..9ca8d7404 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_setup.py +++ b/flepimop/gempyor_pkg/tests/seir/test_setup.py @@ -5,7 +5,7 @@ import pytest import confuse -from gempyor import setup +from gempyor import setup, subpopulation_structure from gempyor.utils import config @@ -17,7 +17,7 @@ class TestSubpopulationStructure: def test_SubpopulationStructure_success(self): - ss = setup.SubpopulationStructure( + ss = subpopulation_structure.SubpopulationStructure( setup_name=TEST_SETUP_NAME, geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", @@ -28,7 +28,7 @@ def test_SubpopulationStructure_success(self): def test_bad_popnodes_key_fail(self): # Bad popnodes_key error with pytest.raises(ValueError, match=r".*popnodes_key.*"): - setup.SubpopulationStructure( + subpopulation_structure.SubpopulationStructure( setup_name=TEST_SETUP_NAME, geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility_small.txt", @@ -38,7 +38,7 @@ def test_bad_popnodes_key_fail(self): def test_bad_subpop_names_key_fail(self): with pytest.raises(ValueError, match=r".*subpop_names_key.*"): - setup.SubpopulationStructure( + subpopulation_structure.SubpopulationStructure( setup_name=TEST_SETUP_NAME, geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", @@ -48,7 +48,7 @@ def test_bad_subpop_names_key_fail(self): def test_mobility_dimensions_fail(self): with 
pytest.raises(ValueError, match=r".*mobility.*dimensions.*"): - setup.SubpopulationStructure( + subpopulation_structure.SubpopulationStructure( setup_name=TEST_SETUP_NAME, geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility_small.txt", @@ -58,7 +58,7 @@ def test_mobility_dimensions_fail(self): def test_mobility_too_big_fail(self): with pytest.raises(ValueError, match=r".*mobility.*population.*"): - setup.SubpopulationStructure( + subpopulation_structure.SubpopulationStructure( setup_name=TEST_SETUP_NAME, geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility_big.txt",