From dde5513c4d46d28c7d1027792398b77815b014eb Mon Sep 17 00:00:00 2001 From: Joseph Lemaitre Date: Thu, 25 Apr 2024 18:33:27 +0200 Subject: [PATCH] black --- .../model_input/my_initial_conditions.py | 3 +- flepimop/gempyor_pkg/src/gempyor/calibrate.py | 1 + flepimop/gempyor_pkg/src/gempyor/inference.py | 19 +++-- .../src/gempyor/inference_parameter.py | 10 ++- flepimop/gempyor_pkg/src/gempyor/logloss.py | 8 +- .../gempyor_pkg/src/gempyor/parameters.py | 10 ++- .../gempyor_pkg/src/gempyor/statistics.py | 10 +-- flepimop/gempyor_pkg/src/gempyor/steps_rk4.py | 2 +- .../src/gempyor/subpopulation_structure.py | 15 ++-- flepimop/gempyor_pkg/src/gempyor/utils.py | 23 +++--- .../tests/seir/test_subpopulationstructure.py | 81 ++++++++----------- 11 files changed, 89 insertions(+), 93 deletions(-) diff --git a/examples/simple_usa_statelevel/model_input/my_initial_conditions.py b/examples/simple_usa_statelevel/model_input/my_initial_conditions.py index 048912c16..cdd77e9ba 100644 --- a/examples/simple_usa_statelevel/model_input/my_initial_conditions.py +++ b/examples/simple_usa_statelevel/model_input/my_initial_conditions.py @@ -4,11 +4,10 @@ class InitialConditions(gempyor.initial_conditions.InitialConditions): def get_from_config(self, sim_id: int, modinf) -> np.ndarray: - y0 = np.zeros((modinf.compartments.compartments.shape[0], modinf.nsubpops)) S_idx = modinf.compartments.get_comp_idx({"infection_stage": "S"}) I_idx = modinf.compartments.get_comp_idx({"infection_stage": "I"}) - prop_inf = .005 #np.random.uniform(low=0, high=0.01, size=modinf.nsubpops) + prop_inf = 0.005 # np.random.uniform(low=0, high=0.01, size=modinf.nsubpops) # TODO cause this is another example y0[S_idx, :] = modinf.subpop_pop * (1 - prop_inf) y0[I_idx, :] = modinf.subpop_pop * prop_inf diff --git a/flepimop/gempyor_pkg/src/gempyor/calibrate.py b/flepimop/gempyor_pkg/src/gempyor/calibrate.py index 68acc180f..9a5c07b1c 100644 --- a/flepimop/gempyor_pkg/src/gempyor/calibrate.py +++ b/flepimop/gempyor_pkg/src/gempyor/calibrate.py @@ -15,6 +15,7 @@ # disable operations using the MKL linear algebra. os.environ["OMP_NUM_THREADS"] = "1" + @click.command() @click.option( "-c", diff --git a/flepimop/gempyor_pkg/src/gempyor/inference.py b/flepimop/gempyor_pkg/src/gempyor/inference.py index 929f7be75..6e5f4273a 100644 --- a/flepimop/gempyor_pkg/src/gempyor/inference.py +++ b/flepimop/gempyor_pkg/src/gempyor/inference.py @@ -17,25 +17,28 @@ # TODO: should be able to draw e.g from an initial condition folder buuut keep the draw as a blob # so it is saved by emcee, so I can build a posterio -def emcee_logprob(proposal, modinf, inferpar, loss, static_sim_arguments, save=False, silent = True): + +def emcee_logprob(proposal, modinf, inferpar, loss, static_sim_arguments, save=False, silent=True): if not inferpar.check_in_bound(proposal=proposal): if not silent: print("OUT OF BOUND!!") return -np.inf - - snpi_df_mod, hnpi_df_mod = inferpar.inject_proposal(proposal=proposal, snpi_df = static_sim_arguments["snpi_df_ref"], hnpi_df = static_sim_arguments["hnpi_df_ref"]) - + + snpi_df_mod, hnpi_df_mod = inferpar.inject_proposal( + proposal=proposal, snpi_df=static_sim_arguments["snpi_df_ref"], hnpi_df=static_sim_arguments["hnpi_df_ref"] + ) + ss = copy.deepcopy(static_sim_arguments) ss["snpi_df_in"] = snpi_df_mod ss["hnpi_df_in"] = hnpi_df_mod del ss["snpi_df_ref"] del ss["hnpi_df_ref"] - outcomes_df = simulation_atomic(**ss, modinf=modinf, save=save) ll_total, logloss, regularizations = loss.compute_logloss(model_df=outcomes_df, modinf=modinf) - if not silent: print(f"llik is {ll_total}") + if not silent: + print(f"llik is {ll_total}") return ll_total @@ -55,7 +58,7 @@ def simulation_atomic( seeding_data, seeding_amounts, outcomes_parameters, - save=False + save=False, ): # We need to reseed because subprocess inherit of the same random generator state. np.random.seed(int.from_bytes(os.urandom(4), byteorder="little")) @@ -176,7 +179,7 @@ def get_static_arguments(modinf): coords = dict( date=pd.date_range(modinf.ti, modinf.tf, freq="D"), **compartment_coords, - subpop=modinf.subpop_struct.subpop_names + subpop=modinf.subpop_struct.subpop_names, ) zeros = np.zeros((len(coords["date"]), len(coords["mc_name"][1]), len(coords["subpop"]))) diff --git a/flepimop/gempyor_pkg/src/gempyor/inference_parameter.py b/flepimop/gempyor_pkg/src/gempyor/inference_parameter.py index aeb56b461..936e28aec 100644 --- a/flepimop/gempyor_pkg/src/gempyor/inference_parameter.py +++ b/flepimop/gempyor_pkg/src/gempyor/inference_parameter.py @@ -105,10 +105,11 @@ def print_summary(self): def __str__(self) -> str: from collections import Counter + this_str = f"InferenceParameters: with {self.get_dim()} parameters: \n" for key, value in Counter(self.ptypes).items(): this_str += f" {key}: {value} parameters\n" - + return this_str def get_dim(self): @@ -160,7 +161,12 @@ def hit_ubs(self, proposal) -> np.ndarray: """ return np.array((proposal > self.ubs)) - def inject_proposal(self, proposal, snpi_df=None, hnpi_df=None,): + def inject_proposal( + self, + proposal, + snpi_df=None, + hnpi_df=None, + ): """ Injects the proposal into model inputs, at the right place. diff --git a/flepimop/gempyor_pkg/src/gempyor/logloss.py b/flepimop/gempyor_pkg/src/gempyor/logloss.py index dbb638179..09d8aeadd 100644 --- a/flepimop/gempyor_pkg/src/gempyor/logloss.py +++ b/flepimop/gempyor_pkg/src/gempyor/logloss.py @@ -24,8 +24,12 @@ def __init__(self, inference_config: confuse.ConfigView, modinf, data_dir: str = # self.gt_xr = xr.Dataset.from_dataframe(self.gt.reset_index().set_index(["date","subpop"])) # then some NA were created if some dates where present in some gt but no other. # FIXME THIS IS FUNDAMENTALLY WRONG, especially as groundtruth resample by statistic !!!! - - self.gt = pd.read_csv(os.path.join(data_dir, inference_config["gt_data_path"].get()), converters={"subpop": lambda x: str(x)}, skipinitialspace=True) # TODO: use read_df + + self.gt = pd.read_csv( + os.path.join(data_dir, inference_config["gt_data_path"].get()), + converters={"subpop": lambda x: str(x)}, + skipinitialspace=True, + ) # TODO: use read_df self.gt["date"] = pd.to_datetime(self.gt["date"]) self.gt = self.gt.set_index("date") diff --git a/flepimop/gempyor_pkg/src/gempyor/parameters.py b/flepimop/gempyor_pkg/src/gempyor/parameters.py index 3d38b2c1f..f1c2ae44e 100644 --- a/flepimop/gempyor_pkg/src/gempyor/parameters.py +++ b/flepimop/gempyor_pkg/src/gempyor/parameters.py @@ -106,10 +106,10 @@ def __init__( else: self.pdata[pn]["stacked_modifier_method"] = "product" logging.debug(f"No 'stacked_modifier_method' for parameter {pn}, assuming multiplicative NPIs") - + if self.pconfig[pn]["rolling_mean_windows"].exists(): self.pdata[pn]["rolling_mean_windows"] = self.pconfig[pn]["rolling_mean_windows"].get() - + self.stacked_modifier_method[self.pdata[pn]["stacked_modifier_method"]].append(pn.lower()) logging.debug(f"We have {self.npar} parameter: {self.pnames}") @@ -196,7 +196,7 @@ def parameters_reduce(self, p_draw: ndarray, npi: object) -> ndarray: """ p_reduced = copy.deepcopy(p_draw) if npi is not None: - for idx, pn in enumerate(self.pnames): + for idx, pn in enumerate(self.pnames): p_reduced[idx] = NPI.reduce_parameter( parameter=p_draw[idx], modification=npi.getReduction(pn.lower()), @@ -204,6 +204,8 @@ def parameters_reduce(self, p_draw: ndarray, npi: object) -> ndarray: ) # apply rolling mean if specified if "rolling_mean_windows" in self.pdata[pn]: - p_reduced[idx] = utils.rolling_mean_pad(data = p_reduced[idx], window=self.pdata[pn]["rolling_mean_windows"]) + p_reduced[idx] = utils.rolling_mean_pad( + data=p_reduced[idx], window=self.pdata[pn]["rolling_mean_windows"] + ) return p_reduced diff --git a/flepimop/gempyor_pkg/src/gempyor/statistics.py b/flepimop/gempyor_pkg/src/gempyor/statistics.py index fc2582d00..ea2cc72a3 100644 --- a/flepimop/gempyor_pkg/src/gempyor/statistics.py +++ b/flepimop/gempyor_pkg/src/gempyor/statistics.py @@ -111,8 +111,8 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray): x, loc=loc, scale=self.params.get("scale", scale) ), # wrong: "norm_cov": lambda x, loc, scale: scipy.stats.norm.logpdf( - x, loc=loc, scale=scale*loc.where(loc > 5, 5) - ), # TODO: check, that it's really the loc + x, loc=loc, scale=scale * loc.where(loc > 5, 5) + ), # TODO: check, that it's really the loc "nbinom": lambda x, n, p: scipy.stats.nbinom.logpmf(x, n=self.params.get("n"), p=model_data), "rmse": lambda x, y: -np.log(np.nansum(np.sqrt((x - y) ** 2))), "absolute_error": lambda x, y: -np.log(np.nansum(np.abs(x - y))), @@ -122,11 +122,11 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray): if self.dist in ["pois", "nbinom"]: model_data = model_data.astype(int) gt_data = gt_data.astype(int) - + if self.zero_to_one: # so confusing, wish I had not used xarray to do model_data[model_data==0]=1 - model_data=model_data.where(model_data != 0, 1) - gt_data=gt_data.where(gt_data != 0, 1) + model_data = model_data.where(model_data != 0, 1) + gt_data = gt_data.where(gt_data != 0, 1) # Use stored parameters in the distribution function call likelihood = dist_map[self.dist](gt_data, model_data, **self.params) diff --git a/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py b/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py index 084a5de8d..2cb09c99d 100644 --- a/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py +++ b/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py @@ -38,7 +38,7 @@ def rk4_integration( population, # 15 stochastic_p, # 16 method="rk4", - silent = False + silent=False ): states = np.zeros((ndays, ncompartments, nspatial_nodes)) states_daily_incid = np.zeros((ndays, ncompartments, nspatial_nodes)) diff --git a/flepimop/gempyor_pkg/src/gempyor/subpopulation_structure.py b/flepimop/gempyor_pkg/src/gempyor/subpopulation_structure.py index b77accfab..1e2b9b8de 100644 --- a/flepimop/gempyor_pkg/src/gempyor/subpopulation_structure.py +++ b/flepimop/gempyor_pkg/src/gempyor/subpopulation_structure.py @@ -8,13 +8,13 @@ logger = logging.getLogger(__name__) -subpop_pop_key="population" -subpop_names_key="subpop" +subpop_pop_key = "population" +subpop_names_key = "subpop" class SubpopulationStructure: def __init__(self, *, setup_name, subpop_config, path_prefix=pathlib.Path(".")): - """ Important attributes: + """Important attributes: - self.setup_name: Name of the setup - self.data: DataFrame with subpopulations and populations - self.nsubpops: Number of subpopulations @@ -23,7 +23,7 @@ def __init__(self, *, setup_name, subpop_config, path_prefix=pathlib.Path(".")): - self.mobility: Mobility matrix """ - geodata_file=path_prefix / subpop_config["geodata"].get() + geodata_file = path_prefix / subpop_config["geodata"].get() self.setup_name = setup_name self.data = pd.read_csv( @@ -50,7 +50,7 @@ def __init__(self, *, setup_name, subpop_config, path_prefix=pathlib.Path(".")): raise ValueError(f"There are duplicate subpop_names in geodata.") if subpop_config["mobility"].exists(): - mobility_file= path_prefix / subpop_config["mobility"].get() + mobility_file = path_prefix / subpop_config["mobility"].get() mobility_file = pathlib.Path(mobility_file) if mobility_file.suffix == ".txt": print("Mobility files as matrices are not recommended. Please switch soon to long form csv files.") @@ -130,8 +130,3 @@ def __init__(self, *, setup_name, subpop_config, path_prefix=pathlib.Path(".")): self.nsubpops = len(self.data) # TODO: this needs to be tested self.mobility = self.mobility[selected_subpop_indices][:, selected_subpop_indices] - - - - - diff --git a/flepimop/gempyor_pkg/src/gempyor/utils.py b/flepimop/gempyor_pkg/src/gempyor/utils.py index 1076d09bc..686c09d45 100644 --- a/flepimop/gempyor_pkg/src/gempyor/utils.py +++ b/flepimop/gempyor_pkg/src/gempyor/utils.py @@ -273,20 +273,19 @@ def list_filenames(folder: str = ".", filters: list = []) -> list: def rolling_mean_pad(data, window): - """ - Calculates rolling mean with centered window and pads the edges. - - Args: - data: A NumPy array. - window: The window size for the rolling mean. + """ + Calculates rolling mean with centered window and pads the edges. - Returns: - A NumPy array with the padded rolling mean. - """ - padding_size = (window - 1) // 2 - padded_data = np.pad(data, padding_size, mode='edge') - return np.convolve(padded_data, np.ones(window) / window, mode='valid') + Args: + data: A NumPy array. + window: The window size for the rolling mean. + Returns: + A NumPy array with the padded rolling mean. + """ + padding_size = (window - 1) // 2 + padded_data = np.pad(data, padding_size, mode="edge") + return np.convolve(padded_data, np.ones(window) / window, mode="valid") def print_disk_diagnosis(): diff --git a/flepimop/gempyor_pkg/tests/seir/test_subpopulationstructure.py b/flepimop/gempyor_pkg/tests/seir/test_subpopulationstructure.py index c8119ac03..34df630c3 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_subpopulationstructure.py +++ b/flepimop/gempyor_pkg/tests/seir/test_subpopulationstructure.py @@ -26,7 +26,7 @@ def test_subpopulation_structure_mobility(): """ with tempfile.NamedTemporaryFile(delete=False) as temp_file: # Creates a temporary file - temp_file.write(subpop_config_str.encode('utf-8')) # Write the content + temp_file.write(subpop_config_str.encode("utf-8")) # Write the content temp_file.close() # Ensure the file is closed config.set_file(temp_file.name) # Load from the temporary file path @@ -60,13 +60,12 @@ def test_subpopulation_structure_mobility_txt(): """ with tempfile.NamedTemporaryFile(delete=False) as temp_file: # Creates a temporary file - temp_file.write(subpop_config_str.encode('utf-8')) # Write the content + temp_file.write(subpop_config_str.encode("utf-8")) # Write the content temp_file.close() # Ensure the file is closed config.set_file(temp_file.name) # Load from the temporary file path subpop_struct = subpopulation_structure.SubpopulationStructure( - setup_name=TEST_SETUP_NAME, - subpop_config=config["subpop_setup"] + setup_name=TEST_SETUP_NAME, subpop_config=config["subpop_setup"] ) mobility_data = scipy.sparse.csr_matrix(np.loadtxt(mobility_file), dtype=int) # mobility_data = mobility_data.pivot(index="ori", columns="dest", values="amount") @@ -91,7 +90,7 @@ def test_subpopulation_structure_subpop_population_zero_fail(): """ with tempfile.NamedTemporaryFile(delete=False) as temp_file: # Creates a temporary file - temp_file.write(subpop_config_str.encode('utf-8')) # Write the content + temp_file.write(subpop_config_str.encode("utf-8")) # Write the content temp_file.close() # Ensure the file is closed config.set_file(temp_file.name) # Load from the temporary file path @@ -102,7 +101,6 @@ def test_subpopulation_structure_subpop_population_zero_fail(): ) - def test_subpopulation_structure_dulpicate_subpop_names_fail(): config.clear() config.read(user=False) @@ -113,15 +111,14 @@ def test_subpopulation_structure_dulpicate_subpop_names_fail(): """ with tempfile.NamedTemporaryFile(delete=False) as temp_file: # Creates a temporary file - temp_file.write(subpop_config_str.encode('utf-8')) # Write the content + temp_file.write(subpop_config_str.encode("utf-8")) # Write the content temp_file.close() # Ensure the file is closed config.set_file(temp_file.name) # Load from the temporary file path - + with pytest.raises(ValueError, match=r"There are duplicate subpop_names.*"): subpop_struct = subpopulation_structure.SubpopulationStructure( - setup_name=TEST_SETUP_NAME, - subpop_config=config["subpop_setup"] - ) + setup_name=TEST_SETUP_NAME, subpop_config=config["subpop_setup"] + ) def test_subpopulation_structure_mobility_shape_fail(): @@ -134,16 +131,14 @@ def test_subpopulation_structure_mobility_shape_fail(): """ with tempfile.NamedTemporaryFile(delete=False) as temp_file: # Creates a temporary file - temp_file.write(subpop_config_str.encode('utf-8')) # Write the content + temp_file.write(subpop_config_str.encode("utf-8")) # Write the content temp_file.close() # Ensure the file is closed config.set_file(temp_file.name) # Load from the temporary file path - + with pytest.raises(ValueError, match=r"mobility data must have dimensions of length of geodata.*"): subpop_struct = subpopulation_structure.SubpopulationStructure( - setup_name=TEST_SETUP_NAME, - subpop_config=config["subpop_setup"] - ) - + setup_name=TEST_SETUP_NAME, subpop_config=config["subpop_setup"] + ) def test_subpopulation_structure_mobility_fluxes_same_ori_and_dest_fail(): @@ -156,15 +151,15 @@ def test_subpopulation_structure_mobility_fluxes_same_ori_and_dest_fail(): """ with tempfile.NamedTemporaryFile(delete=False) as temp_file: # Creates a temporary file - temp_file.write(subpop_config_str.encode('utf-8')) # Write the content + temp_file.write(subpop_config_str.encode("utf-8")) # Write the content temp_file.close() # Ensure the file is closed config.set_file(temp_file.name) # Load from the temporary file path - + with pytest.raises(ValueError, match=r"Mobility fluxes with same origin and destination.*"): subpop_struct = subpopulation_structure.SubpopulationStructure( - setup_name=TEST_SETUP_NAME, - subpop_config=config["subpop_setup"] - ) + setup_name=TEST_SETUP_NAME, subpop_config=config["subpop_setup"] + ) + def test_subpopulation_structure_mobility_npz_shape_fail(): config.clear() @@ -176,16 +171,14 @@ def test_subpopulation_structure_mobility_npz_shape_fail(): """ with tempfile.NamedTemporaryFile(delete=False) as temp_file: # Creates a temporary file - temp_file.write(subpop_config_str.encode('utf-8')) # Write the content + temp_file.write(subpop_config_str.encode("utf-8")) # Write the content temp_file.close() # Ensure the file is closed config.set_file(temp_file.name) # Load from the temporary file path - + with pytest.raises(ValueError, match=r"mobility data must have dimensions of length of geodata.*"): subpop_struct = subpopulation_structure.SubpopulationStructure( - setup_name=TEST_SETUP_NAME, - subpop_config=config["subpop_setup"] - ) - + setup_name=TEST_SETUP_NAME, subpop_config=config["subpop_setup"] + ) def test_subpopulation_structure_mobility_no_extension_fail(): @@ -198,16 +191,14 @@ def test_subpopulation_structure_mobility_no_extension_fail(): """ with tempfile.NamedTemporaryFile(delete=False) as temp_file: # Creates a temporary file - temp_file.write(subpop_config_str.encode('utf-8')) # Write the content + temp_file.write(subpop_config_str.encode("utf-8")) # Write the content temp_file.close() # Ensure the file is closed config.set_file(temp_file.name) # Load from the temporary file path with pytest.raises(ValueError, match=r"Mobility data must either be a.*"): subpop_struct = subpopulation_structure.SubpopulationStructure( - setup_name=TEST_SETUP_NAME, - subpop_config=config["subpop_setup"] - ) - + setup_name=TEST_SETUP_NAME, subpop_config=config["subpop_setup"] + ) def test_subpopulation_structure_mobility_exceed_source_node_pop_fail(): @@ -220,17 +211,16 @@ def test_subpopulation_structure_mobility_exceed_source_node_pop_fail(): """ with tempfile.NamedTemporaryFile(delete=False) as temp_file: # Creates a temporary file - temp_file.write(subpop_config_str.encode('utf-8')) # Write the content + temp_file.write(subpop_config_str.encode("utf-8")) # Write the content temp_file.close() # Ensure the file is closed config.set_file(temp_file.name) # Load from the temporary file path - + with pytest.raises( ValueError, match=r"The following entries in the mobility data exceed the source subpop populations.*" ): subpop_struct = subpopulation_structure.SubpopulationStructure( - setup_name=TEST_SETUP_NAME, - subpop_config=config["subpop_setup"] - ) + setup_name=TEST_SETUP_NAME, subpop_config=config["subpop_setup"] + ) def test_subpopulation_structure_mobility_rows_exceed_source_node_pop_fail(): @@ -243,7 +233,7 @@ def test_subpopulation_structure_mobility_rows_exceed_source_node_pop_fail(): """ with tempfile.NamedTemporaryFile(delete=False) as temp_file: # Creates a temporary file - temp_file.write(subpop_config_str.encode('utf-8')) # Write the content + temp_file.write(subpop_config_str.encode("utf-8")) # Write the content temp_file.close() # Ensure the file is closed config.set_file(temp_file.name) # Load from the temporary file path @@ -251,10 +241,8 @@ def test_subpopulation_structure_mobility_rows_exceed_source_node_pop_fail(): ValueError, match=r"The following entries in the mobility data exceed the source subpop populations.*" ): subpop_struct = subpopulation_structure.SubpopulationStructure( - setup_name=TEST_SETUP_NAME, - subpop_config=config["subpop_setup"] - ) - + setup_name=TEST_SETUP_NAME, subpop_config=config["subpop_setup"] + ) def test_subpopulation_structure_mobility_no_mobility_matrix_specified(): @@ -265,14 +253,13 @@ def test_subpopulation_structure_mobility_no_mobility_matrix_specified(): config.clear() config.read(user=False) with tempfile.NamedTemporaryFile(delete=False) as temp_file: # Creates a temporary file - temp_file.write(subpop_config_str.encode('utf-8')) # Write the content + temp_file.write(subpop_config_str.encode("utf-8")) # Write the content temp_file.close() # Ensure the file is closed config.set_file(temp_file.name) # Load from the temporary file path subpop_struct = subpopulation_structure.SubpopulationStructure( - setup_name=TEST_SETUP_NAME, - subpop_config=config["subpop_setup"] - ) + setup_name=TEST_SETUP_NAME, subpop_config=config["subpop_setup"] + ) # target = np.array([[0, 0], [0, 0]]) # 2x2, just in this case assert np.array_equal(subpop_struct.mobility.toarray(), np.zeros((2, 2)))