From 8e5baa38da87baeda38aec13a9e50068c8ef773b Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 9 Dec 2024 15:19:04 -0500 Subject: [PATCH 1/3] Correct formula for poisson log-likelihood Per this comment https://github.com/HopkinsIDD/flepiMoP/pull/375#discussion_r1874023191 corrected the formula for the poisson log-likelihood, including swapping the `ymodel` and `ydata` so it matches prior poisson likelihood. Added a unit test case to demonstrate this change is to restore current behavior. --- .../gempyor_pkg/src/gempyor/statistics.py | 4 +- .../tests/statistics/test_statistic_class.py | 48 +++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/statistics.py b/flepimop/gempyor_pkg/src/gempyor/statistics.py index 7b62c608e..3d85c2693 100644 --- a/flepimop/gempyor_pkg/src/gempyor/statistics.py +++ b/flepimop/gempyor_pkg/src/gempyor/statistics.py @@ -244,8 +244,8 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray) -> xr.DataArray: """ dist_map = { - "pois": lambda ymodel, ydata: -(ymodel + 1) - + ydata * np.log(ymodel + 1) + "pois": lambda ydata, ymodel: -ymodel + + (ydata * np.log(ymodel)) - gammaln(ydata + 1), # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> # OLD: # TODO: Swap out in favor of NEW diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index 781820165..cc4b79f8c 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -228,11 +228,59 @@ def simple_valid_resample_and_scale_factory() -> MockStatisticInput: ) +def simple_valid_factory_with_pois() -> MockStatisticInput: + data_coords = { + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 10)), + "subpop": ["01", "02", "03"], + } + data_dim = [len(v) for v in data_coords.values()] + model_data = xr.Dataset( + data_vars={ + "incidH": ( + list(data_coords.keys()), + np.random.poisson(lam=20.0, size=data_dim), + ), + "incidD": ( + list(data_coords.keys()), + np.random.poisson(lam=20.0, size=data_dim), + ), + }, + coords=data_coords, + ) + gt_data = xr.Dataset( + data_vars={ + "incidH": ( + list(data_coords.keys()), + np.random.poisson(lam=20.0, size=data_dim), + ), + "incidD": ( + list(data_coords.keys()), + np.random.poisson(lam=20.0, size=data_dim), + ), + }, + coords=data_coords, + ) + return MockStatisticInput( + "total_hospitalizations", + { + "name": "sum_hospitalizations", + "sim_var": "incidH", + "data_var": "incidH", + "remove_na": True, + "add_one": True, + "likelihood": {"dist": "pois"}, + }, + model_data=model_data, + gt_data=gt_data, + ) + + all_valid_factories = [ (simple_valid_factory), (simple_valid_resample_factory), (simple_valid_resample_factory), (simple_valid_resample_and_scale_factory), + (simple_valid_factory_with_pois), ] From 10e5d9893e340c90bb2dca52d060a7176a685c60 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:29:57 -0500 Subject: [PATCH 2/3] Add test case with poisson and zeros Added a test case with poisson log-likelihood and zero valued data with the `zero_to_one` flag set to `True`. --- .../tests/statistics/test_statistic_class.py | 53 ++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index cc4b79f8c..c0397ecb2 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -275,12 +275,48 @@ def simple_valid_factory_with_pois() -> MockStatisticInput: ) +def simple_valid_factory_with_pois_with_some_zeros() -> MockStatisticInput: + mock_input = simple_valid_factory_with_pois() + + mock_input.config["zero_to_one"] = True + + mock_input.model_data["incidH"].loc[ + { + "date": mock_input.model_data.coords["date"][0], + "subpop": mock_input.model_data.coords["subpop"][0], + } + ] = 0 + + mock_input.gt_data["incidD"].loc[ + { + "date": mock_input.gt_data.coords["date"][0], + "subpop": mock_input.gt_data.coords["subpop"][0], + } + ] = 0 + + mock_input.model_data["incidH"].loc[ + { + "date": mock_input.model_data.coords["date"][1], + "subpop": mock_input.model_data.coords["subpop"][1], + } + ] = 0 + mock_input.gt_data["incidH"].loc[ + { + "date": mock_input.gt_data.coords["date"][1], + "subpop": mock_input.gt_data.coords["subpop"][1], + } + ] = 0 + + return mock_input + + all_valid_factories = [ (simple_valid_factory), (simple_valid_resample_factory), (simple_valid_resample_factory), (simple_valid_resample_and_scale_factory), (simple_valid_factory_with_pois), + (simple_valid_factory_with_pois_with_some_zeros), ] @@ -549,8 +585,21 @@ def test_llik(self, factory: Callable[[], MockStatisticInput]) -> None: assert np.allclose( log_likelihood.values, scipy.stats.poisson.logpmf( - mock_inputs.gt_data[mock_inputs.config["data_var"]].values, - mock_inputs.model_data[mock_inputs.config["data_var"]].values, + np.where( + mock_inputs.config.get("zero_to_one", False) + & (mock_inputs.gt_data[mock_inputs.config["data_var"]].values == 0), + 1, + mock_inputs.gt_data[mock_inputs.config["data_var"]].values, + ), + np.where( + mock_inputs.config.get("zero_to_one", False) + & ( + mock_inputs.model_data[mock_inputs.config["data_var"]].values + == 0 + ), + 1, + mock_inputs.model_data[mock_inputs.config["data_var"]].values, + ), ), ) elif dist_name in {"norm", "norm_cov"}: From 14d9bf3a4d343b7c9fb1f80b51b9a0b6ec78c433 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:49:50 -0500 Subject: [PATCH 3/3] Simplify poisson LL tests with only one variable --- .../tests/statistics/test_statistic_class.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index c0397ecb2..008b5073d 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -240,10 +240,6 @@ def simple_valid_factory_with_pois() -> MockStatisticInput: list(data_coords.keys()), np.random.poisson(lam=20.0, size=data_dim), ), - "incidD": ( - list(data_coords.keys()), - np.random.poisson(lam=20.0, size=data_dim), - ), }, coords=data_coords, ) @@ -253,10 +249,6 @@ def simple_valid_factory_with_pois() -> MockStatisticInput: list(data_coords.keys()), np.random.poisson(lam=20.0, size=data_dim), ), - "incidD": ( - list(data_coords.keys()), - np.random.poisson(lam=20.0, size=data_dim), - ), }, coords=data_coords, ) @@ -287,10 +279,10 @@ def simple_valid_factory_with_pois_with_some_zeros() -> MockStatisticInput: } ] = 0 - mock_input.gt_data["incidD"].loc[ + mock_input.gt_data["incidH"].loc[ { - "date": mock_input.gt_data.coords["date"][0], - "subpop": mock_input.gt_data.coords["subpop"][0], + "date": mock_input.gt_data.coords["date"][2], + "subpop": mock_input.gt_data.coords["subpop"][2], } ] = 0