Improving readability/consistency of errors/exceptions in gempyor (se…

…cond batch) (#387) * Improving errors in another set of `gempyor` files Includes model_info.py, outcomes.py, seeding.py, seir.py, and simulation_component.py (this last one didn't require any edits). * Applied `black` linting to this branch * Updating a RegEx in `test_model_info.py` * `black` linting and RegEx alteration * Update flepimop/gempyor_pkg/src/gempyor/seir.py Co-authored-by: Timothy Willard <[email protected]> * Update flepimop/gempyor_pkg/src/gempyor/seir.py Co-authored-by: Timothy Willard <[email protected]> * Implementing Tim's suggestions! * Regex adjustment * Regex update * Implementing Tim's suggestions * Regex update in test_statistic_class.py * Regex update in `test_statistic_class.py` * Regex adjustment in `test_random_distribution_sampler.py` * Fixing typo in `utils.py` * Regex update in `test_random_distribution_sampler.py` * Regex update in `test_random_distribution_sampler.py` * Update utils.py: the function `test_raises_not_implemented_error()` in test file `test_read_df.py` was failing because it was receiving a `ValueError` when it expected a `NotImplementedError`. I switched the error within `utils.py:read_df()` to be an expected `NotImplementedError`. * Regex update in `test_read_df.py` * Regex update in `test_read_df.py` * Regex update in `test_read_df.py` * Updating a unit test in `test_utils.py` * Regex update in `test_utils.py` and error type agreement with `utils.py` * Regex update in `test_write_df.py` * Regex update in `test_write_df.py` * Removing deprecated style of `\` to escape colons * linting with `black` * Updating `steps_source.py` and `utils.py` There were not that many changes in the final batch of error improvements, so I went ahead and lumped them in with batch two. * Implementing Carl's suggestions Awaiting regex error kickback to adjust the test files. 
* Regex update in `test_model_info.py` * Regex update in `test_model_info.py` * Regex update in `test_model_info.py` * Typo in `model_info.py` * Using `re.escape()` to escape non-alphanumeric characters in the regex * Regex update in `test_seir.py` Updating a regex to match on a specific value rather than arbitrary wording of error message. * Syntax update in `test_seir.py` * Update test_seir.py * Regex update in `test_statistic_class.py` * Regex update in `test_statistic_class.py` * Regex update in `test_statistic_class.py` * Syntax change in `test_statistic_class.py` * Regex update in `test_statistic_class.py` * Testing a regex in `test_statistic_class.py` * Using re.escape() to escape non-alphanumeric characters in the regex * Regex update in `test_random_distribution_sampler.py` * Regex update in `test_random_distribution_sampler.py` * Update test_random_distribution_sampler.py * Update test_random_distribution_sampler.py * Using `re.escape()` in regexes * Regex update in `test_read_df.py` * Update utils.py * Regex update in `test_utils.py` * Regex update in `test_utils.py` * Update test_utils.py * Regex update in `test_write_df.py` * Update utils.py * Update test_utils.py --------- Co-authored-by: Timothy Willard <[email protected]>
HopkinsIDD · Dec 4, 2024 · 0aca24f · 0aca24f
1 parent 0ebb08d
commit 0aca24f
Show file tree

Hide file tree

Showing 16 changed files with 95 additions and 87 deletions.
diff --git a/flepimop/gempyor_pkg/src/gempyor/inference.py b/flepimop/gempyor_pkg/src/gempyor/inference.py
@@ -430,7 +430,7 @@ def get_all_sim_arguments(self):
     def get_logloss(self, proposal):
         if not self.inferpar.check_in_bound(proposal=proposal):
             if not self.silent:
-                print("`llik` is -inf (out of bound proposal)")
+                print("`llik` is -inf (out of bound proposal).")
             return -np.inf, -np.inf, -np.inf
 
         snpi_df_mod, hnpi_df_mod = self.inferpar.inject_proposal(

diff --git a/flepimop/gempyor_pkg/src/gempyor/model_info.py b/flepimop/gempyor_pkg/src/gempyor/model_info.py
@@ -20,7 +20,7 @@ def __init__(self, config: confuse.ConfigView):
         self.tf = config["end_date"].as_date()
         if self.tf <= self.ti:
             raise ValueError(
-                "tf (time to finish) is less than or equal to ti (time to start)"
+                f"Final time ('{self.tf}') is less than or equal to initial time ('{self.ti}')."
             )
         self.n_days = (self.tf - self.ti).days + 1
         self.dates = pd.date_range(start=self.ti, end=self.tf, freq="D")
@@ -77,9 +77,7 @@ def __init__(
         # Auto-detect old config
         if config["interventions"].exists():
             raise ValueError(
-                """This config has an intervention section, and has been written for a previous version of flepiMoP/COVIDScenarioPipeline \
-                             Please use flepiMoP Version 1.1 (Commit SHA: 0c30c23937dd496d33c2b9fa7c6edb198ad80dac) to run this config. \
-                             (use git checkout v1.1 inside the flepiMoP directory)"""
+                "This config has an intervention section, which is only compatible with a previous version (v1.1) of flepiMoP. "
             )
 
         # 1. Create a setup name that contains every scenario.
@@ -105,7 +103,7 @@ def __init__(
         subpop_config = config["subpop_setup"]
         if "data_path" in config:
             raise ValueError(
-                "The config has a data_path section. This is no longer supported."
+                "The config has a `data_path` section. This is no longer supported."
             )
         self.path_prefix = pathlib.Path(path_prefix)
 
@@ -170,19 +168,19 @@ def __init__(
                     self.seir_modifiers_library = config["seir_modifiers"][
                         "modifiers"
                     ].get()
-                    raise ValueError(
-                        "Not implemented yet"
+                    raise NotImplementedError(
+                        "This feature has not been implemented yet."
                     )  # TODO create a Stacked from all
             elif self.seir_modifiers_scenario is not None:
                 raise ValueError(
-                    "An seir modifiers scenario was provided to ModelInfo but no 'seir_modifiers' sections in config"
+                    "A `seir_modifiers_scenario` argument was provided to `ModelInfo` but there is no `seir_modifiers` section in the config."
                 )
             else:
-                logging.info("Running ModelInfo with seir but without SEIR Modifiers")
+                logging.info("Running `ModelInfo` with seir but without SEIR Modifiers")
 
         elif self.seir_modifiers_scenario is not None:
             raise ValueError(
-                "A seir modifiers scenario was provided to ModelInfo but no 'seir:' sections in config"
+                "A `seir_modifiers_scenario` argument was provided to `ModelInfo` but there is no `seir` section in the config."
             )
         else:
             logging.critical("Running ModelInfo without SEIR")
@@ -203,28 +201,28 @@ def __init__(
                     self.outcome_modifiers_library = config["outcome_modifiers"][
                         "modifiers"
                     ].get()
-                    raise ValueError(
-                        "Not implemented yet"
+                    raise NotImplementedError(
+                        "This feature has not been implemented yet."
                     )  # TODO create a Stacked from all
 
             ## NEED TO IMPLEMENT THIS -- CURRENTLY CANNOT USE outcome modifiers
             elif self.outcome_modifiers_scenario is not None:
                 if config["outcome_modifiers"].exists():
                     raise ValueError(
-                        "An outcome modifiers scenario was provided to ModelInfo but no 'outcome_modifiers' sections in config"
+                        "A `outcome_modifiers_scenario` argument was provided to `ModelInfo` but there is no `outcome_modifiers` section in the config."
                     )
                 else:
                     self.outcome_modifiers_scenario = None
             else:
                 logging.info(
-                    "Running ModelInfo with outcomes but without Outcomes Modifiers"
+                    "Running `ModelInfo` with outcomes but without Outcomes Modifiers"
                 )
         elif self.outcome_modifiers_scenario is not None:
             raise ValueError(
-                "An outcome modifiers scenario was provided to ModelInfo but no 'outcomes:' sections in config"
+                "A `outcome_modifiers_scenario` argument was provided to `ModelInfo` but there is no `outcomes` section in the config."
             )
         else:
-            logging.info("Running ModelInfo without Outcomes")
+            logging.info("Running `ModelInfo` without outcomes.")
 
         # 6. Inputs and outputs
         if in_run_id is None:

diff --git a/flepimop/gempyor_pkg/src/gempyor/outcomes.py b/flepimop/gempyor_pkg/src/gempyor/outcomes.py
@@ -140,7 +140,7 @@ def read_parameters_from_config(modinf: model_info.ModelInfo):
                 branching_data = pa.parquet.read_table(branching_file).to_pandas()
                 if "relative_probability" not in list(branching_data["quantity"]):
                     raise ValueError(
-                        f"No 'relative_probability' quantity in {branching_file}, therefor making it useless"
+                        f"There is no `relative_probability` quantity in '{branching_file}'."
                     )
 
                 print(
@@ -162,7 +162,7 @@ def read_parameters_from_config(modinf: model_info.ModelInfo):
                     modinf.subpop_struct.subpop_names
                 ):
                     raise ValueError(
-                        f"Places in seir input files does not correspond to subpops in outcome probability file {branching_file}"
+                        f"SEIR input files do not have subpops that match those in outcome probability file '{branching_file}'."
                     )
 
         parameters = {}
@@ -178,7 +178,8 @@ def read_parameters_from_config(modinf: model_info.ModelInfo):
 
                 else:
                     raise ValueError(
-                        f"unsure how to read outcome {new_comp}: not a str, nor an incidence or prevalence: {src_name}"
+                        f"Expected a `str` or `dict` containing `incidence` or `prevalence`. "
+                        f"Instead given '{src_name}' for outcome '{new_comp}'."
                     )
 
                 parameters[new_comp]["probability"] = outcomes_config[new_comp][
@@ -289,7 +290,7 @@ def read_parameters_from_config(modinf: model_info.ModelInfo):
                 parameters[new_comp] = {}
                 parameters[new_comp]["sum"] = outcomes_config[new_comp]["sum"].get()
             else:
-                raise ValueError(f"No 'source' or 'sum' specified for comp {new_comp}")
+                raise ValueError(f"No `source` or `sum` specified for comp '{new_comp}'.")
 
     return parameters
 
@@ -392,15 +393,15 @@ def compute_all_multioutcomes(
                     )
                 else:
                     raise ValueError(
-                        f"Unknown type for seir simulation provided, got f{type(seir_sim)}"
+                        f"Unknown type provided for seir simulation, received '{type(seir_sim)}'."
                     )
                 # we don't keep source in this cases
             else:  # already defined outcomes
                 if source_name in all_data:
                     source_array = all_data[source_name]
                 else:
                     raise ValueError(
-                        f"ERROR with outcome {new_comp}: the specified source {source_name} is not a dictionnary (for seir outcome) nor an existing pre-identified outcomes."
+                        f"Issue with outcome '{new_comp}'; the specified source '{source_name}' is neither a dictionnary (for seir outcome) nor an existing pre-identified outcome."
                     )
 
             if (loaded_values is not None) and (
@@ -586,7 +587,7 @@ def filter_seir_df(diffI, dates, subpops, filters, outcome_name) -> np.ndarray:
         vtype = "prevalence"
     else:
         raise ValueError(
-            f"Cannot distinguish the source of outcome {outcome_name}: it is not another previously defined outcome and there is no 'incidence:' or 'prevalence:'."
+            f"Cannot discern the source of outcome '{outcome_name}'; it is not a previously defined outcome and there is no `incidence` or `prevalence`."
         )
 
     diffI = diffI[diffI["mc_value_type"] == vtype]
@@ -615,7 +616,7 @@ def filter_seir_xr(diffI, dates, subpops, filters, outcome_name) -> np.ndarray:
         vtype = "prevalence"
     else:
         raise ValueError(
-            f"Cannot distinguish the source of outcome {outcome_name}: it is not another previously defined outcome and there is no 'incidence:' or 'prevalence:'."
+            f"Cannot discern the source of outcome '{outcome_name}'; it is not a previously defined outcome and there is no `incidence` or `prevalence`."
         )
     # Filter the data
     filters = filters[vtype]
@@ -665,7 +666,7 @@ def multishiftee(arr, shifts, stoch_delay_flag=True):
     result = np.zeros_like(arr)
 
     if stoch_delay_flag:
-        raise ValueError("NOT SUPPORTED YET")
+        raise NotImplementedError("`stoch_delay_flag` not supported yet.")
         # for i, row in reversed(enumerate(np.rows(arr))):
         #    for j,elem in reversed(enumerate(row)):
         ## This function takes in :
@@ -691,7 +692,7 @@ def multishift(arr, shifts, stoch_delay_flag=True):
     result = np.zeros_like(arr)
 
     if stoch_delay_flag:
-        raise ValueError("NOT SUPPORTED YET")
+        raise NotImplementedError("`stoch_delay_flag` not supported yet.")
         # for i, row in reversed(enumerate(np.rows(arr))):
         #    for j,elem in reversed(enumerate(row)):
         ## This function takes in :

diff --git a/flepimop/gempyor_pkg/src/gempyor/seeding.py b/flepimop/gempyor_pkg/src/gempyor/seeding.py
@@ -17,9 +17,7 @@
 
 def _DataFrame2NumbaDict(df, amounts, modinf) -> nb.typed.Dict:
     if not df["date"].is_monotonic_increasing:
-        raise ValueError(
-            "_DataFrame2NumbaDict got an unsorted dataframe, exposing itself to non-sense"
-        )
+        raise ValueError("The `df` given is not sorted by the 'date' column.")
 
     cmp_grp_names = [
         col for col in modinf.compartments.compartments.columns if col != "name"
@@ -111,7 +109,7 @@ def get_from_config(self, sim_id: int, modinf) -> nb.typed.Dict:
             dupes = seeding[seeding.duplicated(["subpop", "date"])].index + 1
             if not dupes.empty:
                 raise ValueError(
-                    f"Repeated subpop-date in rows {dupes.tolist()} of seeding::lambda_file."
+                    f"There are repeating subpop-date in rows '{dupes.tolist()}' of `seeding::lambda_file`."
                 )
         elif method == "FolderDraw":
             seeding = pd.read_csv(
@@ -136,7 +134,7 @@ def get_from_config(self, sim_id: int, modinf) -> nb.typed.Dict:
             seeding = pd.DataFrame(columns=["date", "subpop"])
             return _DataFrame2NumbaDict(df=seeding, amounts=[], modinf=modinf)
         else:
-            raise NotImplementedError(f"unknown seeding method [got: {method}]")
+            raise ValueError(f"Unknown seeding method given, '{method}'.")
 
         # Sorting by date is very important here for the seeding format necessary !!!!
         # print(seeding.shape)
@@ -160,8 +158,6 @@ def get_from_config(self, sim_id: int, modinf) -> nb.typed.Dict:
             )
         elif method == "FolderDraw" or method == "FromFile":
             amounts = seeding["amount"]
-        else:
-            raise ValueError(f"Unknown seeding method: {method}")
 
         return _DataFrame2NumbaDict(df=seeding, amounts=amounts, modinf=modinf)
 

diff --git a/flepimop/gempyor_pkg/src/gempyor/seir.py b/flepimop/gempyor_pkg/src/gempyor/seir.py
@@ -34,7 +34,9 @@ def build_step_source_arg(
             if integration_method == "rk4":
                 integration_method = "rk4.jit"
             if integration_method not in ["rk4.jit", "legacy"]:
-                raise ValueError(f"Unknown integration method {integration_method}.")
+                raise ValueError(
+                    f"Unknown integration method given, '{integration_method}'."
+                )
         if "dt" in modinf.seir_config["integration"].keys():
             dt = float(
                 eval(str(modinf.seir_config["integration"]["dt"].get()))
@@ -151,8 +153,7 @@ def steps_SEIR(
     elif integration_method == "rk4.jit":
         if modinf.stoch_traj_flag == True:
             raise ValueError(
-                f"with method {integration_method}, only deterministic "
-                f"integration is possible (got stoch_straj_flag={modinf.stoch_traj_flag}"
+                f"'{integration_method}' integration method only supports deterministic integration, but `stoch_straj_flag` is '{modinf.stoch_traj_flag}'."
             )
         seir_sim = steps_rk4.rk4_integration(**fnct_args, silent=True)
     else:
@@ -167,8 +168,7 @@ def steps_SEIR(
         ]:
             if modinf.stoch_traj_flag == True:
                 raise ValueError(
-                    f"with method {integration_method}, only deterministic "
-                    f"integration is possible (got stoch_straj_flag={modinf.stoch_traj_flag}"
+                    f"'{integration_method}' integration method only supports deterministic integration, but `stoch_straj_flag` is '{modinf.stoch_traj_flag}'."
                 )
             seir_sim = steps_experimental.ode_integration(
                 **fnct_args, integration_method=integration_method
@@ -190,7 +190,7 @@ def steps_SEIR(
         elif integration_method == "rk4_aot":
             seir_sim = steps_experimental.rk4_aot(**fnct_args)
         else:
-            raise ValueError(f"Unknow integration scheme, got {integration_method}")
+            raise ValueError(f"Unknown integration method given, '{integration_method}'.")
 
     # We return an xarray instead of a ndarray now
     compartment_coords = {}

diff --git a/flepimop/gempyor_pkg/src/gempyor/statistics.py b/flepimop/gempyor_pkg/src/gempyor/statistics.py
@@ -74,7 +74,10 @@ def __init__(self, name: str, statistic_config: confuse.ConfigView) -> None:
                 reg_name = reg_config["name"].get()
                 reg_func = getattr(self, f"_{reg_name}_regularize", None)
                 if reg_func is None:
-                    raise ValueError(f"Unsupported regularization: {reg_name}")
+                    raise ValueError(
+                        f"Unsupported regularization [received: '{reg_name}']. "
+                        f"Currently only `forecast` and `allsubpop` are supported."
+                    )
                 self.regularizations.append((reg_func, reg_config.get()))
 
         self.resample = False
@@ -253,7 +256,10 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray) -> xr.DataArray:
             "absolute_error": lambda x, y: -np.log(np.nansum(np.abs(x - y))),
         }
         if self.dist not in dist_map:
-            raise ValueError(f"Invalid distribution specified: {self.dist}")
+            raise ValueError(
+                f"Invalid distribution specified: '{self.dist}'. "
+                f"Valid distributions: '{dist_map.keys()}'."
+            )
         if self.dist in ["pois", "nbinom"]:
             model_data = model_data.astype(int)
             gt_data = gt_data.astype(int)
@@ -295,11 +301,9 @@ def compute_logloss(
 
         if not model_data.shape == gt_data.shape:
             raise ValueError(
-                (
-                    f"{self.name} Statistic error: data and groundtruth do not have "
-                    f"the same shape: model_data.shape={model_data.shape} != "
-                    f"gt_data.shape={gt_data.shape}"
-                )
+                f"`model_data` and `gt_data` do not have "
+                f"the same shape: `model_data.shape` = '{model_data.shape}' != "
+                f"`gt_data.shape` = '{gt_data.shape}'."
             )
 
         regularization = 0.0

diff --git a/flepimop/gempyor_pkg/src/gempyor/steps_source.py b/flepimop/gempyor_pkg/src/gempyor/steps_source.py
@@ -278,8 +278,11 @@ def steps_SEIR_nb(
                 print("  ", states_current[comp].max())
 
         if (states_current.min() < 0) or (states_current.max() > 10**10):
-            print((states_current.min() < 0), (states_current.max() > 10**10))
-            raise ValueError(f"Overflow error. Too small ?. Too large ?")
+            raise ValueError(
+                f"State values are outside the valid range. "
+                f"Minimum value: '{states_current.min()}', Maximum value: '{states_current.max()}'. "
+                f"Valid range is between 0 and {10**10}."
+            )
 
     return states, states_daily_incid