From 4b1a854054288ea5694d3b5102955b8c567717a6 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 31 Jul 2024 10:46:10 -0400 Subject: [PATCH] Preliminary documentation for `gempyor.parameters` First draft of documentation for the `gempyor.parameters` module, most importantly containing the `Parameters` class. Also added `__all__` dunder to regulate exports and converted TODO comments to GitHub issues, see GH-274, GH-275. --- .../gempyor_pkg/src/gempyor/parameters.py | 168 ++++++++++++++---- 1 file changed, 133 insertions(+), 35 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/parameters.py b/flepimop/gempyor_pkg/src/gempyor/parameters.py index b01fb52ab..da41fc394 100644 --- a/flepimop/gempyor_pkg/src/gempyor/parameters.py +++ b/flepimop/gempyor_pkg/src/gempyor/parameters.py @@ -1,30 +1,45 @@ -import numpy as np -import pandas as pd -import pyarrow as pa -import pyarrow.parquet as pq +""" +Abstractions for interacting with the parameters configurations. + +This module contains abstractions for interacting with the parameters section of given +config files. Namely it contains the `Parameters` class. +""" + +__all__ = ["Parameters"] + + import copy -import confuse -from numpy import ndarray -import logging -from . import model_info, NPI, utils import datetime -import xarray as xr +import logging import os -logger = logging.getLogger(__name__) +import confuse +import numpy as np +from numpy import ndarray +import pandas as pd -# TODO: it should work like -# import xarray as xr -# xr.DataArray(p_draw, dims=["parameter", "date", "subpop"], -# coords={"parameter":modinf.parameters.pnames, -# "date":pd.date_range(modinf.ti, modinf.tf, freq="D"), -# "subpop":modinf.subpop_struct.subpop_names}).sel(parameter="gamma_0").plot() +from . import NPI, utils -## TODO: ideally here path_prefix should not be used and all files loaded from modinf +logger = logging.getLogger(__name__) class Parameters: + """ + Encapsulates logic for loading, parsing, and summarizing parameter configurations. + + Attributes: + npar: The number of parameters contained within the given configuration. + pconfig: A view subsetting to the parameters section of a given config file. + pdata: A dictionary containing a processed and reformatted view of the `pconfig` + attribute. + pnames: The names of the parameters given. + pnames2index: A mapping parameter names to their location in the `pnames` + attribute. + stacked_modifier_method: A mapping of modifier method to the parameters to which + that modifier method is relevant for. + """ + # Minimal object to be easily picklable for // runs def __init__( self, @@ -35,6 +50,26 @@ def __init__( subpop_names: list, path_prefix: str = ".", ): + """ + Initialize a `Parameters` instance from a parameter config view. + + Args: + parameter_config: A view subsetting to the parameters section of a given + config file. + ti: An initial date. + tf: A final date. + subpop_names: A list of subpopulation names. + path_prefix: A file path prefix to use when reading in parameter values from + a dataframe like file. + + Raises: + ValueError: The parameter names for the SEIR model are not unique. + ValueError: The dataframe file found for a given parameter contains an + insufficient number of columns for the subpopulations being considered. + ValueError: The dataframe file found for a given parameter does not have + enough date entries to cover the time span being considered by the given + `ti` and `tf`. + """ self.pconfig = parameter_config self.pnames = [] self.npar = len(self.pnames) @@ -118,22 +153,59 @@ def __init__( logging.debug(f"NPI overlap operation is {self.stacked_modifier_method} ") def picklable_lamda_alpha(self): - """These two functions were lambda in __init__ before, it was more elegant. but as the object needs to be pickable, - we cannot use second order function, hence these ugly definitions""" + """ + Read the `alpha_val` attribute. + + This defunct method returns the `alpha_val` attribute of this class which is + never set by this class. If this method is called and the `alpha_val` attribute + is not set an AttributeError will be raised. + + Returns: + The `alpha_val` attribute. + """ return self.alpha_val def picklable_lamda_sigma(self): + """ + Read the `sigma_val` attribute. + + This defunct method returns the `sigma_val` attribute of this class which is + never set by this class. If this method is called and the `sigma_val` attribute + is not set an AttributeError will be raised. + + Returns: + The `sigma_val` attribute. + """ return self.sigma_val def get_pnames2pindex(self) -> dict: + """ + Read the `pnames2pindex` attribute. + + This redundant method returns the `pnames2pindex` attribute of this class. + + Returns: + A mapping parameter names to their location in the `pnames` attribute. + """ return self.pnames2pindex def parameters_quick_draw(self, n_days: int, nsubpops: int) -> ndarray: """ - Returns all parameter in an array. These are drawn based on the seir::parameters section of the config, passed in as p_config. - :param n_days: number of time interval - :param nsubpops: number of spatial nodes - :return: array of shape (nparam, n_days, nsubpops) with all parameters for all nodes and all time (same value) + Format all parameters as a numpy array including sampling. + + The entries in the output array are filled based on the input given in the + parameters section of a yaml config file. If the given parameter is pulled from + a distribution rather than fixed the values will be pulled from that + distribution. If an appropriate value cannot be found for an entry then a + `np.nan` is returned. + + Args: + n_days: The number of days to generate an array for. + nsubpops: The number of subpopulations to generate an array for. + + Returns: + A numpy array of size (`npar`, `n_days`, `nsubpops`) where `npar` + corresponds to the `npar` attribute of this class. """ param_arr = np.empty((self.npar, n_days, nsubpops), dtype="float64") param_arr[:] = np.nan # fill with NaNs so we don't fail silently @@ -148,12 +220,22 @@ def parameters_quick_draw(self, n_days: int, nsubpops: int) -> ndarray: def parameters_load(self, param_df: pd.DataFrame, n_days: int, nsubpops: int) -> ndarray: """ - drop-in equivalent to param_quick_draw() that take a file as written parameter_write() - :param fname: - :param n_days: - :param nsubpops: - :param extension: - :return: array of shape (nparam, n_days, nsubpops) with all parameters for all nodes and all time. + Format all parameters as a numpy array including sampling and overrides. + + This method serves largely the same purpose as the `parameters_quick_draw`, but + has the ability to override the parameter specifications contained by this class + with a given dataframe. + + Args: + param_df: A dataframe containing the columns 'parameter' and 'value'. If + more than one entry for a given parameter is given then only the first + value will be taken. + n_days: The number of days to generate an array for. + nsubpops: The number of subpopulations to generate an array for. + + Returns: + A numpy array of size (`npar`, `n_days`, `nsubpops`) where `npar` + corresponds to the `npar` attribute of this class. """ param_arr = np.empty((self.npar, n_days, nsubpops), dtype="float64") param_arr[:] = np.nan # fill with NaNs so we don't fail silently @@ -173,9 +255,19 @@ def parameters_load(self, param_df: pd.DataFrame, n_days: int, nsubpops: int) -> def getParameterDF(self, p_draw: ndarray) -> pd.DataFrame: """ - return parameters generated by parameters_quick_draw() as dataframe, just the first value as they are all similar. - :param p_draw: - :return: The dataframe (to be written to disk, or not) + Serialize a parameter draw as a pandas `DataFrame`. + + This method only considers distribution parameters and will pull the first + sample from the `p_draw` given. + + Args: + p_draw: A numpy array of shape (`npar`, `n_days`, `nsubpops`) like that + returned by `parameters_quick_draw`. + + Returns: + A pandas `DataFrame` with the columns 'parameter' and 'value' corresponding + to the parameter name and value as well as an index containing the parameter + name. """ # we don't write to disk time series parameters. out_df = pd.DataFrame( @@ -190,9 +282,15 @@ def getParameterDF(self, p_draw: ndarray) -> pd.DataFrame: def parameters_reduce(self, p_draw: ndarray, npi: object) -> ndarray: """ Params reduced according to the NPI provided. - :param p_draw: array of shape (nparam, n_days, nsubpops) from p_draw - :param npi: NPI object with the reduction - :return: array of shape (nparam, n_days, nsubpops) with all parameters for all nodes and all time, reduced + + Args: + p_draw: A numpy array of shape (`npar`, `n_days`, `nsubpops`) like that + returned by `parameters_quick_draw`. + npi: An NPI object describing the parameter reduction to perform. + + Returns: + An array the same shape as `p_draw` with the prescribed reductions + performed. """ p_reduced = copy.deepcopy(p_draw) if npi is not None: