Skip to content

Commit

Permalink
Preliminary documentation for gempyor.parameters
Browse files Browse the repository at this point in the history
First draft of documentation for the `gempyor.parameters` module, most
importantly containing the `Parameters` class. Also added `__all__`
dunder to regulate exports and converted TODO comments to GitHub issues,
see GH-274, GH-275.
  • Loading branch information
TimothyWillard committed Jul 31, 2024
1 parent 040420b commit 95e0437
Showing 1 changed file with 133 additions and 35 deletions.
168 changes: 133 additions & 35 deletions flepimop/gempyor_pkg/src/gempyor/parameters.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,45 @@
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
"""
Abstractions for interacting with the parameters configurations.
This module contains abstractions for interacting with the parameters section of given
config files. Namely it contains the `Parameters` class.
"""

__all__ = ["Parameters"]


import copy
import confuse
from numpy import ndarray
import logging
from . import model_info, NPI, utils
import datetime
import xarray as xr
import logging
import os

logger = logging.getLogger(__name__)
import confuse
import numpy as np
from numpy import ndarray
import pandas as pd

# TODO: it should work like
# import xarray as xr
# xr.DataArray(p_draw, dims=["parameter", "date", "subpop"],
# coords={"parameter":modinf.parameters.pnames,
# "date":pd.date_range(modinf.ti, modinf.tf, freq="D"),
# "subpop":modinf.subpop_struct.subpop_names}).sel(parameter="gamma_0").plot()
from . import NPI, utils


## TODO: ideally here path_prefix should not be used and all files loaded from modinf
logger = logging.getLogger(__name__)


class Parameters:
"""
Encapsulates logic for loading, parsing, and summarizing parameter configurations.
Attributes:
npar: The number of parameters contained within the given configuration.
pconfig: A view subsetting to the parameters section of a given config file.
pdata: A dictionary containing a processed and reformatted view of the `pconfig`
attribute.
pnames: The names of the parameters given.
pnames2index: A mapping parameter names to their location in the `pnames`
attribute.
stacked_modifier_method: A mapping of modifier method to the parameters to which
that modifier method is relevant for.
"""

# Minimal object to be easily picklable for // runs
def __init__(
self,
Expand All @@ -35,6 +50,26 @@ def __init__(
subpop_names: list,
path_prefix: str = ".",
):
"""
Initialize a `Parameters` instance from a parameter config view.
Args:
parameter_config: A view subsetting to the parameters section of a given
config file.
ti: An initial date.
tf: A final date.
subpop_names: A list of subpopulation names.
path_prefix: A file path prefix to use when reading in parameter values from
a dataframe like file.
Raises:
ValueError: The parameter names for the SEIR model are not unique.
ValueError: The dataframe file found for a given parameter contains an
insufficient number of columns for the subpopulations being considered.
ValueError: The dataframe file found for a given parameter does not have
enough date entries to cover the time span being considered by the given
`ti` and `tf`.
"""
self.pconfig = parameter_config
self.pnames = []
self.npar = len(self.pnames)
Expand Down Expand Up @@ -118,22 +153,59 @@ def __init__(
logging.debug(f"NPI overlap operation is {self.stacked_modifier_method} ")

def picklable_lamda_alpha(self):
"""These two functions were lambda in __init__ before, it was more elegant. but as the object needs to be pickable,
we cannot use second order function, hence these ugly definitions"""
"""
Read the `alpha_val` attribute.
This defunct method returns the `alpha_val` attribute of this class which is
never set by this class. If this method is called and the `alpha_val` attribute
is not set an AttributeError will be raised.
Returns:
The `alpha_val` attribute.
"""
return self.alpha_val

def picklable_lamda_sigma(self):
"""
Read the `sigma_val` attribute.
This defunct method returns the `sigma_val` attribute of this class which is
never set by this class. If this method is called and the `sigma_val` attribute
is not set an AttributeError will be raised.
Returns:
The `sigma_val` attribute.
"""
return self.sigma_val

def get_pnames2pindex(self) -> dict:
"""
Read the `pnames2pindex` attribute.
This redundant method returns the `pnames2pindex` attribute of this class.
Returns:
A mapping parameter names to their location in the `pnames` attribute.
"""
return self.pnames2pindex

def parameters_quick_draw(self, n_days: int, nsubpops: int) -> ndarray:
"""
Returns all parameter in an array. These are drawn based on the seir::parameters section of the config, passed in as p_config.
:param n_days: number of time interval
:param nsubpops: number of spatial nodes
:return: array of shape (nparam, n_days, nsubpops) with all parameters for all nodes and all time (same value)
Format all parameters as a numpy array including sampling.
The entries in the output array are filled based on the input given in the
parameters section of a yaml config file. If the given parameter is pulled from
a distribution rather than fixed the values will be pulled from that
distribution. If an appropriate value cannot be found for an entry then a
`np.nan` is returned.
Args:
n_days: The number of days to generate an array for.
nsubpops: The number of subpopulations to generate an array for.
Returns:
A numpy array of size (`npar`, `n_days`, `nsubpops`) where `npar`
corresponds to the `npar` attribute of this class.
"""
param_arr = np.empty((self.npar, n_days, nsubpops), dtype="float64")
param_arr[:] = np.nan # fill with NaNs so we don't fail silently
Expand All @@ -148,12 +220,22 @@ def parameters_quick_draw(self, n_days: int, nsubpops: int) -> ndarray:

def parameters_load(self, param_df: pd.DataFrame, n_days: int, nsubpops: int) -> ndarray:
"""
drop-in equivalent to param_quick_draw() that take a file as written parameter_write()
:param fname:
:param n_days:
:param nsubpops:
:param extension:
:return: array of shape (nparam, n_days, nsubpops) with all parameters for all nodes and all time.
Format all parameters as a numpy array including sampling and overrides.
This method serves largely the same purpose as the `parameters_quick_draw`, but
has the ability to override the parameter specifications contained by this class
with a given dataframe.
Args:
param_df: A dataframe containing the columns 'parameter' and 'value'. If
more than one entry for a given parameter is given then only the first
value will be taken.
n_days: The number of days to generate an array for.
nsubpops: The number of subpopulations to generate an array for.
Returns:
A numpy array of size (`npar`, `n_days`, `nsubpops`) where `npar`
corresponds to the `npar` attribute of this class.
"""
param_arr = np.empty((self.npar, n_days, nsubpops), dtype="float64")
param_arr[:] = np.nan # fill with NaNs so we don't fail silently
Expand All @@ -173,9 +255,19 @@ def parameters_load(self, param_df: pd.DataFrame, n_days: int, nsubpops: int) ->

def getParameterDF(self, p_draw: ndarray) -> pd.DataFrame:
"""
return parameters generated by parameters_quick_draw() as dataframe, just the first value as they are all similar.
:param p_draw:
:return: The dataframe (to be written to disk, or not)
Serialize a parameter draw as a pandas `DataFrame`.
This method only considers distribution parameters and will pull the first
sample from the `p_draw` given.
Args:
p_draw: A numpy array of shape (`npar`, `n_days`, `nsubpops`) like that
returned by `parameters_quick_draw`.
Returns:
A pandas `DataFrame` with the columns 'parameter' and 'value' corresponding
to the parameter name and value as well as an index containing the parameter
name.
"""
# we don't write to disk time series parameters.
out_df = pd.DataFrame(
Expand All @@ -190,9 +282,15 @@ def getParameterDF(self, p_draw: ndarray) -> pd.DataFrame:
def parameters_reduce(self, p_draw: ndarray, npi: object) -> ndarray:
"""
Params reduced according to the NPI provided.
:param p_draw: array of shape (nparam, n_days, nsubpops) from p_draw
:param npi: NPI object with the reduction
:return: array of shape (nparam, n_days, nsubpops) with all parameters for all nodes and all time, reduced
Args:
p_draw: A numpy array of shape (`npar`, `n_days`, `nsubpops`) like that
returned by `parameters_quick_draw`.
npi: An NPI object describing the parameter reduction to perform.
Returns:
An array the same shape as `p_draw` with the prescribed reductions
performed.
"""
p_reduced = copy.deepcopy(p_draw)
if npi is not None:
Expand Down

0 comments on commit 95e0437

Please sign in to comment.