diff --git a/documentation/gitbook/how-to-run/multi-configs.md b/documentation/gitbook/how-to-run/multi-configs.md index 738de3399..54402c3b5 100644 --- a/documentation/gitbook/how-to-run/multi-configs.md +++ b/documentation/gitbook/how-to-run/multi-configs.md @@ -20,58 +20,29 @@ You should see an assortment of yml files as a result of that `ls` command. ## Usage -If you run +If you run: ```bash flepimop simulate config_sample_2pop.yml ``` -you'll get a basic foward simulation of this example model. However, you might also note there are several `*_part.yml` files, corresponding to partial configs. You can `simulate` using the combination of multiple configs with, for example: +You'll get a basic forward simulation of this example model. However, you might also note there are several `*_part.yml` files, corresponding to partial configs. You can `simulate` using the combination of multiple configs with, for example: ```bash flepimop simulate config_sample_2pop.yml config_sample_2pop_outcomes_part.yml ``` -if want to see what the combined configuration is, you can use the `patch` command: +While simulate can run your patched configuration, we also suggest you check your configuration file using the patch command: ```bash -flepimop patch config_sample_2pop.yml config_sample_2pop_outcomes_part.yml +flepimop patch config_sample_2pop.yml config_sample_2pop_outcomes_part.yml > config_new.yml +cat config_new.yml ``` You may provide an arbitrary number of separate configuration files to combine to create a complete configuration. ## Caveats -At this time, only `simulate` supports multiple configuration files. Also, the patching operation is fairly crude: configuration options override previous ones completely, though with a warning. The files provided from left to right are from lowest priority (i.e. for the first file, only options specified in no other files are used) to highest priority (i.e. for the last file, its options override any other specification). +At this time, only simulate directly supports multiple configuration files, and our current patching capabilities only allow for the addition of new sections as given in our tutorials. This is helpful for building models piece-by-piece from a simple compartmental forward simulation, to including outcome probabilities, and finally, adding modifier sections. If multiple configuration files specify the same higher level configuration chunks (e.g., seir, outcomes), this will yield an error. We are expanding coverage of this capability to other flepimop actions, e.g. inference, and are exploring options for smarter patching. - -However, currently there are pitfalls like - -```yaml -# config1 -seir_modifiers: - scenarios: ["one", "two"] - one: - # ... - two: - # ... -``` - -```yaml -# config2 -seir_modifiers: - scenarios: ["one", "three"] - one: - # ... - three: - # ... -``` - -Then you might expect - -```bash -flepimop simulate config1.yml config2.yml -``` - -...to override seir scenario one and add scenario three, but what actually happens is that the entire seir_modifiers from config1 is overriden by config2. Specifying the configuration files in the reverse order would lead to a different outcome (the config1 seir_modifiers overrides config2 settings). If you're doing complex combinations of configuration files, you should use `flepimop patch ...` to ensure you're getting what you expect. \ No newline at end of file diff --git a/flepimop/gempyor_pkg/src/gempyor/cli.py b/flepimop/gempyor_pkg/src/gempyor/cli.py index 70da194d2..529fd2638 100644 --- a/flepimop/gempyor_pkg/src/gempyor/cli.py +++ b/flepimop/gempyor_pkg/src/gempyor/cli.py @@ -1,4 +1,5 @@ -from click import pass_context, Context +import click +import yaml from .shared_cli import ( config_files_argument, @@ -7,7 +8,7 @@ cli, mock_context, ) -from .utils import config +from .utils import _dump_formatted_yaml, config # register the commands from the other modules from . import compartments, simulate @@ -20,12 +21,94 @@ # add some basic commands to the CLI -@cli.command(params=[config_files_argument] + list(config_file_options.values())) -@pass_context -def patch(ctx: Context = mock_context, **kwargs) -> None: - """Merge configuration files""" +@cli.command( + params=[config_files_argument] + list(config_file_options.values()), + context_settings=dict(help_option_names=["-h", "--help"]), +) +@click.pass_context +def patch(ctx: click.Context = mock_context, **kwargs) -> None: + """Merge configuration files + + This command will merge multiple config files together by overriding the top level + keys in config files. The order of the config files is important, as the last file + has the highest priority and the first has the lowest. + + A brief example of the command is shown below using the sample config files from the + `examples/tutorials` directory. The command will merge the two files together and + print the resulting configuration to the console. + + \b + ```bash + $ flepimop patch config_sample_2pop_modifiers_part.yml config_sample_2pop_outcomes_part.yml > config_sample_2pop_patched.yml + $ cat config_sample_2pop_patched.yml + write_csv: false + stoch_traj_flag: false + jobs: 14 + write_parquet: true + first_sim_index: 1 + config_src: [config_sample_2pop_modifiers_part.yml, config_sample_2pop_outcomes_part.yml] + seir_modifiers: + scenarios: [Ro_lockdown, Ro_all] + modifiers: + Ro_lockdown: + method: SinglePeriodModifier + parameter: Ro + period_start_date: 2020-03-15 + period_end_date: 2020-05-01 + subpop: all + value: 0.4 + Ro_relax: + method: SinglePeriodModifier + parameter: Ro + period_start_date: 2020-05-01 + period_end_date: 2020-08-31 + subpop: all + value: 0.8 + Ro_all: + method: StackedModifier + modifiers: [Ro_lockdown, Ro_relax] + outcome_modifiers: + scenarios: [test_limits] + modifiers: + test_limits: + method: SinglePeriodModifier + parameter: incidCase::probability + subpop: all + period_start_date: 2020-02-01 + period_end_date: 2020-06-01 + value: 0.5 + outcomes: + method: delayframe + outcomes: + incidCase: + source: + incidence: + infection_stage: I + probability: + value: 0.5 + delay: + value: 5 + incidHosp: + source: + incidence: + infection_stage: I + probability: + value: 0.05 + delay: + value: 7 + duration: + value: 10 + name: currHosp + incidDeath: + source: incidHosp + probability: + value: 0.2 + delay: + value: 14 + ``` + """ parse_config_files(config, ctx, **kwargs) - print(config.dump()) + print(_dump_formatted_yaml(config)) if __name__ == "__main__": diff --git a/flepimop/gempyor_pkg/src/gempyor/shared_cli.py b/flepimop/gempyor_pkg/src/gempyor/shared_cli.py index 3e5f0daea..367769cde 100644 --- a/flepimop/gempyor_pkg/src/gempyor/shared_cli.py +++ b/flepimop/gempyor_pkg/src/gempyor/shared_cli.py @@ -222,7 +222,9 @@ def _parse_option(param: click.Parameter, value: Any) -> Any: config_src = [] if len(found_configs) != 1: if not found_configs: - raise ValueError(f"No config files provided.") + click.echo("No configuration provided! See help for required usage:\n") + click.echo(ctx.get_help()) + ctx.exit() else: error_dict = {k: kwargs[k] for k in found_configs} raise ValueError( @@ -237,21 +239,30 @@ def _parse_option(param: click.Parameter, value: Any) -> Any: ) config_src = _parse_option(config_validator, kwargs[config_key]) cfg.clear() + cfg_data = {} for config_file in config_src: tmp = confuse.Configuration("tmp") tmp.set_file(config_file) - if intersect := set(tmp.keys()) & set(cfg.keys()): - warnings.warn(f"Configuration files contain overlapping keys: {intersect}.") - cfg.set_file(config_file) + if intersect := set(tmp.keys()) & set(cfg_data.keys()): + intersect = ", ".join(sorted(list(intersect))) + raise ValueError( + "Configuration files contain overlapping keys, " + f"{intersect}, introduced by {config_file}." + ) + for k in tmp.keys(): + cfg_data[k] = tmp[k].get() + cfg.set(cfg_data) cfg["config_src"] = [str(k) for k in config_src] # deal with the scenario overrides - scen_args = {k for k in parsed_args if k.endswith("scenarios") and kwargs.get(k)} - for option in scen_args: + scen_args = {k for k in parsed_args if k.endswith("_scenarios")} + for option in {s for s in scen_args if kwargs.get(s)}: key = option.replace("_scenarios", "") value = _parse_option(config_file_options[option], kwargs[option]) if cfg[key].exists(): - cfg[key]["scenarios"] = as_list(value) + cfg[key]["scenarios"] = ( + list(value) if isinstance(value, tuple) else as_list(value) + ) else: raise ValueError( f"Specified {option} when no {key} in configuration file(s): {config_src}" diff --git a/flepimop/gempyor_pkg/src/gempyor/simulate.py b/flepimop/gempyor_pkg/src/gempyor/simulate.py index e8602818d..2b9ffda37 100644 --- a/flepimop/gempyor_pkg/src/gempyor/simulate.py +++ b/flepimop/gempyor_pkg/src/gempyor/simulate.py @@ -300,7 +300,9 @@ def simulate( @cli.command( - name="simulate", params=[config_files_argument] + list(config_file_options.values()) + name="simulate", + params=[config_files_argument] + list(config_file_options.values()), + context_settings=dict(help_option_names=["-h", "--help"]), ) @pass_context def _click_simulate(ctx: Context, **kwargs) -> int: diff --git a/flepimop/gempyor_pkg/src/gempyor/utils.py b/flepimop/gempyor_pkg/src/gempyor/utils.py index 2a848d5b9..af4fbf950 100644 --- a/flepimop/gempyor_pkg/src/gempyor/utils.py +++ b/flepimop/gempyor_pkg/src/gempyor/utils.py @@ -17,6 +17,7 @@ import scipy.ndimage import scipy.stats import sympy.parsing.sympy_parser +import yaml from . import file_paths @@ -1039,3 +1040,74 @@ def move_file_at_local(name_map: dict[str, str]) -> None: for src, dst in name_map.items(): os.path.makedirs(os.path.dirname(dst), exist_ok=True) shutil.copy(src, dst) + + +def _dump_formatted_yaml(cfg: confuse.Configuration) -> str: + """ + Dump confuse configuration to a formatted YAML string. + + Args: + cfg: The confuse configuration object. + + Returns: + A formatted YAML string representation of the configuration. + + Examples: + >>> from gempyor.utils import _dump_formatted_yaml + >>> import confuse + >>> conf = confuse.Configuration("foobar") + >>> data = { + ... "name": "Test Config", + ... "compartments": { + ... "infection_stage": ["S", "E", "I", "R"] + ... }, + ... "seir": { + ... "parameters": { + ... "beta": {"value": 3.4}, + ... "gamma": {"value": 5.6}, + ... }, + ... "transitions": { + ... "source": ["S"], + ... "destination": ["E"], + ... "rate": ["beta * gamma"], + ... "proportional_to": [["S"], ["I"]], + ... "proportion_exponent": [1, 1], + ... }, + ... }, + ... } + >>> conf.set(data) + >>> print(_dump_formatted_yaml(conf)) + name: "Test Config" + compartments: + infection_stage: [S, E, I, R] + seir: + parameters: + beta: + value: 3.4 + gamma: + value: 5.6 + transitions: + source: [S] + destination: [E] + rate: ["beta * gamma"] + proportional_to: [[S], [I]] + proportion_exponent: [1, 1] + """ + + class CustomDumper(yaml.Dumper): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.add_representer(list, self._represent_list) + self.add_representer(str, self._represent_str) + + def _represent_list(self, dumper, data): + return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True) + + def _represent_str(self, dumper, data): + if " " in data: + return dumper.represent_scalar("tag:yaml.org,2002:str", data, style='"') + return dumper.represent_scalar("tag:yaml.org,2002:str", data) + + return yaml.dump( + yaml.safe_load(cfg.dump()), Dumper=CustomDumper, indent=4, sort_keys=False + ) diff --git a/flepimop/gempyor_pkg/tests/cli/test_flepimop_patch_cli.py b/flepimop/gempyor_pkg/tests/cli/test_flepimop_patch_cli.py new file mode 100644 index 000000000..b4ba20021 --- /dev/null +++ b/flepimop/gempyor_pkg/tests/cli/test_flepimop_patch_cli.py @@ -0,0 +1,412 @@ +from pathlib import Path +from typing import Any + +from click.testing import CliRunner +import pytest +import yaml + +from gempyor.cli import patch + + +@pytest.mark.parametrize( + ("data_one", "data_two"), + ( + ( + { + "seir": { + "parameters": { + "beta": {"value": 1.2}, + } + } + }, + { + "seir": { + "parameters": { + "gamma": {"value": 3.4}, + } + } + }, + ), + ( + { + "seir": { + "parameters": { + "sigma": {"value": 5.6}, + "gamma": {"value": 7.8}, + } + } + }, + { + "seir": { + "parameters": { + "gamma": {"value": 3.4}, + } + } + }, + ), + ), +) +def test_overlapping_sections_value_error( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + data_one: dict[str, Any], + data_two: dict[str, Any], +) -> None: + # Setup the test + monkeypatch.chdir(tmp_path) + config_one = tmp_path / "config_one.yml" + config_one.write_text(yaml.dump(data_one)) + config_two = tmp_path / "config_two.yml" + config_two.write_text(yaml.dump(data_two)) + + # Invoke the command + runner = CliRunner() + result = runner.invoke(patch, [config_one.name, config_two.name]) + assert result.exit_code == 1 + assert isinstance(result.exception, ValueError) + assert str(result.exception) == ( + "Configuration files contain overlapping keys, seir, introduced by config_two.yml." + ) + + +@pytest.mark.parametrize( + ("data", "seir_modifier_scenarios", "outcome_modifier_scenarios"), + ( + ( + { + "seir_modifiers": { + "scenarios": ["Ro_lockdown", "Ro_all"], + "modifiers": { + "Ro_lockdown": { + "method": "SinglePeriodModifier", + "parameter": "Ro", + "period_start_date": "2020-03-15", + "period_end_date": "2020-05-01", + "subpop": "all", + "value": 0.4, + }, + "Ro_relax": { + "method": "SinglePeriodModifier", + "parameter": "Ro", + "period_start_date": "2020-05-01", + "period_end_date": "2020-07-01", + "subpop": "all", + "value": 0.8, + }, + "Ro_all": { + "method": "StackedModifier", + "modifiers": ["Ro_lockdown", "Ro_relax"], + }, + }, + }, + }, + [], + [], + ), + ( + { + "seir_modifiers": { + "scenarios": ["Ro_lockdown", "Ro_all"], + "modifiers": { + "Ro_lockdown": { + "method": "SinglePeriodModifier", + "parameter": "Ro", + "period_start_date": "2020-03-15", + "period_end_date": "2020-05-01", + "subpop": "all", + "value": 0.4, + }, + "Ro_relax": { + "method": "SinglePeriodModifier", + "parameter": "Ro", + "period_start_date": "2020-05-01", + "period_end_date": "2020-07-01", + "subpop": "all", + "value": 0.8, + }, + "Ro_all": { + "method": "StackedModifier", + "modifiers": ["Ro_lockdown", "Ro_relax"], + }, + }, + }, + }, + ["Ro_all"], + [], + ), + ( + { + "seir_modifiers": { + "scenarios": ["Ro_lockdown", "Ro_all"], + "modifiers": { + "Ro_lockdown": { + "method": "SinglePeriodModifier", + "parameter": "Ro", + "period_start_date": "2020-03-15", + "period_end_date": "2020-05-01", + "subpop": "all", + "value": 0.4, + }, + "Ro_relax": { + "method": "SinglePeriodModifier", + "parameter": "Ro", + "period_start_date": "2020-05-01", + "period_end_date": "2020-07-01", + "subpop": "all", + "value": 0.8, + }, + "Ro_all": { + "method": "StackedModifier", + "modifiers": ["Ro_lockdown", "Ro_relax"], + }, + }, + }, + }, + ["Ro_all", "Ro_relax", "Ro_lockdown"], + [], + ), + ( + { + "outcome_modifiers": { + "scenarios": ["test_limits"], + "modifiers": { + "test_limits": { + "method": "SinglePeriodModifier", + "parameter": "incidCase::probability", + "subpop": "all", + "period_start_date": "2020-02-01", + "period_end_date": "2020-06-01", + "value": 0.5, + }, + "test_expansion": { + "method": "SinglePeriodModifier", + "parameter": "incidCase::probability", + "period_start_date": "2020-06-01", + "period_end_date": "2020-08-01", + "subpop": "all", + "value": 0.7, + }, + "test_limits_expansion": { + "method": "StackedModifier", + "modifiers": ["test_limits", "test_expansion"], + }, + }, + }, + }, + [], + [], + ), + ( + { + "outcome_modifiers": { + "scenarios": ["test_limits"], + "modifiers": { + "test_limits": { + "method": "SinglePeriodModifier", + "parameter": "incidCase::probability", + "subpop": "all", + "period_start_date": "2020-02-01", + "period_end_date": "2020-06-01", + "value": 0.5, + }, + "test_expansion": { + "method": "SinglePeriodModifier", + "parameter": "incidCase::probability", + "period_start_date": "2020-06-01", + "period_end_date": "2020-08-01", + "subpop": "all", + "value": 0.7, + }, + "test_limits_expansion": { + "method": "StackedModifier", + "modifiers": ["test_limits", "test_expansion"], + }, + }, + }, + }, + [], + ["test_limits_expansion"], + ), + ( + { + "outcome_modifiers": { + "scenarios": ["test_limits"], + "modifiers": { + "test_limits": { + "method": "SinglePeriodModifier", + "parameter": "incidCase::probability", + "subpop": "all", + "period_start_date": "2020-02-01", + "period_end_date": "2020-06-01", + "value": 0.5, + }, + "test_expansion": { + "method": "SinglePeriodModifier", + "parameter": "incidCase::probability", + "period_start_date": "2020-06-01", + "period_end_date": "2020-08-01", + "subpop": "all", + "value": 0.7, + }, + "test_limits_expansion": { + "method": "StackedModifier", + "modifiers": ["test_limits", "test_expansion"], + }, + }, + }, + }, + [], + ["test_limits", "test_expansion", "test_limits_expansion"], + ), + ( + { + "seir_modifiers": { + "scenarios": ["Ro_lockdown", "Ro_all"], + "modifiers": { + "Ro_lockdown": { + "method": "SinglePeriodModifier", + "parameter": "Ro", + "period_start_date": "2020-03-15", + "period_end_date": "2020-05-01", + "subpop": "all", + "value": 0.4, + }, + "Ro_relax": { + "method": "SinglePeriodModifier", + "parameter": "Ro", + "period_start_date": "2020-05-01", + "period_end_date": "2020-07-01", + "subpop": "all", + "value": 0.8, + }, + "Ro_all": { + "method": "StackedModifier", + "modifiers": ["Ro_lockdown", "Ro_relax"], + }, + }, + }, + "outcome_modifiers": { + "scenarios": ["test_limits"], + "modifiers": { + "test_limits": { + "method": "SinglePeriodModifier", + "parameter": "incidCase::probability", + "subpop": "all", + "period_start_date": "2020-02-01", + "period_end_date": "2020-06-01", + "value": 0.5, + }, + "test_expansion": { + "method": "SinglePeriodModifier", + "parameter": "incidCase::probability", + "period_start_date": "2020-06-01", + "period_end_date": "2020-08-01", + "subpop": "all", + "value": 0.7, + }, + "test_limits_expansion": { + "method": "StackedModifier", + "modifiers": ["test_limits", "test_expansion"], + }, + }, + }, + }, + [], + [], + ), + ( + { + "seir_modifiers": { + "scenarios": ["Ro_lockdown", "Ro_all"], + "modifiers": { + "Ro_lockdown": { + "method": "SinglePeriodModifier", + "parameter": "Ro", + "period_start_date": "2020-03-15", + "period_end_date": "2020-05-01", + "subpop": "all", + "value": 0.4, + }, + "Ro_relax": { + "method": "SinglePeriodModifier", + "parameter": "Ro", + "period_start_date": "2020-05-01", + "period_end_date": "2020-07-01", + "subpop": "all", + "value": 0.8, + }, + "Ro_all": { + "method": "StackedModifier", + "modifiers": ["Ro_lockdown", "Ro_relax"], + }, + }, + }, + "outcome_modifiers": { + "scenarios": ["test_limits"], + "modifiers": { + "test_limits": { + "method": "SinglePeriodModifier", + "parameter": "incidCase::probability", + "subpop": "all", + "period_start_date": "2020-02-01", + "period_end_date": "2020-06-01", + "value": 0.5, + }, + "test_expansion": { + "method": "SinglePeriodModifier", + "parameter": "incidCase::probability", + "period_start_date": "2020-06-01", + "period_end_date": "2020-08-01", + "subpop": "all", + "value": 0.7, + }, + "test_limits_expansion": { + "method": "StackedModifier", + "modifiers": ["test_limits", "test_expansion"], + }, + }, + }, + }, + ["Ro_relax"], + ["test_expansion"], + ), + ), +) +def test_editing_modifier_scenarios( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + data: dict[str, Any], + seir_modifier_scenarios: list[str], + outcome_modifier_scenarios: list[str], +) -> None: + # Setup the test + monkeypatch.chdir(tmp_path) + config_path = tmp_path / "config.yml" + config_path.write_text(yaml.dump(data)) + + # Invoke the command + runner = CliRunner() + args = [config_path.name] + if seir_modifier_scenarios: + for s in seir_modifier_scenarios: + args += ["--seir_modifiers_scenarios", s] + if outcome_modifier_scenarios: + for o in outcome_modifier_scenarios: + args += ["--outcome_modifiers_scenarios", o] + result = runner.invoke(patch, args) + assert result.exit_code == 0 + + # Check the output + patched_data = yaml.safe_load(result.output) + assert "seir_modifiers_scenarios" not in patched_data + assert patched_data.get("seir_modifiers", {}).get("scenarios", []) == ( + seir_modifier_scenarios + if seir_modifier_scenarios + else data.get("seir_modifiers", {}).get("scenarios", []) + ) + assert "outcome_modifiers_scenarios" not in patched_data + assert patched_data.get("outcome_modifiers", {}).get("scenarios", []) == ( + outcome_modifier_scenarios + if outcome_modifier_scenarios + else data.get("outcome_modifiers", {}).get("scenarios", []) + ) diff --git a/flepimop/gempyor_pkg/tests/shared_cli/test_parse_config_files.py b/flepimop/gempyor_pkg/tests/shared_cli/test_parse_config_files.py index a64fc38f6..a2aa55c47 100644 --- a/flepimop/gempyor_pkg/tests/shared_cli/test_parse_config_files.py +++ b/flepimop/gempyor_pkg/tests/shared_cli/test_parse_config_files.py @@ -158,7 +158,7 @@ def test_multifile_config_collision( tmpconfigfile1 = config_file(tmp_path, testdict1, "config1.yaml") tmpconfigfile2 = config_file(tmp_path, testdict2, "config2.yaml") mockconfig = mock_empty_config() - with pytest.warns(UserWarning, match=r"foo"): + with pytest.raises(ValueError, match=r"foo"): parse_config_files(mockconfig, config_files=[tmpconfigfile1, tmpconfigfile2]) for k, v in (testdict1 | testdict2).items(): assert mockconfig[k].get(v) == v diff --git a/flepimop/gempyor_pkg/tests/utils/test__dump_formatted_yaml.py b/flepimop/gempyor_pkg/tests/utils/test__dump_formatted_yaml.py new file mode 100644 index 000000000..4ee574eea --- /dev/null +++ b/flepimop/gempyor_pkg/tests/utils/test__dump_formatted_yaml.py @@ -0,0 +1,55 @@ +from typing import Any + +import confuse +import pytest + +from gempyor.utils import _dump_formatted_yaml + + +@pytest.mark.parametrize( + ("data", "expected"), + ( + ({"key": "value"}, "key: value\n"), + ( + {"name": "Test Config", "compartments": {"infection_stage": ["S", "I", "R"]}}, + """name: "Test Config" +compartments: + infection_stage: [S, I, R] +""", + ), + ( + { + "seir": { + "parameters": { + "beta": {"value": 3.4}, + "gamma": {"value": 5.6}, + }, + "transitions": { + "source": ["S"], + "destination": ["E"], + "rate": ["beta * gamma"], + "proportional_to": [["S"], ["I"]], + "proportion_exponent": [1, 1], + }, + } + }, + """seir: + parameters: + beta: + value: 3.4 + gamma: + value: 5.6 + transitions: + source: [S] + destination: [E] + rate: ["beta * gamma"] + proportional_to: [[S], [I]] + proportion_exponent: [1, 1] +""", + ), + ), +) +def test_exact_output_for_select_values(data: dict[str, Any], expected: str) -> None: + cfg = confuse.Configuration("test", __name__) + cfg.set(data) + assert _dump_formatted_yaml(cfg) == expected