From 1316ebefd5409306a39e20bc87ea58fa57775239 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 8 Nov 2024 16:08:58 -0500 Subject: [PATCH] Optional overrides to job resource presets --- flepimop/gempyor_pkg/src/gempyor/batch.py | 52 ++++++++++++++++--- .../tests/batch/test_job_resources_class.py | 16 ++++++ 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/batch.py b/flepimop/gempyor_pkg/src/gempyor/batch.py index e2cdd4aa7..9b6742f61 100644 --- a/flepimop/gempyor_pkg/src/gempyor/batch.py +++ b/flepimop/gempyor_pkg/src/gempyor/batch.py @@ -183,23 +183,35 @@ def __post_init__(self) -> None: @classmethod def from_presets( - cls, job_size: JobSize, inference_method: Literal["emcee"] | None + cls, + job_size: JobSize, + inference_method: Literal["emcee"] | None, + nodes: int | None = None, + cpus: int | None = None, + memory: int | None = None, ) -> "JobResources": """ - Calculate suggested job resources from presets. + Calculate suggested job resources from presets with optional overrides. Args: job_size: The size of the job being ran. inference_method: The inference method being used for this job. + nodes: Optional manual override for the number of nodes. + cpus: Optional manual override for the number of CPUs per node. + memory: Optional manual override for the amount of memory per node. Returns: A job resources instances scaled to the job size given. """ if inference_method == "emcee": - return cls( - nodes=1, cpus=2 * job_size.jobs, memory=2 * 1024 * job_size.simulations - ) - return cls(nodes=job_size.jobs, cpus=2, memory=2 * 1024) + nodes = 1 if nodes is None else nodes + cpus = 2 * job_size.jobs if cpus is None else cpus + memory = 2 * 1024 * job_size.simulations if memory is None else memory + else: + nodes = job_size.jobs if nodes is None else nodes + cpus = 2 if cpus is None else cpus + memory = 2 * 1024 if memory is None else memory + return cls(nodes=nodes, cpus=cpus, memory=memory) @property def total_cpus(self) -> int: @@ -728,6 +740,24 @@ def _job_name(name: str | None, timestamp: datetime | None) -> str: default=None, help="Optionally an email that can be notified on job begin and end.", ), + click.Option( + param_decls=["--nodes", "nodes"], + type=click.IntRange(min=1), + default=None, + help="Override for the number of nodes to use.", + ), + click.Option( + param_decls=["--cpus", "cpus"], + type=click.IntRange(min=1), + default=None, + help="Override for the number of CPUs per node to use.", + ), + click.Option( + param_decls=["--memory", "memory"], + type=click.IntRange(min=1), + default=None, + help="Override for the amount of memory per node to use in MB.", + ), ] + list(verbosity_options.values()), ) @@ -801,7 +831,13 @@ def _click_submit(ctx: click.Context = mock_context, **kwargs) -> None: logger.info("Setting a total job time limit of %s minutes", job_time_limit.format()) # Job resources - job_resources = JobResources.from_presets(job_size, inference_method) + job_resources = JobResources.from_presets( + job_size, + inference_method, + nodes=kwargs["nodes"], + cpus=kwargs["cpus"], + memory=kwargs["memory"], + ) # Cluster info cluster: Cluster | None = None @@ -839,7 +875,7 @@ def _click_submit(ctx: click.Context = mock_context, **kwargs) -> None: "flepi_path": kwargs["flepi_path"].absolute(), "job_name": job_name, "jobs": job_size.jobs, - "nslots": job_size.simulations, + "nslots": job_size.simulations, # aka nwalkers "prefix": kwargs["prefix"], "project_path": kwargs["project_path"].absolute(), "run_id": kwargs["run_id"], diff --git a/flepimop/gempyor_pkg/tests/batch/test_job_resources_class.py b/flepimop/gempyor_pkg/tests/batch/test_job_resources_class.py index 3b7d6b6fb..c5abeb040 100644 --- a/flepimop/gempyor_pkg/tests/batch/test_job_resources_class.py +++ b/flepimop/gempyor_pkg/tests/batch/test_job_resources_class.py @@ -81,3 +81,19 @@ def test_from_presets_for_select_inputs( else: assert job_resources.cpus == 2 assert job_resources.memory == 2 * 1024 + + +@pytest.mark.parametrize("inference_method", ("emcee", None)) +@pytest.mark.parametrize("nodes", (1, 2, 4, 8)) +@pytest.mark.parametrize("cpus", (1, 2, 4, 8)) +@pytest.mark.parametrize("memory", (1024, 2 * 1024, 4 * 1024, 8 * 1024)) +def test_from_presets_overrides( + inference_method: Literal["emcee"] | None, nodes: int, cpus: int, memory: int +) -> None: + job_size = JobSize(jobs=1, simulations=1, blocks=1) + job_resources = JobResources.from_presets( + job_size, inference_method, nodes=nodes, cpus=cpus, memory=memory + ) + assert job_resources.nodes == nodes + assert job_resources.cpus == cpus + assert job_resources.memory == memory