From aa09931a8323e23d8a2a2f8d64b63acce41a71ba Mon Sep 17 00:00:00 2001
From: Ben Clifford
Date: Wed, 11 Dec 2024 13:06:22 +0000
Subject: [PATCH] mock-based reproducer for issue #3696

---
Review note: this patch was re-flowed from a whitespace-collapsed copy. The
indentation of the strategy.py context lines is reconstructed, and the blob
ids on the "index" lines predate the review edits; verify both against the
tree before applying.

 parsl/jobs/strategy.py                        |  8 +++
 .../test_regression_3696_oscillation.py       | 67 +++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 parsl/tests/test_scaling/test_regression_3696_oscillation.py

diff --git a/parsl/jobs/strategy.py b/parsl/jobs/strategy.py
index e0898cccb1..c50bb21086 100644
--- a/parsl/jobs/strategy.py
+++ b/parsl/jobs/strategy.py
@@ -275,6 +275,14 @@ def _general_strategy(self, executors: List[BlockProviderExecutor], *, strategy_
                     logger.debug(f"Strategy case 2b: active_blocks {active_blocks} < max_blocks {max_blocks} so scaling out")
                     excess_slots = math.ceil((active_tasks * parallelism) - active_slots)
                     excess_blocks = math.ceil(float(excess_slots) / (tasks_per_node * nodes_per_block))
+                    logger.debug(f"BENC: active_tasks {active_tasks}")
+                    logger.debug(f"BENC: active_slots {active_slots}")
+                    logger.debug(f"BENC: excess slots {excess_slots}")
+                    logger.debug(f"BENC: tpn {tasks_per_node}")
+                    logger.debug(f"BENC: npb {nodes_per_block}")
+                    logger.debug(f"BENC: excess blocks {excess_blocks}")
+                    logger.debug(f"BENC: max blocks {max_blocks}")
+                    logger.debug(f"BENC: active blocks {active_blocks}")
                     excess_blocks = min(excess_blocks, max_blocks - active_blocks)
                     logger.debug(f"Requesting {excess_blocks} more blocks")
                     executor.scale_out_facade(excess_blocks)
diff --git a/parsl/tests/test_scaling/test_regression_3696_oscillation.py b/parsl/tests/test_scaling/test_regression_3696_oscillation.py
new file mode 100644
index 0000000000..2ede9df368
--- /dev/null
+++ b/parsl/tests/test_scaling/test_regression_3696_oscillation.py
@@ -0,0 +1,67 @@
+import math
+from unittest.mock import MagicMock
+
+import pytest
+
+from parsl.executors.high_throughput.executor import HighThroughputExecutor
+from parsl.jobs.states import JobState, JobStatus
+from parsl.jobs.strategy import Strategy
+
+
+@pytest.mark.local
+def test_htex_strategy_does_not_oscillate():
+    """Check for oscillations in htex scaling.
+
+    In issue #3696, with a large number of workers per block
+    and a smaller number of active tasks, the htex scaling
+    strategy oscillates between 0 and 1 active block, rather
+    than converging to 1 active block.
+
+    The choices of 14 tasks and 48 workers per node are taken
+    from issue #3696.
+    """
+
+    # NOTE(review): with max_idletime=math.inf, confirm that the final
+    # zero-load scale-in below can still be triggered by the strategy.
+    s = Strategy(strategy='htex_auto_scale', max_idletime=math.inf)
+
+    provider = MagicMock()
+    executor = MagicMock(spec=HighThroughputExecutor)
+
+    # Status table shared between the mock executor and the scale-out hook.
+    statuses = {}
+
+    executor.provider = provider
+    executor.outstanding = 14
+    executor.status_facade = statuses
+    executor.workers_per_node = 48
+
+    provider.parallelism = 1
+    provider.init_blocks = 0
+    provider.min_blocks = 0
+    provider.max_blocks = 2
+    provider.nodes_per_block = 1
+
+    def record_pending_blocks(n):
+        # Stand-in for a real scale out: register n new PENDING job statuses.
+        for _ in range(n):
+            statuses[len(statuses)] = JobStatus(state=JobState.PENDING)
+
+    executor.scale_out_facade.side_effect = record_pending_blocks
+
+    s.add_executors([executor])
+
+    # In issue #3696, this first strategize does initial and load based
+    # scale outs, because 14 > 48*0
+    s.strategize([executor])
+    executor.scale_out_facade.assert_called()
+    executor.scale_in_facade.assert_not_called()
+
+    # In issue #3696, this second strategize does a scale in, because 14 < 48*1
+    s.strategize([executor])
+    executor.scale_in_facade.assert_not_called()  # this assert fails due to issue #3696
+
+    # Now check scale in happens with 0 load
+    executor.outstanding = 0
+    s.strategize([executor])
+    executor.scale_in_facade.assert_called()