Skip to content

Commit

Permalink
mock-based reproducer for issue #3696
Browse files Browse the repository at this point in the history
  • Loading branch information
benclifford committed Dec 11, 2024
1 parent a01f7e4 commit aa09931
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 0 deletions.
8 changes: 8 additions & 0 deletions parsl/jobs/strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,14 @@ def _general_strategy(self, executors: List[BlockProviderExecutor], *, strategy_
logger.debug(f"Strategy case 2b: active_blocks {active_blocks} < max_blocks {max_blocks} so scaling out")
excess_slots = math.ceil((active_tasks * parallelism) - active_slots)
excess_blocks = math.ceil(float(excess_slots) / (tasks_per_node * nodes_per_block))
print(f"BENC: active_tasks {active_tasks}")
print(f"BENC: active_slots {active_slots}")
print(f"BENC: excess slots {excess_slots}")
print(f"BENC: tpn {tasks_per_node}")
print(f"BENC: npb {nodes_per_block}")
print(f"BENC: excess blocks {excess_blocks}")
print(f"BENC: max blocks {max_blocks}")
print(f"BENC: active blocks {active_blocks}")
excess_blocks = min(excess_blocks, max_blocks - active_blocks)
logger.debug(f"Requesting {excess_blocks} more blocks")
executor.scale_out_facade(excess_blocks)
Expand Down
62 changes: 62 additions & 0 deletions parsl/tests/test_scaling/test_regression_3696_oscillation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import math
from unittest.mock import MagicMock

import pytest

from parsl.executors.high_throughput.executor import HighThroughputExecutor
from parsl.jobs.states import JobState, JobStatus
from parsl.jobs.strategy import Strategy


@pytest.mark.local
def test_htex_strategy_does_not_oscillate():
    """Regression test for issue #3696: htex scaling must not oscillate.

    Issue #3696 describes a configuration with many workers per block and
    comparatively few active tasks, where the htex scaling strategy flips
    between 0 and 1 active block instead of settling on 1 active block.

    The load (14 tasks) and block size (48 workers per node) used here are
    the values reported in issue #3696.

    The executor and provider are mocks; the only state that changes in
    response to the strategy is the block status table, which gains one
    PENDING entry per block requested through ``scale_out_facade``.
    """
    # Block status table shared with the mocked executor. The scale-out
    # side effect below mutates this same dict, so the strategy sees the
    # new blocks on its next pass.
    block_statuses = {}

    provider = MagicMock()
    provider.configure_mock(
        parallelism=1,
        init_blocks=0,
        min_blocks=0,
        max_blocks=2,
        nodes_per_block=1,
    )

    executor = MagicMock(spec=HighThroughputExecutor)
    executor.configure_mock(
        provider=provider,
        outstanding=14,
        status_facade=block_statuses,
        workers_per_node=48,
    )

    def record_scale_out(n):
        # Emulate launching n blocks: append n PENDING entries keyed by
        # consecutive integers continuing from the current table size.
        first_id = len(block_statuses)
        for block_id in range(first_id, first_id + n):
            block_statuses[block_id] = JobStatus(state=JobState.PENDING)

    executor.scale_out_facade.side_effect = record_scale_out

    # NOTE(review): max_idletime is infinite here. If the strategy's
    # zero-load scale-in path is gated on idle time, the final assertion
    # in this test could never pass -- confirm against Strategy.
    strategy = Strategy(strategy='htex_auto_scale', max_idletime=math.inf)
    strategy.add_executors([executor])

    # First pass: in issue #3696 this performs the initial and load-based
    # scale outs, because 14 > 48*0.
    strategy.strategize([executor])
    executor.scale_out_facade.assert_called()
    executor.scale_in_facade.assert_not_called()

    # Second pass: in issue #3696 this performs a scale in, because
    # 14 < 48*1. The assertion below is the one that fails while the
    # issue is present.
    strategy.strategize([executor])
    executor.scale_in_facade.assert_not_called()

    # Final pass: with no outstanding load, scale in is expected.
    executor.outstanding = 0
    strategy.strategize([executor])
    executor.scale_in_facade.assert_called()

0 comments on commit aa09931

Please sign in to comment.