diff --git a/t/t2226-housekeeping.t b/t/t2226-housekeeping.t
index cca404f3e126..524ed7342ca0 100755
--- a/t/t2226-housekeeping.t
+++ b/t/t2226-housekeeping.t
@@ -27,6 +27,11 @@ kill_ranks () {
 	flux housekeeping kill --targets=$1 --signal=$2
 }
 
+# Usage: straggler_count
+straggler_count () {
+	flux housekeeping list -no {nnodes}
+}
+
 # Note: the hand off of resources to housekeeping occurs just before the job
 # becomes inactive, therefore it is safe to assume that housekeeping has run
 # for the job if it is enclosed between successful 'wait_for_running 0' calls.
@@ -42,6 +47,22 @@ wait_for_running () {
 	done
 }
 
+# Usage: wait_for_straggler_count count
+# Poll every 0.1s until straggler_count prints a value no greater than
+# 'count'.  Returns 1 on timeout, or right away if the list command fails,
+# so that a broken query cannot be mistaken for "count reached".
+wait_for_straggler_count () {
+	count=0
+	while :; do
+		nstragglers=$(straggler_count) || return 1
+		# empty output means nothing was listed: compare as zero
+		test "${nstragglers:-0}" -gt "$1" || return 0
+		count=$(($count+1))
+		test $count -eq 300 && return 1 # max 300 * 0.1s sleep = 30s
+		sleep 0.1
+	done
+}
+
 test_expect_success 'flux-housekeeping utility exists' '
 	flux housekeeping list --help &&
 	flux housekeeping kill --help
@@ -343,8 +364,9 @@ test_expect_success 'configure housekeeping with immediate release' '
 test_expect_success 'run job that uses 4 nodes to trigger housekeeping' '
 	flux run -N4 true
 '
-test_expect_success 'housekeeping is running for 1 job' '
-	wait_for_running 1
+test_expect_success 'housekeeping completed except for one straggler' '
+	wait_for_running 1 &&
+	wait_for_straggler_count 1
 '
 test_expect_success 'reload scheduler without partial hello capability' '
 	flux dmesg -C &&
@@ -357,4 +379,26 @@ test_expect_success 'wait for housekeeping to finish' '
 test_expect_success 'housekeeping jobs were terminated due to sched reload' '
 	flux dmesg | grep "housekeeping:.*will be terminated"
 '
+test_expect_success 'no nodes are allocated' '
+	test $(flux resource list -s allocated -no {nnodes}) -eq 0 &&
+	test $(FLUX_RESOURCE_LIST_RPC=sched.resource-status \
+		flux resource list -s allocated -no {nnodes}) -eq 0
+'
+test_expect_success 'run job that uses 4 nodes to trigger housekeeping' '
+	flux run -N4 true
+'
+test_expect_success 'housekeeping completed except for one straggler' '
+	wait_for_running 1 &&
+	wait_for_straggler_count 1
+'
+test_expect_success 'reload scheduler WITH partial hello capability' '
+	flux dmesg -C &&
+	flux module reload -f sched-simple &&
+	flux dmesg -H
+'
+test_expect_success 'one node is allocated' '
+	test $(flux resource list -s allocated -no {nnodes}) -eq 1 &&
+	test $(FLUX_RESOURCE_LIST_RPC=sched.resource-status \
+		flux resource list -s allocated -no {nnodes}) -eq 1
+'
 test_done