Skip to content

Commit

Permalink
CI sanity check test for env vars (pytorch#120519)
Browse files Browse the repository at this point in the history
Add a test that fails on purpose to trigger retries. It asserts the opposite of the success condition (that the env vars exist), so it fails whenever they are set.

It's a bit hacky because I want it to fail in the normal flow in order to trigger reruns, but I don't want to expose the failures to users since that would be confusing.
Pull Request resolved: pytorch#120519
Approved by: https://github.com/huydhn
  • Loading branch information
clee2000 authored and pytorchmergebot committed Mar 8, 2024
1 parent 75bb049 commit f43b9c5
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 1 deletion.
22 changes: 21 additions & 1 deletion test/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import argparse
import copy
import glob
import json
import os
import pathlib
Expand Down Expand Up @@ -381,6 +382,7 @@ def run_test(
launcher_cmd=None,
extra_unittest_args=None,
env=None,
print_log=True,
) -> int:
env = env or os.environ.copy()
maybe_set_hip_visible_devies()
Expand Down Expand Up @@ -542,7 +544,7 @@ def run_test(
# comes up in the future.
ret_code = 0 if ret_code == 5 or ret_code == 4 else ret_code

if options.pipe_logs:
if options.pipe_logs and print_log:
handle_log_file(
test_module, log_path, failed=(ret_code != 0), was_rerun=was_rerun
)
Expand Down Expand Up @@ -1002,6 +1004,23 @@ def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
return pytest_args


def run_ci_sanity_check(test: ShardedTest, test_directory, options):
    """Run the intentionally failing CI sanity check and remove its artifacts.

    The handler only accepts the ``test_ci_sanity_check_fail`` test. It runs
    the test through ``run_test`` with ``print_log=False`` and treats a return
    code of exactly 1 (the expected failure) as success.

    Args:
        test: The sharded test to run; its ``name`` must be
            ``"test_ci_sanity_check_fail"``.
        test_directory: Forwarded unchanged to ``run_test``.
        options: Forwarded unchanged to ``run_test``.

    Returns:
        0 when ``run_test`` returned 1 and the generated artifacts were
        deleted; 1 for any other return code from ``run_test``.

    Raises:
        AssertionError: If ``test.name`` is not ``test_ci_sanity_check_fail``.
    """
    assert (
        test.name == "test_ci_sanity_check_fail"
    ), f"This handler only works for test_ci_sanity_check_fail, got {test.name}"

    exit_code = run_test(test, test_directory, options, print_log=False)
    if exit_code == 1:
        # The expected failure happened: remove what the run left behind so
        # the deliberate failure is not reported alongside real results.
        reports_root = str(REPO_ROOT / "test/test-reports")
        for log_path in glob.glob(f"{reports_root}/{test.name}*.log"):
            os.remove(log_path)
        # NOTE: glob.glob is called without recursive=True, so "**" matches a
        # single path component here (same as "*").
        for report_dir in glob.glob(f"{reports_root}/**/{test.name}"):
            shutil.rmtree(report_dir)
        return 0

    # Anything other than the expected failure code is reported as a failure.
    return 1


CUSTOM_HANDLERS = {
"test_cuda_primary_ctx": run_test_with_subprocess,
"test_cuda_nvml_based_avail": run_test_with_subprocess,
Expand All @@ -1024,6 +1043,7 @@ def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
"distributed/rpc/test_share_memory": run_test_with_subprocess,
"distributed/rpc/cuda/test_tensorpipe_agent": run_test_with_subprocess,
"doctests": run_doctests,
"test_ci_sanity_check_fail": run_ci_sanity_check,
}


Expand Down
16 changes: 16 additions & 0 deletions test/test_ci_sanity_check_fail.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Owner(s): ["module: ci"]
# Sanity check for CI setup in GHA. This file is expected to fail so it can trigger reruns

import os

from torch.testing._internal.common_utils import run_tests, TestCase


class TestCISanityCheck(TestCase):
def test_env_vars_exist(self):
# This check should fail and trigger reruns. If it passes, something is wrong
self.assertTrue(os.environ.get("CI") is None)


if __name__ == "__main__":
run_tests()

0 comments on commit f43b9c5

Please sign in to comment.