Commit: Test filtering

carmocca committed Nov 17, 2023
1 parent f475bd5 commit edc4ed5
Showing 11 changed files with 183 additions and 46 deletions.
12 changes: 11 additions & 1 deletion .github/azure-gpu-test.yml
@@ -53,4 +53,14 @@ jobs:
displayName: "Env details"
- bash: pytest -v --disable-pytest-warnings --strict-markers --color=yes
displayName: 'Testing'
displayName: 'Ordinary tests'
env:
PL_RUN_CUDA_TESTS: "1"
timeoutInMinutes: "5"

- bash: bash run_standalone_tests.sh
workingDirectory: tests
env:
PL_RUN_CUDA_TESTS: "1"
displayName: "Standalone tests"
timeoutInMinutes: "5"
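
The two new steps select tests purely through environment variables: "Ordinary tests" exports PL_RUN_CUDA_TESTS=1 so the collection hook added in tests/conftest.py keeps only CUDA-gated tests, while "Standalone tests" defers to run_standalone_tests.sh, which additionally sets PL_RUN_STANDALONE_TESTS=1. A rough local equivalent of the first step (a sketch, not part of this commit):

# Rough local equivalent of the "Ordinary tests" step above (a sketch, not part of this
# commit). With PL_RUN_CUDA_TESTS=1, the collection hook added in tests/conftest.py keeps
# only tests carrying a CUDA-gated @RunIf marker (e.g. @RunIf(min_cuda_gpus=1)).
import os
import sys

import pytest

os.environ["PL_RUN_CUDA_TESTS"] = "1"
sys.exit(pytest.main(["-v", "--disable-pytest-warnings", "--strict-markers", "--color=yes"]))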
6 changes: 3 additions & 3 deletions .github/workflows/cpu-tests.yml
@@ -58,13 +58,13 @@ jobs:
- name: Run tests without the package installed
run: |
pip install -r requirements-all.txt pytest pytest-rerunfailures transformers einops protobuf
pip install -r requirements-all.txt pytest pytest-rerunfailures pytest-timeout transformers einops protobuf
pip list
pytest -v --disable-pytest-warnings --strict-markers --color=yes
pytest -v --disable-pytest-warnings --strict-markers --color=yes --timeout 60
- name: Run tests
run: |
pip install . --no-deps
pytest -v --disable-pytest-warnings --strict-markers --color=yes
pytest -v --disable-pytest-warnings --strict-markers --color=yes --timeout 60
15 changes: 7 additions & 8 deletions pretrain/tinyllama.py
@@ -161,14 +161,13 @@ def train(fabric, state, train_dataloader, val_dataloader, resume):
            if curr_iter < initial_iter:
                curr_iter += 1
                continue
            else:
                resume = False
                curr_iter = -1
                fabric.barrier()
                fabric.print(
                    "Resuming data loader finished."
                    f"Took {time.perf_counter() - total_t0:.1f} seconds to reach iteration {initial_iter}."
                )
            resume = False
            curr_iter = -1
            fabric.barrier()
            fabric.print(
                "Resuming data loader finished."
                f"Took {time.perf_counter() - total_t0:.1f} seconds to reach iteration {initial_iter}."
            )

        if state["iter_num"] >= max_iters:
            break
52 changes: 52 additions & 0 deletions tests/conftest.py
@@ -1,9 +1,11 @@
import os
import sys
from pathlib import Path
from typing import List

import pytest
import torch
from lightning.fabric.utilities.testing import _runif_reasons

wd = Path(__file__).parent.parent.absolute()

@@ -40,3 +42,53 @@ def tensor_like():
def restore_default_dtype():
    # just in case
    torch.set_default_dtype(torch.float32)


def RunIf(**kwargs):
    reasons, marker_kwargs = _runif_reasons(**kwargs)
    return pytest.mark.skipif(condition=len(reasons) > 0, reason=f"Requires: [{' + '.join(reasons)}]", **marker_kwargs)


# https://github.com/Lightning-AI/lightning/blob/6e517bd55b50166138ce6ab915abd4547702994b/tests/tests_fabric/conftest.py#L140
def pytest_collection_modifyitems(items: List[pytest.Function], config: pytest.Config) -> None:
    initial_size = len(items)
    conditions = []
    filtered, skipped = 0, 0

    options = {"standalone": "PL_RUN_STANDALONE_TESTS", "min_cuda_gpus": "PL_RUN_CUDA_TESTS"}
    if os.getenv(options["standalone"], "0") == "1" and os.getenv(options["min_cuda_gpus"], "0") == "1":
        # special case: we don't have a CPU job for standalone tests, so we shouldn't run only cuda tests.
        # by deleting the key, we avoid filtering out the CPU tests
        del options["min_cuda_gpus"]

    for kwarg, env_var in options.items():
        # this will compute the intersection of all tests selected per environment variable
        if os.getenv(env_var, "0") == "1":
            conditions.append(env_var)
            for i, test in reversed(list(enumerate(items))):  # loop in reverse, since we are going to pop items
                already_skipped = any(marker.name == "skip" for marker in test.own_markers)
                if already_skipped:
                    # the test was going to be skipped anyway, filter it out
                    items.pop(i)
                    skipped += 1
                    continue
                has_runif_with_kwarg = any(
                    marker.name == "skipif" and marker.kwargs.get(kwarg) for marker in test.own_markers
                )
                if not has_runif_with_kwarg:
                    # the test does not have `@RunIf(kwarg=True)` for this condition, so filter it out
                    items.pop(i)
                    filtered += 1

    if config.option.verbose >= 0 and (filtered or skipped):
        writer = config.get_terminal_writer()
        writer.write(
            (
                f"\nThe number of tests has been filtered from {initial_size} to {initial_size - filtered} after the"
                f" filters {conditions}.\n{skipped} tests are marked as unconditional skips.\nIn total,"
                f" {len(items)} tests will run.\n"
            ),
            flush=True,
            bold=True,
            purple=True,  # oh yeah, branded pytest messages
        )
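
The net effect of the hook: with PL_RUN_CUDA_TESTS=1 only tests whose RunIf marker requests min_cuda_gpus survive collection, with PL_RUN_STANDALONE_TESTS=1 (exported by run_standalone_tests.sh) only standalone-marked tests survive, and when both variables are set the min_cuda_gpus filter is dropped so CPU-only standalone tests are not discarded. A hypothetical test module illustrating the markers (names are illustrative, not part of this commit):

# Hypothetical usage sketch, not part of this commit.
import torch

from tests.conftest import RunIf


@RunIf(min_cuda_gpus=1)
def test_runs_on_one_gpu():
    # kept when PL_RUN_CUDA_TESTS=1, filtered out otherwise
    assert torch.ones(1, device="cuda").item() == 1.0


@RunIf(min_cuda_gpus=2, standalone=True)
def test_runs_in_its_own_process():
    # kept when PL_RUN_STANDALONE_TESTS=1 (set by run_standalone_tests.sh)
    assert torch.cuda.device_count() >= 2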
78 changes: 78 additions & 0 deletions tests/run_standalone_tests.sh
@@ -0,0 +1,78 @@
#!/bin/bash
set -e

# Batch size for testing: Determines how many standalone test invocations run in parallel
# It can be set through the env variable PL_STANDALONE_TESTS_BATCH_SIZE
test_batch_size="${PL_STANDALONE_TESTS_BATCH_SIZE:-1}"

# this environment variable allows special tests to run
export PL_RUN_STANDALONE_TESTS=1
# python arguments
defaults="-m pytest --no-header -v --disable-pytest-warnings --strict-markers --color=yes -s --timeout 120"
echo "Using defaults: ${defaults}"

# find tests marked as `@RunIf(standalone=True)`. done manually instead of with pytest because it is faster
grep_output=$(grep --recursive --word-regexp . --regexp 'standalone=True' --include '*.py')

# file paths, remove duplicates
files=$(echo "$grep_output" | cut -f1 -d: | sort | uniq)

# get the list of parametrizations. we need to call them separately. the last two lines are removed.
# note: if there's a syntax error, this will fail with some garbled output
if [[ "$OSTYPE" == "darwin"* ]]; then
parametrizations=$(python3 -m pytest $files --collect-only --quiet "$@" | tail -r | sed -e '1,3d' | tail -r)
else
parametrizations=$(python3 -m pytest $files --collect-only --quiet "$@" | head -n -2)
fi
# remove the "tests/" path suffix
path_suffix=$(basename "$(pwd)")"/" # https://stackoverflow.com/a/8223345
parametrizations=${parametrizations//$path_suffix/}
parametrizations_arr=($parametrizations)

report=''

rm -f standalone_test_output.txt # in case it exists, remove it
function show_batched_output {
  if [ -f standalone_test_output.txt ]; then  # if exists
    cat standalone_test_output.txt
    # heuristic: stop if there's mentions of errors. this can prevent false negatives when only some of the ranks fail
    if grep -iE 'error|exception|traceback|failed' standalone_test_output.txt | grep -qvE 'on_exception|xfailed'; then
      echo "Potential error! Stopping."
      rm standalone_test_output.txt
      exit 1
    fi
    rm standalone_test_output.txt
  fi
}
trap show_batched_output EXIT  # show the output on exit

for i in "${!parametrizations_arr[@]}"; do
  parametrization=${parametrizations_arr[$i]}
  prefix="$((i+1))/${#parametrizations_arr[@]}"

  echo "$prefix: Running $parametrization"
  # execute the test in the background
  # redirect to a log file that buffers test output. since the tests will run in the background, we cannot let them
  # output to std{out,err} because the outputs would be garbled together
  python3 ${defaults} "$parametrization" &>> standalone_test_output.txt &
  # save the PID in an array
  pids[${i}]=$!
  # add row to the final report
  report+="Ran\t$parametrization\n"

  if ((($i + 1) % $test_batch_size == 0)); then
    # wait for running tests
    for pid in ${pids[*]}; do wait $pid; done
    unset pids  # empty the array
    show_batched_output
  fi
done
# wait for leftover tests
for pid in ${pids[*]}; do wait $pid; done
show_batched_output

# echo test report
printf '=%.s' {1..80}
printf "\n$report"
printf '=%.s' {1..80}
printf '\n'
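
For reference, a minimal standalone test this script would pick up (hypothetical, not part of the commit): the grep above matches the literal text standalone=True in the marker, pytest --collect-only expands the parametrizations, and each one then runs in its own pytest process so that multi-process or state-heavy tests cannot interfere with one another.

# Hypothetical standalone test, not part of this commit.
import torch

from tests.conftest import RunIf


@RunIf(min_cuda_gpus=2, standalone=True)  # "standalone=True" is what the grep matches
def test_each_gpu_is_usable():
    # a real standalone test would typically spawn processes (e.g. through Fabric);
    # only the marker matters for discovery by the script
    for index in range(2):
        assert torch.ones(1, device=f"cuda:{index}").item() == 1.0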
6 changes: 3 additions & 3 deletions tests/test_adapter.py
@@ -1,13 +1,13 @@
import sys
from contextlib import redirect_stdout
from dataclasses import asdict
from io import StringIO
from unittest.mock import Mock

import pytest
import torch
from lightning import Fabric

from tests.conftest import RunIf


def test_config_identical():
import lit_gpt.adapter as gpt_adapter
@@ -106,7 +106,7 @@ def test_adapter_gpt_init_weights():
assert (param == 0).all()


@pytest.mark.skipif(sys.platform in ("win32", "darwin"), reason="torch.compile not supported on this platform")
@RunIf(dynamo=True)
@torch.inference_mode()
def test_adapter_compile():
from lit_gpt.adapter import GPT
6 changes: 3 additions & 3 deletions tests/test_adapter_v2.py
@@ -1,12 +1,12 @@
import sys
from contextlib import redirect_stdout
from io import StringIO
from unittest.mock import Mock

import pytest
import torch
from lightning import Fabric

from tests.conftest import RunIf


def test_config_identical():
import lit_gpt.adapter_v2 as gpt_adapter
@@ -135,7 +135,7 @@ def test_base_model_can_be_adapter_v2_loaded():
assert adapter_filter(k, None)


@pytest.mark.skipif(sys.platform in ("win32", "darwin"), reason="torch.compile not supported on this platform")
@RunIf(dynamo=True)
@torch.inference_mode()
def test_adapter_v2_compile():
from lit_gpt.adapter_v2 import GPT
5 changes: 3 additions & 2 deletions tests/test_gptq.py
@@ -1,10 +1,11 @@
import lightning as L
import pytest
import torch
from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_2_2

from tests.conftest import RunIf

@pytest.mark.skipif(_TORCH_GREATER_EQUAL_2_2, reason="Core dumped")

@RunIf(max_torch="2.2") # TODO: core dumped
def test_gptq_blockwise_quantization():
from quantize.gptq import _TRITON_AVAILABLE

7 changes: 4 additions & 3 deletions tests/test_lora.py
@@ -1,4 +1,3 @@
import sys
from contextlib import redirect_stdout
from io import StringIO
from itertools import product
@@ -8,6 +7,8 @@
import torch
from lightning import Fabric

from tests.conftest import RunIf


def test_lora_layer_replacement():
from lit_gpt.lora import GPT, Config, LoRALinear
@@ -351,7 +352,7 @@ def test_lora_qkv_linear_weights_merged_status(rank, enable_lora, expected_merge
assert layer.merged == expected_merged


@pytest.mark.skipif(not torch.cuda.is_available(), reason="8bit requires CUDA")
@RunIf(min_cuda_gpus=1)
# platform dependent cuda issue: libbitsandbytes_cpu.so: undefined symbol: cquantize_blockwise_fp16_nf4
@pytest.mark.xfail(raises=AttributeError, strict=False)
# https://github.com/Lightning-AI/lit-gpt/issues/513
@@ -456,7 +457,7 @@ def test_base_model_can_be_lora_loaded():
assert lora_filter(k, None)


@pytest.mark.skipif(sys.platform in ("win32", "darwin"), reason="torch.compile not supported on this platform")
@RunIf(dynamo=True)
@torch.inference_mode()
def test_lora_compile():
from lit_gpt.lora import GPT
36 changes: 15 additions & 21 deletions tests/test_model.py
@@ -9,6 +9,8 @@
from lightning.fabric.utilities.imports import _IS_WINDOWS, _TORCH_GREATER_EQUAL_2_2
from lightning_utilities.core.imports import compare_version

from tests.conftest import RunIf

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))
@@ -341,7 +343,7 @@ def test_against_hf_phi(device, dtype):
# the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input
# is slightly different
pytest.mark.xfail(raises=AssertionError, strict=False),
pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA"),
RunIf(min_cuda_gpus=1),
],
),
],
@@ -393,7 +395,7 @@ def test_against_hf_mistral(device, dtype):
torch.testing.assert_close(ours_y, theirs_y)


@pytest.mark.skipif(sys.platform in ("win32", "darwin"), reason="torch.compile not supported on this platform")
@RunIf(dynamo=True)
@torch.inference_mode()
def test_model_compile():
from lit_gpt import GPT
@@ -468,14 +470,10 @@ def test_model_kv_cache_amp():


# https://github.com/pytorch/pytorch/blob/ad3572a5d/torch/testing/_internal/common_cuda.py#L31-L34
SUPPORTS_FLASH_ATTENTION = (
torch.cuda.is_available() and torch.cuda.get_device_capability() >= (8, 0) and not _IS_WINDOWS
)
SUPPORTS_MEM_EFF_ATTENTION = torch.cuda.is_available()
SUPPORTS_FUSED_ATTENTION = SUPPORTS_FLASH_ATTENTION or SUPPORTS_MEM_EFF_ATTENTION
SUPPORTS_FLASH_ATTENTION = torch.cuda.get_device_capability() >= (8, 0) and not _IS_WINDOWS


@pytest.mark.skipif(not SUPPORTS_FUSED_ATTENTION, reason="Unsupported")
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("config", config_module.configs, ids=[c["name"] for c in config_module.configs])
@torch.inference_mode()
def test_sdpa_choice(config):
@@ -515,13 +513,12 @@ def assert_sdpa_uses_flash(original_fn, q, k, v, mask):
with torch.backends.cuda.sdp_kernel(enable_mem_efficient=False):
model(x)

if SUPPORTS_MEM_EFF_ATTENTION:
expected = SDPBackend.EFFICIENT_ATTENTION if config.head_size % 8 == 0 else SDPBackend.MATH
with torch.backends.cuda.sdp_kernel(enable_flash=False):
model(x)
expected = SDPBackend.EFFICIENT_ATTENTION if config.head_size % 8 == 0 else SDPBackend.MATH
with torch.backends.cuda.sdp_kernel(enable_flash=False):
model(x)


@pytest.mark.skipif(not SUPPORTS_FUSED_ATTENTION, reason="Unsupported")
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("config", config_module.configs, ids=[c["name"] for c in config_module.configs])
@torch.inference_mode()
def test_sdpa_choice_kv_cache(config):
@@ -559,11 +556,8 @@ def assert_sdpa_uses_flash(original_fn, q, k, v, mask):
with torch.backends.cuda.sdp_kernel(enable_mem_efficient=False):
model(x, input_pos)

if SUPPORTS_MEM_EFF_ATTENTION:
expected = (
SDPBackend.EFFICIENT_ATTENTION
if config.head_size % 8 == 0 and config.n_query_groups != 1
else SDPBackend.MATH
)
with torch.backends.cuda.sdp_kernel(enable_flash=False):
model(x, input_pos)
expected = (
SDPBackend.EFFICIENT_ATTENTION if config.head_size % 8 == 0 and config.n_query_groups != 1 else SDPBackend.MATH
)
with torch.backends.cuda.sdp_kernel(enable_flash=False):
model(x, input_pos)
6 changes: 4 additions & 2 deletions tests/test_utils.py
@@ -1,12 +1,13 @@
import os
import sys
from contextlib import redirect_stderr
from io import StringIO

import pytest
import torch
import torch.nn.functional as F

from tests.conftest import RunIf


def test_find_multiple():
from lit_gpt.utils import find_multiple
@@ -20,7 +21,8 @@ def test_find_multiple():
assert find_multiple(50254, 512) == 50688


@pytest.mark.skipif(sys.platform == "win32", reason="match fails on windows. why did they have to use backslashes?")
# match fails on windows. why did they have to use backslashes?
@RunIf(skip_windows=True)
def test_check_valid_checkpoint_dir(tmp_path):
from lit_gpt.utils import check_valid_checkpoint_dir

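
Across the test files above, ad-hoc pytest.mark.skipif conditions are replaced with RunIf markers so the new collection filtering can recognize them. A quick reference of the conditions used in this changeset (hypothetical stubs, not part of the commit):

# Hypothetical stubs summarizing the RunIf conditions this commit switches to.
from tests.conftest import RunIf


@RunIf(dynamo=True)  # replaces the win32/darwin skipif guarding torch.compile tests
def test_compile_example(): ...


@RunIf(min_cuda_gpus=1)  # replaces skipif on torch.cuda.is_available()
def test_gpu_example(): ...


@RunIf(max_torch="2.2")  # replaces skipif on _TORCH_GREATER_EQUAL_2_2
def test_pre_torch_2_2_example(): ...


@RunIf(skip_windows=True)  # replaces skipif on sys.platform == "win32"
def test_non_windows_example(): ...


@RunIf(standalone=True)  # selected only when run through run_standalone_tests.sh
def test_standalone_example(): ...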
