ci: add test for Huggingface Accelerate #25
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Linux Accelerate Test

# Triggers:
#   * pull_request against main, limited to changes in the helper script,
#     the print-environment action, or the accelerate workflow file;
#   * workflow_dispatch (manual run) with the overridable inputs below.
on:
  pull_request:
    branches:
      - main
    paths:
      - '.github/scripts/parse-junitxml.py'
      - '.github/actions/print-environment/action.yml'
      - '.github/workflows/_linux_accelerate.yml'
  workflow_dispatch:
    inputs:
      # NOTE(review): `pytorch` is declared here but no step in this workflow
      # reads it; the job always installs the nightly XPU wheels.
      pytorch:
        required: false
        type: string
        default: 'nightly'
        description: Pytorch branch/commit
      # Quoted so the version stays the string '3.10' and is not parsed as
      # the float 3.1.
      python:
        required: false
        type: string
        default: '3.10'
        description: Python version
      runner:
        required: true
        type: string
        default: 'linux.idc.xpu'
        description: Runner label
      # Git ref of huggingface/accelerate to check out.
      accelerate:
        required: false
        type: string
        default: 'v1.2.1'
        description: Accelerate version
      # Version passed to `pip install transformers==...`.
      transformers:
        required: false
        type: string
        default: 'v4.47.1'
        description: Transformers version

# Grant the workflow token read-only access to every scope.
permissions: read-all
jobs:
  # Runs the Hugging Face Accelerate test suite against the nightly XPU build
  # of PyTorch inside a dedicated conda environment, then publishes the
  # results to the step summary and uploads the logs as an artifact.
  Torch-XPU-Accelerate-Tests:
    # `inputs` is empty on pull_request events, so fall back to the default
    # runner label in that case.
    runs-on: ${{ inputs.runner != '' && inputs.runner || 'linux.idc.xpu' }}
    env:
      CONDA_ENV_NAME: 'huggingface_accelerate_test'
      WORK_DIR: 'accelerate'
      # Environment variables are always strings; quote the numeric-looking
      # values so no YAML loader retypes them.
      NEOReadDebugKeys: '0'
      DisableScratchPages: '0'
      # Resolved versions: the dispatch input when given, else the default.
      accelerate: ${{ inputs.accelerate != '' && inputs.accelerate || 'v1.2.1' }}
      transformers: ${{ inputs.transformers != '' && inputs.transformers || 'v4.47.1' }}
      python: ${{ inputs.python != '' && inputs.python || '3.10' }}
      PYTORCH_DEBUG_XPU_FALLBACK: '1'
      ZE_AFFINITY_MASK: '0'
      PARSE_JUNIT: ${{ github.workspace }}/torch-xpu-ops/.github/scripts/parse-junitxml.py
    steps:
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
        with:
          path: torch-xpu-ops
      - name: Checkout Accelerate
        uses: actions/checkout@v4
        with:
          repository: huggingface/accelerate
          ref: ${{ env.accelerate }}
          path: accelerate
      - name: Prepare Conda ENV
        # User-supplied versions are read through the shell variables
        # exported by the job-level `env` ($python, $transformers) instead of
        # being expanded into the script text, so a crafted dispatch input
        # cannot inject shell commands.
        run: |
          which conda && conda clean -ay
          conda remove --all -y -n "$CONDA_ENV_NAME" || rm -rf "$(dirname "${CONDA_EXE}")/../envs/$CONDA_ENV_NAME"
          conda create -y -n "$CONDA_ENV_NAME" "python=${python}"
          source activate "$CONDA_ENV_NAME"
          pip install junitparser
          pip install "transformers==${transformers}"
      - name: Prepare Stock XPU Pytorch
        run: |
          source activate "$CONDA_ENV_NAME"
          pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
      - name: Prepare Accelerate
        run: |
          source activate "$CONDA_ENV_NAME"
          cd "$WORK_DIR"
          pip install -e .
          pip install -e ".[testing]"
          # Start from clean log/report directories.
          rm -rf tests_log && mkdir -p tests_log
          rm -rf reports
          cp ${{ github.workspace }}/torch-xpu-ops/.github/scripts/spec.py ./
      - name: Report installed versions
        run: |
          source activate "$CONDA_ENV_NAME"
          echo "pip installed packages:"
          pip list | tee "${{ github.workspace }}/$WORK_DIR/tests_log/pip_list.txt"
          echo "lspci gpu devices:"
          lspci -d ::0380 | tee "${{ github.workspace }}/$WORK_DIR/tests_log/lspci_0380.txt"
          echo "GPU render nodes:"
          cat /sys/class/drm/render*/device/device | tee "${{ github.workspace }}/$WORK_DIR/tests_log/device_IDs.txt"
          echo "xpu-smi output:"
          xpu-smi discovery -y --json --dump -1
      - name: Sanity check installed packages
        run: |
          source activate "$CONDA_ENV_NAME"
          # These checks are to exit earlier if for any reason torch
          # packages were reinstalled back to CUDA versions (not expected).
          pip show torch | grep Version | grep xpu
          pip show torchaudio | grep Version | grep xpu
          pip show torchvision | grep Version | grep xpu
          python -c 'import torch; exit(not torch.xpu.is_available())'
      - name: Run tests
        run: |
          source activate "$CONDA_ENV_NAME"
          cd "$WORK_DIR" && rm -rf reports && mkdir -p reports
          # Excluding tests due to:
          # * tests/test_examples.py::FeatureExamplesTests::test_profiler fails on
          #   Kineto profiler initialization for XPU device: PTI_ERROR_INTERNAL
          # * tests/test_cli.py::ModelEstimatorTester::test_gated for failures due
          #   to not root caused environment configuration issue
          pattern="not test_profiler and not test_gated"
          cmd=(python3 -m pytest -rsf --junitxml=reports/accelerate.xml -k "$pattern" tests/)
          # Record the exact (shell-quoted) command in the step summary
          # before running it.
          {
            echo "### Running"
            echo "\`\`\`"
            echo "${cmd[@]@Q}"
            echo "\`\`\`"
          } >> "$GITHUB_STEP_SUMMARY"
          "${cmd[@]}"
      - name: Print result tables
        # Runs even when the tests failed, unless the run was cancelled.
        if: ${{ ! cancelled() }}
        run: |
          source activate "$CONDA_ENV_NAME"
          cd "$WORK_DIR"
          {
            echo "### Results"
            python3 "$PARSE_JUNIT" reports/accelerate.xml --stats
            echo "### Failed"
            python3 "$PARSE_JUNIT" reports/accelerate.xml --errors --failed
            echo "### Skipped"
            python3 "$PARSE_JUNIT" reports/accelerate.xml --skipped
          } >> "$GITHUB_STEP_SUMMARY"
      - name: Print environment
        if: ${{ ! cancelled() }}
        uses: ./torch-xpu-ops/.github/actions/print-environment
        with:
          # `with:` values are not shell-expanded, so pass the resolved
          # environment name rather than the literal text "$CONDA_ENV_NAME".
          conda: ${{ env.CONDA_ENV_NAME }}
          pip_packages: 'accelerate transformers'
      - name: Upload Test log
        if: ${{ ! cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          # PR number for pull_request runs, commit SHA otherwise.
          name: Torch-XPU-Accelerate-Log-${{ github.event.pull_request.number || github.sha }}
          path: |
            ${{ github.workspace }}/accelerate/reports
            ${{ github.workspace }}/accelerate/tests_log