# ci: add test for Huggingface Accelerate (#25)

# Workflow that exercises the Hugging Face Accelerate test suite.
name: Linux Accelerate Test

on:
  # Automatic runs: pull requests against `main` that modify one of the
  # files listed under `paths`.
  pull_request:
    branches:
      - main
    paths:
      - '.github/scripts/parse-junitxml.py'
      - '.github/actions/print-environment/action.yml'
      - '.github/workflows/_linux_accelerate.yml'
  # Manual runs: every input is a string with a default value.
  workflow_dispatch:
    inputs:
      pytorch:
        description: Pytorch branch/commit
        type: string
        required: false
        default: 'nightly'
      python:
        description: Python version
        type: string
        required: false
        # Quoted so the version is not read as the float 3.1.
        default: '3.10'
      runner:
        description: Runner label
        type: string
        required: true
        default: 'linux.idc.xpu'
      accelerate:
        description: Accelerate version
        type: string
        required: false
        default: 'v1.2.1'
      transformers:
        description: Transformers version
        type: string
        required: false
        default: 'v4.47.1'

# Read-only access for every permission scope of the workflow token.
permissions: read-all
jobs:
  # Builds a fresh conda environment, installs nightly XPU PyTorch wheels plus
  # Accelerate (editable, with test extras), runs the Accelerate pytest suite
  # and publishes the JUnit results to the step summary and as an artifact.
  Torch-XPU-Accelerate-Tests:
    # Each `inputs.X != '' && inputs.X || '<default>'` expression below falls
    # back to the default whenever the corresponding input is empty.
    runs-on: ${{ inputs.runner != '' && inputs.runner || 'linux.idc.xpu' }}
    env:
      CONDA_ENV_NAME: 'huggingface_accelerate_test'
      # Directory (relative to the workspace) of the Accelerate checkout.
      WORK_DIR: 'accelerate'
      # Environment values are always strings; quote the numeric-looking ones
      # so no YAML loader re-types them.
      NEOReadDebugKeys: '0'
      DisableScratchPages: '0'
      accelerate: ${{ inputs.accelerate != '' && inputs.accelerate || 'v1.2.1' }}
      transformers: ${{ inputs.transformers != '' && inputs.transformers || 'v4.47.1' }}
      python: ${{ inputs.python != '' && inputs.python || '3.10' }}
      PYTORCH_DEBUG_XPU_FALLBACK: '1'
      ZE_AFFINITY_MASK: '0'
      PARSE_JUNIT: ${{ github.workspace }}/torch-xpu-ops/.github/scripts/parse-junitxml.py
    steps:
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
        with:
          path: torch-xpu-ops

      - name: Checkout Accelerate
        uses: actions/checkout@v4
        with:
          repository: huggingface/accelerate
          ref: ${{ env.accelerate }}
          path: accelerate

      - name: Prepare Conda ENV
        # User-selectable versions are read from the job-level environment
        # ($python, $transformers) instead of being pasted into the script
        # with `${{ }}`, so their content is never parsed as shell code.
        run: |
          which conda && conda clean -ay
          # Drop any stale environment; fall back to deleting its directory.
          conda remove --all -y -n "$CONDA_ENV_NAME" || rm -rf "$(dirname "${CONDA_EXE}")/../envs/$CONDA_ENV_NAME"
          conda create -y -n "$CONDA_ENV_NAME" "python=${python}"
          source activate "$CONDA_ENV_NAME"
          pip install junitparser
          pip install "transformers==${transformers}"

      - name: Prepare Stock XPU Pytorch
        # Always installs the nightly XPU wheels; the `pytorch` workflow input
        # is not consulted by this step.
        run: |
          source activate "$CONDA_ENV_NAME"
          pip install torch torchvision torchaudio --pre \
            --index-url https://download.pytorch.org/whl/nightly/xpu

      - name: Prepare Accelerate
        run: |
          source activate "$CONDA_ENV_NAME"
          cd "$WORK_DIR"
          pip install -e .
          pip install -e ".[testing]"
          # Start from empty log/report directories.
          rm -rf tests_log && mkdir -p tests_log
          rm -rf reports
          cp "${GITHUB_WORKSPACE}/torch-xpu-ops/.github/scripts/spec.py" ./

      - name: Report installed versions
        run: |
          source activate "$CONDA_ENV_NAME"
          log_dir="${GITHUB_WORKSPACE}/${WORK_DIR}/tests_log"
          echo "pip installed packages:"
          pip list | tee "${log_dir}/pip_list.txt"
          echo "lspci gpu devices:"
          lspci -d ::0380 | tee "${log_dir}/lspci_0380.txt"
          echo "GPU render nodes:"
          cat /sys/class/drm/render*/device/device | tee "${log_dir}/device_IDs.txt"
          echo "xpu-smi output:"
          xpu-smi discovery -y --json --dump -1

      - name: Sanity check installed packages
        run: |
          source activate "$CONDA_ENV_NAME"
          # These checks are to exit earlier if for any reason torch
          # packages were reinstalled back to CUDA versions (not expected).
          pip show torch | grep Version | grep xpu
          pip show torchaudio | grep Version | grep xpu
          pip show torchvision | grep Version | grep xpu
          python -c 'import torch; exit(not torch.xpu.is_available())'

      - name: Run tests
        run: |
          source activate "$CONDA_ENV_NAME"
          cd "$WORK_DIR" && rm -rf reports && mkdir -p reports
          # Excluding tests due to:
          # * tests/test_examples.py::FeatureExamplesTests::test_profiler fails on
          #   Kineto profiler initialization for XPU device: PTI_ERROR_INTERNAL
          # * tests/test_cli.py::ModelEstimatorTester::test_gated for failures due
          #   to not root caused environment configuration issue
          pattern="not test_profiler and not test_gated"
          cmd=(python3 -m pytest -rsf --junitxml=reports/accelerate.xml -k "$pattern" tests/)
          # Record the exact (shell-quoted) command line in the step summary.
          {
            echo "### Running"
            echo "\`\`\`"
            echo "${cmd[@]@Q}"
            echo "\`\`\`"
          } >> "$GITHUB_STEP_SUMMARY"
          "${cmd[@]}"

      - name: Print result tables
        # Runs after test failures too, but not when the run was cancelled.
        if: ${{ ! cancelled() }}
        run: |
          source activate "$CONDA_ENV_NAME"
          cd "$WORK_DIR"
          {
            echo "### Results"
            python3 "$PARSE_JUNIT" reports/accelerate.xml --stats
            echo "### Failed"
            python3 "$PARSE_JUNIT" reports/accelerate.xml --errors --failed
            echo "### Skipped"
            python3 "$PARSE_JUNIT" reports/accelerate.xml --skipped
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Print environment
        if: ${{ ! cancelled() }}
        uses: ./torch-xpu-ops/.github/actions/print-environment
        with:
          # NOTE(review): the literal text `$CONDA_ENV_NAME` is passed here;
          # presumably the action expands it in a shell - confirm.
          conda: $CONDA_ENV_NAME
          pip_packages: 'accelerate transformers'

      - name: Upload Test log
        if: ${{ ! cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: Torch-XPU-Accelerate-Log-${{ github.event.pull_request.number || github.sha }}
          path: |
            ${{ github.workspace }}/accelerate/reports
            ${{ github.workspace }}/accelerate/tests_log