# ci: add test for Huggingface Accelerate (#4)

# Runs the Hugging Face Accelerate test suite against a stock PyTorch XPU
# nightly build and publishes the results to the job summary.
#
# Triggers:
#   * pull requests targeting `main` that touch this workflow file or
#     `.github/scripts/spec.py`
#   * manual dispatch, with optional overrides for the PyTorch ref, Python
#     version, runner label, nightly wheel version and Accelerate ref
name: Linux Accelerate Test

on:
  pull_request:
    branches:
      - main
    paths:
      - '.github/scripts/spec.py'
      - '.github/workflows/_linux_accelerate.yml'
  workflow_dispatch:
    inputs:
      pytorch:
        required: false
        type: string
        default: 'nightly'
        description: Pytorch branch/commit
      python:
        required: false
        type: string
        default: '3.10'
        description: Python version
      runner:
        required: true
        type: string
        default: 'linux.idc.xpu'
        description: Runner label
      nightly_whl:
        required: false
        type: string
        default: ''
        description: Pytorch nightly wheel version
      accelerate:
        required: false
        type: string
        default: 'v1.2.1'
        description: Accelerate version

permissions: read-all

jobs:
  Torch-XPU-Accelerate-Tests:
    # `inputs.*` are empty on pull_request events, so every input-derived
    # value below falls back to the same default the dispatch form offers.
    runs-on: ${{ inputs.runner != '' && inputs.runner || 'linux.idc.xpu' }}
    env:
      CONDA_ENV_NAME: 'huggingface_accelerate_test'
      WORK_DIR: 'accelerate'
      # Environment values are always strings at runtime; quoted to make
      # that explicit.
      NEOReadDebugKeys: '0'
      DisableScratchPages: '0'
      accelerate: ${{ inputs.accelerate != '' && inputs.accelerate || 'v1.2.1' }}
      python: ${{ inputs.python != '' && inputs.python || '3.10' }}
      pytorch: ${{ inputs.pytorch != '' && inputs.pytorch || 'nightly' }}
      PYTORCH_DEBUG_XPU_FALLBACK: '1'
      ZE_AFFINITY_MASK: '0'
      PARSE_JUNIT: ${{ github.workspace }}/torch-xpu-ops/.github/scripts/parse-junitxml.py
    steps:
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
        with:
          path: torch-xpu-ops

      - name: Checkout Accelerate
        uses: actions/checkout@v4
        with:
          repository: huggingface/accelerate
          ref: ${{ env.accelerate }}
          path: accelerate

      # - name: Prepare OS environment
      #   run: |
      #     sudo apt-get update
      #     sudo apt-get install -y \
      #       espeak-ng \
      #       git-lfs \
      #       pkg-config \
      #       libavcodec-dev \
      #       libavdevice-dev \
      #       libavfilter-dev \
      #       libavformat-dev \
      #       libavutil-dev \
      #       libswresample-dev \
      #       libswscale-dev
      #     git lfs install

      # Recreates the conda environment from scratch on every run.
      - name: Prepare Conda ENV
        run: |
          which conda && conda clean -ay
          # Fall back to deleting the env directory if `conda remove` fails.
          conda remove --all -y -n $CONDA_ENV_NAME || rm -rf $(dirname ${CONDA_EXE})/../envs/$CONDA_ENV_NAME
          # The Python version is read from the job-level `python` env var
          # instead of being templated into the script text.
          conda create -y -n $CONDA_ENV_NAME "python=${python}"
          source activate $CONDA_ENV_NAME
          pip install junitparser

      - name: Prepare Stock XPU Pytorch
        env:
          # Passed through the environment rather than interpolated into the
          # script, so the dispatch input cannot inject shell syntax.
          NIGHTLY_WHL: ${{ inputs.nightly_whl }}
        run: |
          source activate $CONDA_ENV_NAME
          # An empty NIGHTLY_WHL means "latest nightly"; otherwise pin torch.
          if [ -z "$NIGHTLY_WHL" ]; then
            pip install torch torchvision torchaudio --pre \
              --index-url https://download.pytorch.org/whl/nightly/xpu
          else
            pip install "torch==$NIGHTLY_WHL" torchvision torchaudio --pre \
              --index-url https://download.pytorch.org/whl/nightly/xpu
          fi

      - name: Prepare Accelerate
        run: |
          source activate $CONDA_ENV_NAME
          cd $WORK_DIR
          pip install -e .
          pip install -e ".[testing]"
          # Start with empty log/report directories.
          rm -rf tests_log && mkdir -p tests_log
          rm -rf reports
          cp ${{ github.workspace }}/torch-xpu-ops/.github/scripts/spec.py ./

      # Records package and hardware details in tests_log/, which is uploaded
      # as an artifact by the last step.
      - name: Report installed versions
        run: |
          source activate $CONDA_ENV_NAME
          echo "pip installed packages:"
          pip list | tee ${{ github.workspace }}/$WORK_DIR/tests_log/pip_list.txt
          echo "lspci gpu devices:"
          lspci -d ::0380 | tee ${{ github.workspace }}/$WORK_DIR/tests_log/lspci_0380.txt
          echo "GPU render nodes:"
          cat /sys/class/drm/render*/device/device | tee ${{ github.workspace }}/$WORK_DIR/tests_log/device_IDs.txt
          echo "xpu-smi output:"
          xpu-smi discovery -y --json --dump -1

      - name: Sanity check installed packages
        run: |
          source activate $CONDA_ENV_NAME
          # These checks are to exit earlier if for any reason torch
          # packages were reinstalled back to CUDA versions (not expected).
          pip show torch | grep Version | grep xpu
          pip show torchaudio | grep Version | grep xpu
          pip show torchvision | grep Version | grep xpu
          python -c 'import torch; exit(not torch.xpu.is_available())'

      # NOTE(review): the step name and TEST_CASE look like leftovers from
      # another workflow; TEST_CASE is not referenced by any step in this
      # file and the command below runs the whole tests/ tree. Confirm and
      # rename.
      - name: Run -k backbone tests
        env:
          TEST_CASE: 'tests_backbone'
        run: |
          source activate $CONDA_ENV_NAME
          cd $WORK_DIR && rm -rf reports && mkdir -p reports
          # Excluding tests due to:
          # * tests/test_examples.py::FeatureExamplesTests::test_profiler fails on
          #   Kineto profiler initialization for XPU device: PTI_ERROR_INTERNAL
          pattern="not test_profiler"
          cmd="python3 -m pytest -rsf --junitxml=reports/accelerate.xml -k \"$pattern\" tests/"
          {
            echo "### Running"
            # Single quotes: inside double quotes the backticks would be
            # parsed as command substitution.
            echo '```'
            echo "$cmd"
            echo '```'
          } >> "$GITHUB_STEP_SUMMARY"
          # `eval` re-parses the string so the quoted -k expression reaches
          # pytest as one argument; a bare $cmd would word-split it and pass
          # the quote characters through literally.
          eval "$cmd"

      - name: Print result tables
        if: ${{ ! cancelled() }}
        run: |
          source activate $CONDA_ENV_NAME
          cd $WORK_DIR
          {
            echo "### Results"
            python3 $PARSE_JUNIT reports/accelerate.xml --stats
            echo "### Failed"
            python3 $PARSE_JUNIT reports/accelerate.xml --errors --failed
            echo "### Skipped"
            python3 $PARSE_JUNIT reports/accelerate.xml --skipped
          } >> "$GITHUB_STEP_SUMMARY"

      # Emits a two-column markdown table of environment facts (OS, kernel,
      # driver packages, Python packages, GPU ids, key env vars).
      - name: Print annotations
        if: ${{ ! cancelled() }}
        run: |
          source activate $CONDA_ENV_NAME
          {
            echo "### Annotations"
            echo "| | |"
            echo "| --- | --- |"
            echo "| jobs.$GITHUB_JOB.versions.os | $(source /etc/os-release && echo $VERSION_ID) |"
            echo "| jobs.$GITHUB_JOB.versions.linux-kernel | $(uname -r) |"
            echo "| jobs.$GITHUB_JOB.versions.python | $(python --version | cut -f2 -d' ') |"
            packages=" \
              level-zero \
              libigc1 \
              libigc2 \
              libze1 \
              libze-intel-gpu1 \
              intel-i915-dkms \
              intel-level-zero-gpu \
              intel-opencl-icd"
            for package in $packages; do
              # awk splits on runs of whitespace, so the version is always
              # the third column of the first matching installed (ii) line.
              package_version=$(dpkg -l | grep "$package" | grep ii | head -1 | awk '{print $3}')
              echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |"
            done
            packages="accelerate \
              numpy \
              torch \
              torchaudio \
              torchvision"
            for package in $packages; do
              # `|| true` keeps the step going when a package is missing.
              package_version=$(python -c "import $package; print($package.__version__)" || true)
              echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |"
            done
            # printing annotations for GPU cards
            var="[$(cat /sys/class/drm/render*/device/vendor || true)]"
            echo "| jobs.$GITHUB_JOB.drm.render_nodes_vendor_ids | $(echo $var | sed 's/ /,/g') |"
            var="[$(cat /sys/class/drm/render*/device/device || true)]"
            echo "| jobs.$GITHUB_JOB.drm.render_nodes_device_ids | $(echo $var | sed 's/ /,/g') |"
            var=$(python -c "import torch; print(torch.version.xpu)" || true)
            echo "| jobs.$GITHUB_JOB.torch.version.xpu | $var |"
            var=$(python -c "import torch; print(torch.xpu.device_count())" || true)
            echo "| jobs.$GITHUB_JOB.torch.xpu.device_count | $var |"
            # printing annotations with key environment variables
            echo "| jobs.$GITHUB_JOB.env.ZE_AFFINITY_MASK | $ZE_AFFINITY_MASK |"
            echo "| jobs.$GITHUB_JOB.env.NEOReadDebugKeys | $NEOReadDebugKeys |"
            echo "| jobs.$GITHUB_JOB.env.PYTORCH_ENABLE_XPU_FALLBACK | $PYTORCH_ENABLE_XPU_FALLBACK |"
            echo "| jobs.$GITHUB_JOB.env.PYTORCH_DEBUG_XPU_FALLBACK | $PYTORCH_DEBUG_XPU_FALLBACK |"
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Upload Test log
        if: ${{ ! cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          # PR number when available, otherwise the commit SHA.
          name: Torch-XPU-Accelerate-Log-${{ github.event.pull_request.number || github.sha }}
          path: |
            ${{ github.workspace }}/accelerate/reports
            ${{ github.workspace }}/accelerate/tests_log