ci: add test for Huggingface Accelerate #6
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: display name, triggers, manual-run inputs and token
# permissions for the Linux Accelerate test workflow.
name: Linux Accelerate Test

on:
  # On pull requests targeting main, run only when one of the listed files
  # is touched.
  pull_request:
    branches:
      - main
    paths:
      - '.github/scripts/spec.py'
      - '.github/workflows/_linux_accelerate.yml'
  # Manual trigger. Every input has a default; the job additionally falls
  # back to the same values when an input is empty.
  workflow_dispatch:
    inputs:
      pytorch:
        required: false
        type: string
        default: 'nightly'
        description: Pytorch branch/commit
      python:
        required: false
        type: string
        # Quoted so the version stays the string 3.10 rather than the
        # float 3.1.
        default: '3.10'
        description: Python version
      runner:
        required: true
        type: string
        default: 'linux.idc.xpu'
        description: Runner label
      nightly_whl:
        required: false
        type: string
        # Empty means "do not pin torch to a specific nightly wheel".
        default: ''
        description: Pytorch nightly wheel version
      accelerate:
        required: false
        type: string
        default: 'v1.2.1'
        description: Accelerate version

# Grant the workflow token read-only access to every scope.
permissions: read-all
jobs:
  # Installs nightly XPU builds of torch/torchvision/torchaudio into a fresh
  # conda environment, installs Hugging Face Accelerate from a checkout, runs
  # its pytest suite, then publishes result tables and environment details to
  # the step summary and uploads the reports as an artifact.
  Torch-XPU-Accelerate-Tests:
    # The `inputs` context is empty for pull_request runs, hence the fallback.
    runs-on: ${{ inputs.runner != '' && inputs.runner || 'linux.idc.xpu' }}
    env:
      CONDA_ENV_NAME: 'huggingface_accelerate_test'
      # Directory (relative to the workspace) where Accelerate is checked out.
      WORK_DIR: 'accelerate'
      NEOReadDebugKeys: 0
      DisableScratchPages: 0
      accelerate: ${{ inputs.accelerate != '' && inputs.accelerate || 'v1.2.1' }}
      python: ${{ inputs.python != '' && inputs.python || '3.10' }}
      # NOTE(review): `pytorch` is not read by any step shown in this job;
      # torch is always installed from the nightly XPU index - confirm intent.
      pytorch: ${{ inputs.pytorch != '' && inputs.pytorch || 'nightly' }}
      PYTORCH_DEBUG_XPU_FALLBACK: 1
      ZE_AFFINITY_MASK: 0
      # JUnit XML parser shipped in the torch-xpu-ops checkout.
      PARSE_JUNIT: ${{ github.workspace }}/torch-xpu-ops/.github/scripts/parse-junitxml.py
    steps:
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
        with:
          path: torch-xpu-ops
      - name: Checkout Accelerate
        uses: actions/checkout@v4
        with:
          repository: huggingface/accelerate
          ref: ${{ env.accelerate }}
          path: accelerate
      # - name: Prepare OS environment
      #   run: |
      #     sudo apt-get update
      #     sudo apt-get install -y \
      #       espeak-ng \
      #       git-lfs \
      #       pkg-config \
      #       libavcodec-dev \
      #       libavdevice-dev \
      #       libavfilter-dev \
      #       libavformat-dev \
      #       libavutil-dev \
      #       libswresample-dev \
      #       libswscale-dev
      #     git lfs install
      - name: Prepare Conda ENV
        run: |
          which conda && conda clean -ay
          # Drop any previous environment; fall back to deleting its directory
          # when `conda remove` fails.
          conda remove --all -y -n $CONDA_ENV_NAME || rm -rf $(dirname ${CONDA_EXE})/../envs/$CONDA_ENV_NAME
          # The Python version comes from the job-level `python` env var, so
          # it is read as a shell variable rather than pasted into the script.
          conda create -y -n $CONDA_ENV_NAME "python=$python"
          source activate $CONDA_ENV_NAME
          pip install junitparser
      - name: Prepare Stock XPU Pytorch
        env:
          # Handed over via the environment so the input's text is never
          # parsed as shell code.
          NIGHTLY_WHL: ${{ inputs.nightly_whl }}
        run: |
          source activate $CONDA_ENV_NAME
          if [ -z "$NIGHTLY_WHL" ]; then
            pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
          else
            # Pin torch to the requested nightly wheel version.
            pip install "torch==$NIGHTLY_WHL" torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
          fi
      - name: Prepare Accelerate
        run: |
          source activate $CONDA_ENV_NAME
          cd $WORK_DIR
          pip install -e .
          pip install -e ".[testing]"
          rm -rf tests_log && mkdir -p tests_log
          rm -rf reports
          cp ${{ github.workspace }}/torch-xpu-ops/.github/scripts/spec.py ./
      - name: Report installed versions
        run: |
          source activate $CONDA_ENV_NAME
          echo "pip installed packages:"
          pip list | tee ${{ github.workspace }}/$WORK_DIR/tests_log/pip_list.txt
          echo "lspci gpu devices:"
          lspci -d ::0380 | tee ${{ github.workspace }}/$WORK_DIR/tests_log/lspci_0380.txt
          echo "GPU render nodes:"
          cat /sys/class/drm/render*/device/device | tee ${{ github.workspace }}/$WORK_DIR/tests_log/device_IDs.txt
          echo "xpu-smi output:"
          xpu-smi discovery -y --json --dump -1
      - name: Sanity check installed packages
        run: |
          source activate $CONDA_ENV_NAME
          # These checks are to exit earlier if for any reason torch
          # packages were reinstalled back to CUDA versions (not expected).
          pip show torch | grep Version | grep xpu
          pip show torchaudio | grep Version | grep xpu
          pip show torchvision | grep Version | grep xpu
          python -c 'import torch; exit(not torch.xpu.is_available())'
      # NOTE(review): the step name and TEST_CASE mention "backbone", but the
      # command below runs everything under tests/ except test_profiler, and
      # this script does not read TEST_CASE - confirm and rename if stale.
      - name: Run -k backbone tests
        env:
          TEST_CASE: 'tests_backbone'
        run: |
          source activate $CONDA_ENV_NAME
          cd $WORK_DIR && rm -rf reports && mkdir -p reports
          # Excluding tests due to:
          # * tests/test_examples.py::FeatureExamplesTests::test_profiler fails on
          #   Kineto profiler initialization for XPU device: PTI_ERROR_INTERNAL
          pattern="not test_profiler"
          cmd=(python3 -m pytest -rsf --junitxml=reports/accelerate.xml -k "$pattern" tests/)
          # Record the exact, shell-quoted command line in the step summary.
          {
            echo "### Running"
            echo "\`\`\`"
            echo "${cmd[@]@Q}"
            echo "\`\`\`"
          } >> $GITHUB_STEP_SUMMARY
          "${cmd[@]}"
      - name: Print result tables
        if: ${{ ! cancelled() }}
        run: |
          source activate $CONDA_ENV_NAME
          cd $WORK_DIR
          {
            echo "### Results"
            python3 $PARSE_JUNIT reports/accelerate.xml --stats
            echo "### Failed"
            python3 $PARSE_JUNIT reports/accelerate.xml --errors --failed
            echo "### Skipped"
            python3 $PARSE_JUNIT reports/accelerate.xml --skipped
          } >> $GITHUB_STEP_SUMMARY
      - name: Print annotations
        if: ${{ ! cancelled() }}
        run: |
          source activate $CONDA_ENV_NAME
          {
            echo "### Annotations"
            echo "| | |"
            echo "| --- | --- |"
            echo "| jobs.$GITHUB_JOB.versions.os | $(source /etc/os-release && echo $VERSION_ID) |"
            echo "| jobs.$GITHUB_JOB.versions.linux-kernel | $(uname -r) |"
            echo "| jobs.$GITHUB_JOB.versions.python | $(python --version | cut -f2 -d' ') |"
            packages=" \
              level-zero \
              libigc1 \
              libigc2 \
              libze1 \
              libze-intel-gpu1 \
              intel-i915-dkms \
              intel-level-zero-gpu \
              intel-opencl-icd"
            for package in $packages; do
              # Squeeze runs of spaces so the version is reliably the third
              # space-separated field of the first installed ("ii") match.
              package_version=$(dpkg -l | grep $package | grep ii | head -1 | tr -s ' ' | cut -f3 -d" ")
              echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |"
            done
            packages="accelerate \
              numpy \
              torch \
              torchaudio \
              torchvision"
            for package in $packages; do
              # `|| true` keeps the summary going when a package is missing.
              package_version=$(python -c "import $package; print($package.__version__)" || true)
              echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |"
            done
            # printing annotations for GPU cards
            var="[$(cat /sys/class/drm/render*/device/vendor || true)]"
            echo "| jobs.$GITHUB_JOB.drm.render_nodes_vendor_ids | $(echo $var | sed 's/ /,/g') |"
            var="[$(cat /sys/class/drm/render*/device/device || true)]"
            echo "| jobs.$GITHUB_JOB.drm.render_nodes_device_ids | $(echo $var | sed 's/ /,/g') |"
            var=$(python -c "import torch; print(torch.version.xpu)" || true)
            echo "| jobs.$GITHUB_JOB.torch.version.xpu | $var |"
            var=$(python -c "import torch; print(torch.xpu.device_count())" || true)
            echo "| jobs.$GITHUB_JOB.torch.xpu.device_count | $var |"
            # printing annotations with key environment variables
            echo "| jobs.$GITHUB_JOB.env.ZE_AFFINITY_MASK | $ZE_AFFINITY_MASK |"
            echo "| jobs.$GITHUB_JOB.env.NEOReadDebugKeys | $NEOReadDebugKeys |"
            # NOTE(review): PYTORCH_ENABLE_XPU_FALLBACK is not set in this
            # job's env; the cell is empty unless the runner provides it.
            echo "| jobs.$GITHUB_JOB.env.PYTORCH_ENABLE_XPU_FALLBACK | $PYTORCH_ENABLE_XPU_FALLBACK |"
            echo "| jobs.$GITHUB_JOB.env.PYTORCH_DEBUG_XPU_FALLBACK | $PYTORCH_DEBUG_XPU_FALLBACK |"
          } >> $GITHUB_STEP_SUMMARY
      - name: Upload Test log
        if: ${{ ! cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          # PR number when available, otherwise the commit SHA.
          name: Torch-XPU-Accelerate-Log-${{ github.event.pull_request.number || github.sha }}
          path: |
            ${{ github.workspace }}/accelerate/reports
            ${{ github.workspace }}/accelerate/tests_log