Triton benchmarks igc-90431 LargeIO #1182
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Display name of the workflow.
name: Triton benchmarks
# Title of an individual run, taken from the `run_name` dispatch input.
run-name: ${{ inputs.run_name }}
# Triggers: manual dispatch (with inputs), a daily schedule, and pull
# requests against `main` that touch this workflow or the benchmarks.
on:
  workflow_dispatch:
    inputs:
      runner_label:
        description: Runner label, keep empty for default
        type: string
        default: ""
      tag:
        description: Tag for benchmark results
        type: string
        default: "test"
      benchmarking_method:
        description: The method used to obtain performance numbers
        type: choice
        options:
          - PYTORCH_LEGACY_PROFILER_USING_IPEX
          - ELAPSED_TIME
          - UPSTREAM_PYTORCH_PROFILER
        default: PYTORCH_LEGACY_PROFILER_USING_IPEX
      run_name:
        description: Run name
        type: string
        default: "Triton benchmarks"
      skip_benchmarks:
        # Parsed with fromJson() in the benchmark steps' `if` conditions.
        description: JSON list of benchmarks to skip
        type: string
        default: "[]"
      use_pyenv_python:
        description: Use Python built with pyenv
        type: boolean
        default: false
  schedule:
    # Every day at 23:05.
    - cron: "5 23 * * *"
  pull_request:
    branches:
      - main
    paths:
      - .github/workflows/triton-benchmarks.yml
      - benchmarks/**
permissions: read-all

env:
  # Quoted so the value stays the string "3.10" rather than the float 3.1.
  PYTHON_VERSION: "3.10"
  # The dispatch input when present, otherwise the IPEX legacy profiler.
  BENCHMARKING_METHOD: ${{ inputs.benchmarking_method || 'PYTORCH_LEGACY_PROFILER_USING_IPEX' }}
  # '1' for every event other than workflow_dispatch, and for dispatch runs
  # that select PYTORCH_LEGACY_PROFILER_USING_IPEX; '0' otherwise.
  USE_IPEX: ${{ github.event_name != 'workflow_dispatch' && '1' || inputs.benchmarking_method == 'PYTORCH_LEGACY_PROFILER_USING_IPEX' && '1' || '0' }}
  # Resolution order: explicit `tag` input, then `pr-<number>` for pull
  # requests, then 'ci' for scheduled runs, then 'test'.
  TAG: ${{ inputs.tag || (github.event_name == 'pull_request' && format('pr-{0}', github.event.number)) || (github.event_name == 'schedule' && 'ci') || 'test' }}
jobs:
  # Single job: set up Python/PyTorch, build and install Triton, run the
  # flash-attention benchmarks and upload the generated reports.
  build:
    name: Triton benchmarks
    runs-on:
      # Falls back to the 'max1550' label when no runner label is given.
      - ${{ inputs.runner_label || 'max1550' }}
    timeout-minutes: 720
    defaults:
      run:
        # Every `run` step sources the oneAPI setvars script before the
        # step script itself and runs with `-e` and `pipefail` enabled.
        shell: bash -noprofile --norc -eo pipefail -c "source /opt/intel/oneapi/setvars.sh > /dev/null; source {0}"
    steps:
      - name: Print inputs
        # Quoted heredoc delimiter: the JSON is printed verbatim, so `$` or
        # backticks inside an input value are not expanded by the shell.
        run: |
          cat <<'EOF'
          ${{ toJSON(inputs) }}
          EOF

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Load pip cache
        id: pip-cache
        uses: ./.github/actions/load
        with:
          path: $HOME/.cache/pip
          # pip cache per commit id just to minimize network traffic
          key: pip-$PYTHON_VERSION-$GITHUB_SHA

      # Exactly one of the two "Install Python" steps runs, selected by the
      # `use_pyenv_python` input (treated as false when it is not set).
      - name: Install Python
        if: ${{ !(inputs.use_pyenv_python || false) }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      # The name shows env.PYTHON_VERSION, the same value passed to the
      # action below; no `python_version` input is declared by this workflow.
      - name: Install Python (from pyenv) ${{ env.PYTHON_VERSION }}
        if: ${{ inputs.use_pyenv_python }}
        uses: ./.github/actions/setup-pyenv-python
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Identify Python version
        # Overwrites PYTHON_VERSION for later steps with the major.minor
        # version reported by the `python` found on PATH.
        run: |
          PYTHON_VERSION="$(python -c 'import sys; print(f"{sys.version_info[0]}.{ sys.version_info[1]}")')"
          echo "PYTHON_VERSION=$PYTHON_VERSION" | tee -a "$GITHUB_ENV"

      - name: Install Python build dependencies
        run: |
          pip install wheel cmake

      # The PyTorch source repository depends on whether IPEX is used.
      - name: Setup PyTorch with IPEX
        if: ${{ env.USE_IPEX == '1' }}
        uses: ./.github/actions/setup-pytorch
        with:
          repository: Stonepia/pytorch

      - name: Setup PyTorch without IPEX
        if: ${{ env.USE_IPEX == '0' }}
        uses: ./.github/actions/setup-pytorch
        with:
          repository: pytorch/pytorch

      - name: Setup IPEX
        if: ${{ env.USE_IPEX == '1' }}
        uses: ./.github/actions/setup-ipex

      - name: Build Triton wheels
        uses: ./.github/actions/setup-triton
        with:
          command: DEBUG=1 python setup.py bdist_wheel

      - name: Install Triton
        run: |
          pip install python/dist/*.whl

      - name: Install benchmark dependencies
        run: |
          pip install matplotlib pandas tabulate

      - name: Create reports dir
        # Exports the absolute reports path as REPORTS for later steps.
        run: |
          mkdir reports
          echo "REPORTS=$PWD/reports" >> "$GITHUB_ENV"

      - name: Install benchmarks
        id: install
        run: |
          cd benchmarks
          python setup.py install

      # Benchmark steps run whenever the install step succeeded and the run
      # was not cancelled, unless their key is listed in `skip_benchmarks`.
      - name: Run Triton FA kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python flash_attention_fwd_benchmark.py --reports "$REPORTS"
          source ../../scripts/capture-hw-details.sh
          # One report per compiler, both built from the same CSV.
          python ../../scripts/build_report.py \
            "$REPORTS/attn-performance.csv" "$REPORTS/attn-triton-report.csv" \
            --benchmark attn --compiler triton \
            --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" \
            --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" \
            --tag "$TAG"
          python ../../scripts/build_report.py \
            "$REPORTS/attn-performance.csv" "$REPORTS/attn-xetla-report.csv" \
            --benchmark attn --compiler xetla \
            --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" \
            --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" \
            --tag "$TAG"

      - name: Run Triton FA kernel benchmark - advanced path
        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py_advanced') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          # The two variables apply to this benchmark invocation only.
          TRITON_INTEL_ADVANCED_PATH=1 \
          IGC_VISAOptions=" -enableBCR" \
          python flash_attention_fwd_benchmark.py --reports "$REPORTS"
          # Suffix the tag for the report built below.
          TAG="${TAG}-adv"
          source ../../scripts/capture-hw-details.sh
          python ../../scripts/build_report.py \
            "$REPORTS/attn-performance.csv" "$REPORTS/attn-triton-advanced-report.csv" \
            --benchmark attn --compiler triton \
            --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" \
            --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" \
            --tag "$TAG"

      - name: Save pip cache
        # Only when the load step reported a cache miss.
        if: ${{ steps.pip-cache.outputs.status == 'miss' }}
        uses: ./.github/actions/save
        with:
          path: ${{ steps.pip-cache.outputs.path }}
          dest: ${{ steps.pip-cache.outputs.dest }}

      - name: Upload benchmark reports
        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-reports
          path: reports