Skip to content

Triton benchmarks

Triton benchmarks #183

name: Triton benchmarks
on:
workflow_dispatch:
inputs:
runner_label:
description: Runner label, keep empty for default
type: string
default: ""
schedule:
- cron: "5 23 * * *"
permissions: read-all
env:
PYTHON_VERSION: "3.10"
jobs:
build:
name: Triton benchmarks
runs-on:
- ${{ inputs.runner_label || 'max1550' }}
timeout-minutes: 720
defaults:
run:
shell: bash -noprofile --norc -eo pipefail -c "source /home/runner/intel/oneapi/setvars.sh > /dev/null; source {0}"
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Load pip cache
id: pip-cache
uses: ./.github/actions/load
with:
path: $HOME/.cache/pip
# pip cache per commit id just to minimize network traffic
key: pip-$PYTHON_VERSION-$GITHUB_SHA
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install Python build dependencies
run: |
pip install wheel
- name: Setup PyTorch
uses: ./.github/actions/setup-pytorch
- name: Setup IPEX
uses: ./.github/actions/setup-ipex
- name: Generate Triton cache key
id: triton-key
run: |
COMPOSITE_KEY=$(echo $PYTHON_VERSION $GITHUB_SHA | sha256sum - | cut -d\ -f1)
echo "key=triton-$COMPOSITE_KEY" >> $GITHUB_OUTPUT
- name: Load Triton wheels from a cache
id: triton-cache
uses: ./.github/actions/load
with:
path: python/dist
key: ${{ steps.triton-key.outputs.key }}
- name: Build Triton wheels
if: ${{ steps.triton-cache.outputs.status == 'miss' }}
uses: ./.github/actions/setup-triton
with:
command: DEBUG=1 python setup.py bdist_wheel
- name: Install Triton
run: |
pip install python/dist/*.whl
- name: Save Triton wheels to a cache
if: ${{ steps.triton-cache.outputs.status == 'miss' }}
uses: ./.github/actions/save
with:
path: ${{ steps.triton-cache.outputs.path }}
dest: ${{ steps.triton-cache.outputs.dest }}
- name: Install benchmark dependencies
run: |
pip install matplotlib pandas tabulate
- name: Create reports dir
run: |
mkdir reports
echo "REPORTS=$PWD/reports" >> $GITHUB_ENV
- name: Install benchmarks
run: |
cd benchmarks
python setup.py install
- name: Run triton softmax kernel benchmark
run: |
cd benchmarks/triton_kernels_benchmark
python fused_softmax.py --reports $REPORTS
source ../../scripts/capture-hw-details.sh
python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-triton-report.csv --benchmark softmax --compiler triton --param_cols "N" --tflops_col Triton-TFlops-max --hbm_col "Triton-GB/s-max"
python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops-max --hbm_col "XeTLA-GB/s-max"
- name: Run micro benchmark
run: |
cd benchmarks/micro_benchmarks
python run_benchmarks.py --reports $REPORTS
- name: Save pip cache
if: ${{ steps.pip-cache.outputs.status == 'miss' }}
uses: ./.github/actions/save
with:
path: ${{ steps.pip-cache.outputs.path }}
dest: ${{ steps.pip-cache.outputs.dest }}
- name: Upload benchmark reports
uses: actions/upload-artifact@v4
with:
name: benchmark-reports
path: reports
benchmark-attention:
name: Benchmark flash attention
runs-on:
- ${{ inputs.runner_label || 'max1550' }}
timeout-minutes: 720
defaults:
run:
shell: bash -noprofile --norc -eo pipefail -c "source /home/runner/intel/oneapi/setvars.sh > /dev/null; source {0}"
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: 'perf_attn'
- name: Checkout repository
uses: actions/checkout@v4
with:
path: llvm-target
- name: Load pip cache
id: pip-cache
uses: ./.github/actions/load
with:
path: $HOME/.cache/pip
# pip cache per commit id just to minimize network traffic
key: pip-$PYTHON_VERSION-$GITHUB_SHA
- name: Load artifacts cache
id: artifacts-cache
uses: ./.github/actions/load
with:
path: artifacts
key: artifacts
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install Python build dependencies
run: |
pip install wheel
- name: Setup PyTorch
uses: ./.github/actions/setup-pytorch
- name: Setup IPEX
uses: ./.github/actions/setup-ipex
- name: Generate Triton cache key
id: triton-key
run: |
COMPOSITE_KEY=$(echo $PYTHON_VERSION $GITHUB_SHA | sha256sum - | cut -d\ -f1)
echo "key=triton-$COMPOSITE_KEY" >> $GITHUB_OUTPUT
- name: Load Triton wheels from a cache
id: triton-cache
uses: ./.github/actions/load
with:
path: python/dist
key: ${{ steps.triton-key.outputs.key }}
- name: Build Triton wheels
if: ${{ steps.triton-cache.outputs.status == 'miss' }}
uses: ./llvm-target/.github/actions/setup-triton
with:
command: DEBUG=1 python setup.py bdist_wheel
- name: Install Triton
run: |
pip install python/dist/*.whl
- name: Save Triton wheels to a cache
if: ${{ steps.triton-cache.outputs.status == 'miss' }}
uses: ./.github/actions/save
with:
path: ${{ steps.triton-cache.outputs.path }}
dest: ${{ steps.triton-cache.outputs.dest }}
- name: Install benchmark dependencies
run: |
pip install matplotlib pandas tabulate
- name: Install a custom libigc from artifacts
run: |
sudo dpkg -i artifacts/libigc1_1.0.24994.16243-igc+releaseinternal1_amd64.deb
- name: Create reports dir
run: |
mkdir reports
echo "REPORTS=$PWD/reports" >> $GITHUB_ENV
- name: Run flash attention benchmarks
run: |
cd python/tutorials
bash run_all.sh
# This will fix csv file issues, fixing ", "->"," and " " -> "," that exist in raw output
cp summary.csv $REPORTS/attention-summary.csv
sed -E 's/, /,/g;s/ /,/g' summary.csv > attention-results.csv
source ../../scripts/capture-hw-details.sh
python ../../scripts/build_report.py attention-results.csv $REPORTS/attention-triton-report.csv --benchmark flash_attention --compiler triton --tflops_col max_tflops --param_cols "Z,H,N_CTX,D_HEAD"
- name: Save pip cache
if: ${{ steps.pip-cache.outputs.status == 'miss' }}
uses: ./.github/actions/save
with:
path: ${{ steps.pip-cache.outputs.path }}
dest: ${{ steps.pip-cache.outputs.dest }}
- name: Upload benchmark reports
uses: actions/upload-artifact@v4
with:
name: benchmark-attention-reports
path: reports
benchmark-gemm:
name: GEMM benchmarks
runs-on:
- ${{ inputs.runner_label || 'max1550' }}
timeout-minutes: 720
defaults:
run:
shell: bash -noprofile --norc -eo pipefail -c "source /home/runner/intel/oneapi/setvars.sh > /dev/null; source {0}"
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Checkout repository
uses: actions/checkout@v4
with:
path: llvm-target
- name: Load pip cache
id: pip-cache
uses: ./.github/actions/load
with:
path: $HOME/.cache/pip
# pip cache per commit id just to minimize network traffic
key: pip-$PYTHON_VERSION-$GITHUB_SHA
- name: Load artifacts cache
id: artifacts-cache
uses: ./.github/actions/load
with:
path: artifacts
key: artifacts
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install Python build dependencies
run: |
pip install wheel
- name: Setup PyTorch
uses: ./.github/actions/setup-pytorch
- name: Setup IPEX
uses: ./.github/actions/setup-ipex
- name: Generate Triton cache key
id: triton-key
run: |
COMPOSITE_KEY=$(echo $PYTHON_VERSION $GITHUB_SHA | sha256sum - | cut -d\ -f1)
echo "key=triton-$COMPOSITE_KEY" >> $GITHUB_OUTPUT
- name: Load Triton wheels from a cache
id: triton-cache
uses: ./.github/actions/load
with:
path: python/dist
key: ${{ steps.triton-key.outputs.key }}
- name: Build Triton wheels
if: ${{ steps.triton-cache.outputs.status == 'miss' }}
uses: ./llvm-target/.github/actions/setup-triton
with:
command: DEBUG=1 python setup.py bdist_wheel
- name: Install Triton
run: |
pip install python/dist/*.whl
- name: Save Triton wheels to a cache
if: ${{ steps.triton-cache.outputs.status == 'miss' }}
uses: ./.github/actions/save
with:
path: ${{ steps.triton-cache.outputs.path }}
dest: ${{ steps.triton-cache.outputs.dest }}
- name: Install benchmark dependencies
run: |
pip install matplotlib pandas tabulate
- name: Install a custom libigc from artifacts
run: |
mkdir libigc1_1.0.24994.16243
sudo dpkg -X artifacts/libigc1_1.0.24994.16243-igc+releaseinternal1_amd64.deb libigc1_1.0.24994.16243
- name: Create reports dir
run: |
mkdir reports
echo "REPORTS=$PWD/reports" >> $GITHUB_ENV
- name: Install benchmarks
run: |
cd benchmarks
python setup.py install
- name: Run triton gemm kernel benchmark
run: |
export LD_LIBRARY_PATH=$PWD/libigc1_1.0.24994.16243:$LD_LIBRARY_PATH
cd benchmarks/triton_kernels_benchmark
rm -rf ./tt_cache
TRITON_CACHE_DIR=./tt_cache \
TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
TRITON_INTEL_ADVANCED_PATH=1 \
IGC_VISAOptions=" -TotalGRFNum 256 -enableBCR -nolocalra -printregusage -DPASTokenReduction -enableHalfLSC -abiver 2" \
IGC_ForcePrefetchToL1Cache=1 \
IGC_VATemp=1 \
UR_L0_IN_ORDER_BARRIER_BY_SIGNAL=0 \
IGC_DisableLoopUnroll=1 \
NEO_CACHE_PERSISTENT=0 \
python gemm_benchmark.py --reports $REPORTS
source ../../scripts/capture-hw-details.sh
python ../../scripts/build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-triton-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops-max --hbm_col "Triton-GB/s-max"
python ../../scripts/build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops-max --hbm_col "XeTLA-GB/s-max"
- name: Save pip cache
if: ${{ steps.pip-cache.outputs.status == 'miss' }}
uses: ./.github/actions/save
with:
path: ${{ steps.pip-cache.outputs.path }}
dest: ${{ steps.pip-cache.outputs.dest }}
- name: Upload benchmark reports
uses: actions/upload-artifact@v4
with:
name: benchmark-gemm-reports
path: reports