Triton benchmarks #183

Workflow file for this run

.github/workflows/triton-benchmarks.yml at 75b32f1

	# Triton benchmarks workflow.
	#
	# Runs three independent jobs (softmax + micro benchmarks, flash attention,
	# GEMM). Each job restores or builds the Triton wheel, runs its benchmarks,
	# and uploads its `reports` directory as a separate artifact.
	name: Triton benchmarks

	on:
	  # Manual trigger; `runner_label` optionally overrides the default runner.
	  workflow_dispatch:
	    inputs:
	      runner_label:
	        description: Runner label, keep empty for default
	        type: string
	        default: ""
	  # Scheduled trigger: every day at 23:05 (GitHub evaluates cron in UTC).
	  schedule:
	    - cron: "5 23 * * *"

	permissions: read-all

	env:
	  # Quoted so YAML keeps the string "3.10" instead of the float 3.1.
	  PYTHON_VERSION: "3.10"

	jobs:
	  # Softmax kernel benchmark and micro benchmarks, built from the triggering
	  # commit.
	  build:
	    name: Triton benchmarks
	    runs-on:
	      # An explicit `runner_label` input wins; otherwise (empty input or a
	      # scheduled run) the job goes to the 'max1550' runner.
	      - ${{ inputs.runner_label || 'max1550' }}
	    timeout-minutes: 720
	    defaults:
	      run:
	        # Shell for every `run` step: sources the oneAPI setvars.sh (output
	        # discarded) and then the step script ({0}); -e / -o pipefail make the
	        # step fail on the first failing command or pipeline stage.
	        shell: bash -noprofile --norc -eo pipefail -c "source /home/runner/intel/oneapi/setvars.sh > /dev/null; source {0}"
	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v4

	      # The `status`, `path` and `dest` outputs of this step drive the
	      # "Save pip cache" step at the end of the job.
	      - name: Load pip cache
	        id: pip-cache
	        uses: ./.github/actions/load
	        with:
	          path: $HOME/.cache/pip
	          # pip cache per commit id just to minimize network traffic
	          key: pip-$PYTHON_VERSION-$GITHUB_SHA

	      - name: Install Python
	        uses: actions/setup-python@v5
	        with:
	          python-version: ${{ env.PYTHON_VERSION }}

	      - name: Install Python build dependencies
	        run: |
	          pip install wheel

	      - name: Setup PyTorch
	        uses: ./.github/actions/setup-pytorch

	      - name: Setup IPEX
	        uses: ./.github/actions/setup-ipex

	      # Key = "triton-" + sha256 of "<python version> <commit sha>".
	      # `cut -d ' ' -f1` keeps only the digest column of the sha256sum output.
	      - name: Generate Triton cache key
	        id: triton-key
	        run: |
	          COMPOSITE_KEY=$(echo $PYTHON_VERSION $GITHUB_SHA | sha256sum - | cut -d ' ' -f1)
	          echo "key=triton-$COMPOSITE_KEY" >> $GITHUB_OUTPUT

	      - name: Load Triton wheels from a cache
	        id: triton-cache
	        uses: ./.github/actions/load
	        with:
	          path: python/dist
	          key: ${{ steps.triton-key.outputs.key }}

	      # Only build when the cache lookup above reported a miss.
	      - name: Build Triton wheels
	        if: ${{ steps.triton-cache.outputs.status == 'miss' }}
	        uses: ./.github/actions/setup-triton
	        with:
	          command: DEBUG=1 python setup.py bdist_wheel

	      - name: Install Triton
	        run: |
	          pip install python/dist/*.whl

	      - name: Save Triton wheels to a cache
	        if: ${{ steps.triton-cache.outputs.status == 'miss' }}
	        uses: ./.github/actions/save
	        with:
	          path: ${{ steps.triton-cache.outputs.path }}
	          dest: ${{ steps.triton-cache.outputs.dest }}

	      - name: Install benchmark dependencies
	        run: |
	          pip install matplotlib pandas tabulate

	      # Exports the absolute reports path as $REPORTS for all later steps.
	      - name: Create reports dir
	        run: |
	          mkdir reports
	          echo "REPORTS=$PWD/reports" >> $GITHUB_ENV

	      - name: Install benchmarks
	        run: |
	          cd benchmarks
	          python setup.py install

	      # Builds two reports (compiler "triton" and compiler "xetla") from the
	      # same softmax-performance.csv.
	      - name: Run triton softmax kernel benchmark
	        run: |
	          cd benchmarks/triton_kernels_benchmark
	          python fused_softmax.py --reports $REPORTS
	          source ../../scripts/capture-hw-details.sh
	          python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-triton-report.csv --benchmark softmax --compiler triton --param_cols "N" --tflops_col Triton-TFlops-max --hbm_col "Triton-GB/s-max"
	          python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops-max --hbm_col "XeTLA-GB/s-max"

	      - name: Run micro benchmark
	        run: |
	          cd benchmarks/micro_benchmarks
	          python run_benchmarks.py --reports $REPORTS

	      - name: Save pip cache
	        if: ${{ steps.pip-cache.outputs.status == 'miss' }}
	        uses: ./.github/actions/save
	        with:
	          path: ${{ steps.pip-cache.outputs.path }}
	          dest: ${{ steps.pip-cache.outputs.dest }}

	      - name: Upload benchmark reports
	        uses: actions/upload-artifact@v4
	        with:
	          name: benchmark-reports
	          path: reports

	  # Flash attention benchmarks. The workspace root holds the 'perf_attn'
	  # branch; the triggering commit is checked out separately into
	  # `llvm-target`.
	  benchmark-attention:
	    name: Benchmark flash attention
	    runs-on:
	      - ${{ inputs.runner_label || 'max1550' }}
	    timeout-minutes: 720
	    defaults:
	      run:
	        # Same shell wrapper as the other jobs: oneAPI environment first,
	        # then the step script ({0}).
	        shell: bash -noprofile --norc -eo pipefail -c "source /home/runner/intel/oneapi/setvars.sh > /dev/null; source {0}"
	    steps:
	      # Workspace root = 'perf_attn' branch. The local actions referenced as
	      # ./.github/actions/... below therefore come from that branch.
	      - name: Checkout repository
	        uses: actions/checkout@v4
	        with:
	          ref: 'perf_attn'

	      # Second checkout (default ref) into ./llvm-target; only the
	      # setup-triton action is taken from this copy.
	      - name: Checkout repository
	        uses: actions/checkout@v4
	        with:
	          path: llvm-target

	      - name: Load pip cache
	        id: pip-cache
	        uses: ./.github/actions/load
	        with:
	          path: $HOME/.cache/pip
	          # pip cache per commit id just to minimize network traffic
	          key: pip-$PYTHON_VERSION-$GITHUB_SHA

	      # Populates ./artifacts, which the libigc install step reads from.
	      # NOTE(review): the result of this lookup is never checked; presumably
	      # the install step simply fails on a cache miss - confirm.
	      - name: Load artifacts cache
	        id: artifacts-cache
	        uses: ./.github/actions/load
	        with:
	          path: artifacts
	          key: artifacts

	      - name: Install Python
	        uses: actions/setup-python@v5
	        with:
	          python-version: ${{ env.PYTHON_VERSION }}

	      - name: Install Python build dependencies
	        run: |
	          pip install wheel

	      - name: Setup PyTorch
	        uses: ./.github/actions/setup-pytorch

	      - name: Setup IPEX
	        uses: ./.github/actions/setup-ipex

	      # Key = "triton-" + sha256 of "<python version> <commit sha>".
	      # NOTE(review): this is the same key the `build` job computes, although
	      # this job's python/dist comes from the 'perf_attn' checkout. If the
	      # load/save actions share one store across jobs, wheels built from
	      # different sources could be mixed up - confirm.
	      - name: Generate Triton cache key
	        id: triton-key
	        run: |
	          COMPOSITE_KEY=$(echo $PYTHON_VERSION $GITHUB_SHA | sha256sum - | cut -d ' ' -f1)
	          echo "key=triton-$COMPOSITE_KEY" >> $GITHUB_OUTPUT

	      - name: Load Triton wheels from a cache
	        id: triton-cache
	        uses: ./.github/actions/load
	        with:
	          path: python/dist
	          key: ${{ steps.triton-key.outputs.key }}

	      # Only build when the cache lookup above reported a miss. The action
	      # definition is taken from the ./llvm-target checkout.
	      - name: Build Triton wheels
	        if: ${{ steps.triton-cache.outputs.status == 'miss' }}
	        uses: ./llvm-target/.github/actions/setup-triton
	        with:
	          command: DEBUG=1 python setup.py bdist_wheel

	      - name: Install Triton
	        run: |
	          pip install python/dist/*.whl

	      - name: Save Triton wheels to a cache
	        if: ${{ steps.triton-cache.outputs.status == 'miss' }}
	        uses: ./.github/actions/save
	        with:
	          path: ${{ steps.triton-cache.outputs.path }}
	          dest: ${{ steps.triton-cache.outputs.dest }}

	      - name: Install benchmark dependencies
	        run: |
	          pip install matplotlib pandas tabulate

	      # NOTE(review): `sudo dpkg -i` installs the package system-wide on the
	      # runner, whereas benchmark-gemm only extracts the same .deb into its
	      # workspace - confirm the persistent install is intended.
	      - name: Install a custom libigc from artifacts
	        run: |
	          sudo dpkg -i artifacts/libigc1_1.0.24994.16243-igc+releaseinternal1_amd64.deb

	      # Exports the absolute reports path as $REPORTS for all later steps.
	      - name: Create reports dir
	        run: |
	          mkdir reports
	          echo "REPORTS=$PWD/reports" >> $GITHUB_ENV

	      - name: Run flash attention benchmarks
	        run: |
	          cd python/tutorials
	          bash run_all.sh
	          # Keep the raw summary in the uploaded reports directory.
	          cp summary.csv $REPORTS/attention-summary.csv
	          # This will fix csv file issues, fixing ", "->"," and " " -> "," that exist in raw output
	          sed -E 's/, /,/g;s/ /,/g' summary.csv > attention-results.csv
	          source ../../scripts/capture-hw-details.sh
	          python ../../scripts/build_report.py attention-results.csv $REPORTS/attention-triton-report.csv --benchmark flash_attention --compiler triton --tflops_col max_tflops --param_cols "Z,H,N_CTX,D_HEAD"

	      - name: Save pip cache
	        if: ${{ steps.pip-cache.outputs.status == 'miss' }}
	        uses: ./.github/actions/save
	        with:
	          path: ${{ steps.pip-cache.outputs.path }}
	          dest: ${{ steps.pip-cache.outputs.dest }}

	      - name: Upload benchmark reports
	        uses: actions/upload-artifact@v4
	        with:
	          name: benchmark-attention-reports
	          path: reports

	  # GEMM kernel benchmark. Runs against a libigc that is extracted into the
	  # workspace and put on LD_LIBRARY_PATH.
	  benchmark-gemm:
	    name: GEMM benchmarks
	    runs-on:
	      - ${{ inputs.runner_label || 'max1550' }}
	    timeout-minutes: 720
	    defaults:
	      run:
	        # Same shell wrapper as the other jobs: oneAPI environment first,
	        # then the step script ({0}).
	        shell: bash -noprofile --norc -eo pipefail -c "source /home/runner/intel/oneapi/setvars.sh > /dev/null; source {0}"
	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v4

	      # Second checkout (default ref) into ./llvm-target; only the
	      # setup-triton action is taken from this copy.
	      - name: Checkout repository
	        uses: actions/checkout@v4
	        with:
	          path: llvm-target

	      - name: Load pip cache
	        id: pip-cache
	        uses: ./.github/actions/load
	        with:
	          path: $HOME/.cache/pip
	          # pip cache per commit id just to minimize network traffic
	          key: pip-$PYTHON_VERSION-$GITHUB_SHA

	      # Populates ./artifacts, which the libigc extraction step reads from.
	      # NOTE(review): the result of this lookup is never checked; presumably
	      # the extraction step simply fails on a cache miss - confirm.
	      - name: Load artifacts cache
	        id: artifacts-cache
	        uses: ./.github/actions/load
	        with:
	          path: artifacts
	          key: artifacts

	      - name: Install Python
	        uses: actions/setup-python@v5
	        with:
	          python-version: ${{ env.PYTHON_VERSION }}

	      - name: Install Python build dependencies
	        run: |
	          pip install wheel

	      - name: Setup PyTorch
	        uses: ./.github/actions/setup-pytorch

	      - name: Setup IPEX
	        uses: ./.github/actions/setup-ipex

	      # Key = "triton-" + sha256 of "<python version> <commit sha>".
	      # `cut -d ' ' -f1` keeps only the digest column of the sha256sum output.
	      - name: Generate Triton cache key
	        id: triton-key
	        run: |
	          COMPOSITE_KEY=$(echo $PYTHON_VERSION $GITHUB_SHA | sha256sum - | cut -d ' ' -f1)
	          echo "key=triton-$COMPOSITE_KEY" >> $GITHUB_OUTPUT

	      - name: Load Triton wheels from a cache
	        id: triton-cache
	        uses: ./.github/actions/load
	        with:
	          path: python/dist
	          key: ${{ steps.triton-key.outputs.key }}

	      # Only build when the cache lookup above reported a miss. The action
	      # definition is taken from the ./llvm-target checkout.
	      - name: Build Triton wheels
	        if: ${{ steps.triton-cache.outputs.status == 'miss' }}
	        uses: ./llvm-target/.github/actions/setup-triton
	        with:
	          command: DEBUG=1 python setup.py bdist_wheel

	      - name: Install Triton
	        run: |
	          pip install python/dist/*.whl

	      - name: Save Triton wheels to a cache
	        if: ${{ steps.triton-cache.outputs.status == 'miss' }}
	        uses: ./.github/actions/save
	        with:
	          path: ${{ steps.triton-cache.outputs.path }}
	          dest: ${{ steps.triton-cache.outputs.dest }}

	      - name: Install benchmark dependencies
	        run: |
	          pip install matplotlib pandas tabulate

	      # Extracts the .deb into a workspace directory (dpkg -X) instead of
	      # installing it; the benchmark step puts that directory on
	      # LD_LIBRARY_PATH.
	      - name: Install a custom libigc from artifacts
	        run: |
	          mkdir libigc1_1.0.24994.16243
	          sudo dpkg -X artifacts/libigc1_1.0.24994.16243-igc+releaseinternal1_amd64.deb libigc1_1.0.24994.16243

	      # Exports the absolute reports path as $REPORTS for all later steps.
	      - name: Create reports dir
	        run: |
	          mkdir reports
	          echo "REPORTS=$PWD/reports" >> $GITHUB_ENV

	      - name: Install benchmarks
	        run: |
	          cd benchmarks
	          python setup.py install

	      # LD_LIBRARY_PATH is computed before the `cd`, so $PWD is still the
	      # workspace root that contains the extracted libigc directory. The
	      # backslash-continued assignments apply only to the single
	      # gemm_benchmark.py invocation, which starts after ./tt_cache (the
	      # TRITON_CACHE_DIR value) has been removed.
	      - name: Run triton gemm kernel benchmark
	        run: |
	          export LD_LIBRARY_PATH=$PWD/libigc1_1.0.24994.16243:$LD_LIBRARY_PATH
	          cd benchmarks/triton_kernels_benchmark
	          rm -rf ./tt_cache
	          TRITON_CACHE_DIR=./tt_cache \
	          TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
	          TRITON_INTEL_ADVANCED_PATH=1 \
	          IGC_VISAOptions=" -TotalGRFNum 256 -enableBCR -nolocalra -printregusage -DPASTokenReduction -enableHalfLSC -abiver 2" \
	          IGC_ForcePrefetchToL1Cache=1 \
	          IGC_VATemp=1 \
	          UR_L0_IN_ORDER_BARRIER_BY_SIGNAL=0 \
	          IGC_DisableLoopUnroll=1 \
	          NEO_CACHE_PERSISTENT=0 \
	          python gemm_benchmark.py --reports $REPORTS
	          source ../../scripts/capture-hw-details.sh
	          python ../../scripts/build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-triton-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops-max --hbm_col "Triton-GB/s-max"
	          python ../../scripts/build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops-max --hbm_col "XeTLA-GB/s-max"

	      - name: Save pip cache
	        if: ${{ steps.pip-cache.outputs.status == 'miss' }}
	        uses: ./.github/actions/save
	        with:
	          path: ${{ steps.pip-cache.outputs.path }}
	          dest: ${{ steps.pip-cache.outputs.dest }}

	      - name: Upload benchmark reports
	        uses: actions/upload-artifact@v4
	        with:
	          name: benchmark-gemm-reports
	          path: reports

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Triton benchmarks #183

Workflow file

Triton benchmarks #183

Jobs

Run details

Workflow file for this run