diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index 0c4647edda..9cdf201735 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -79,108 +79,6 @@ jobs: cd benchmarks python setup.py install - - name: Run Triton Softmax kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} - run: | - cd benchmarks/triton_kernels_benchmark - python fused_softmax.py --reports $REPORTS - source ../../scripts/capture-hw-details.sh - TAG=${{ inputs.tag || 'ci' }} - python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-triton-report.csv --benchmark softmax --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - - - name: Run Triton GEMM kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} - run: | - cd benchmarks/triton_kernels_benchmark - python gemm_benchmark.py --reports $REPORTS - source ../../scripts/capture-hw-details.sh - TAG=${{ inputs.tag || 'ci' }} - python ../../scripts/build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-triton-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - python ../../scripts/build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - - - name: Run Triton GEMM kernel benchmark - default path - if: ${{ steps.install.outcome == 'success' && !cancelled() }} - run: | - cd benchmarks/triton_kernels_benchmark - # Default path: - TRITON_INTEL_ADVANCED_PATH=0 \ - TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \ - IGC_VISAOptions=" -enableBCR -nolocalra" \ - IGC_DisableLoopUnroll=1 \ - python gemm_benchmark.py --reports $REPORTS - - TAG=${{ inputs.tag || 'ci' }}-dflt - source ../../scripts/capture-hw-details.sh - python ../../scripts/build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - - - name: Run Triton GEMM kernel benchmark - advanced path - if: ${{ steps.install.outcome == 'success' && !cancelled() }} - run: | - cd benchmarks/triton_kernels_benchmark - # Advanced path: - TRITON_INTEL_ADVANCED_PATH=1 \ - TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \ - IGC_VISAOptions=" -enableBCR -nolocalra" \ - IGC_DisableLoopUnroll=1 \ - python gemm_benchmark.py --reports $REPORTS - - TAG=${{ inputs.tag || 'ci' }}-adv - source ../../scripts/capture-hw-details.sh - python ../../scripts/build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - - - name: Run Triton GEMM + PreOp (exp) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} - run: | - cd benchmarks/triton_kernels_benchmark - python gemm_preop_exp_benchmark.py --reports $REPORTS - source ../../scripts/capture-hw-details.sh - TAG=${{ inputs.tag || 'ci' }} - python ../../scripts/build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - - - name: Run Triton GEMM + PostOp (Gelu) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} - run: | - cd benchmarks/triton_kernels_benchmark - python gemm_postop_gelu_benchmark.py --reports $REPORTS - source ../../scripts/capture-hw-details.sh - TAG=${{ inputs.tag || 'ci' }} - python ../../scripts/build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - - - name: Run Triton GEMM + PostOp (add matrix) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} - run: | - cd benchmarks/triton_kernels_benchmark - python gemm_postop_addmatrix_benchmark.py --reports $REPORTS - source ../../scripts/capture-hw-details.sh - TAG=${{ inputs.tag || 'ci' }} - python ../../scripts/build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - - - name: Run Triton FA kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} - run: | - cd benchmarks/triton_kernels_benchmark - python flash_attention_fwd_benchmark.py --reports $REPORTS - - TAG=${{ inputs.tag || 'ci' }} - source ../../scripts/capture-hw-details.sh - python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - - - name: Run Triton FA kernel benchmark - default path - if: ${{ steps.install.outcome == 'success' && !cancelled() }} - run: | - cd benchmarks/triton_kernels_benchmark - TRITON_INTEL_ADVANCED_PATH=0 \ - TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \ - IGC_VISAOptions=" -enableBCR -nolocalra -printregusage -DPASTokenReduction -enableHalfLSC -abiver 2" \ - IGC_DisableLoopUnroll=1 \ - python flash_attention_fwd_benchmark.py --reports $REPORTS - - TAG=${{ inputs.tag || 'ci' }}-dflt - source ../../scripts/capture-hw-details.sh - python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - - name: Run Triton FA kernel benchmark - advanced path if: ${{ steps.install.outcome == 'success' && !cancelled() }} run: | @@ -195,21 +93,6 @@ jobs: source ../../scripts/capture-hw-details.sh python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - - name: Run Prefix Sums kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} - run: | - cd benchmarks/triton_kernels_benchmark - python prefix_sums.py --reports $REPORTS - source ../../scripts/capture-hw-details.sh - TAG=${{ inputs.tag || 'ci' }} - python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - - - name: Run micro benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} - run: | - cd benchmarks/micro_benchmarks - python run_benchmarks.py --reports $REPORTS - - name: Save pip cache if: ${{ steps.pip-cache.outputs.status == 'miss' }} uses: ./.github/actions/save