Skip to content

Commit

Permalink
Test flash_attention and fp8_attention kernels
Browse files (browse the repository at this point in the history)
  • Loading branch information
xuzhao9 committed Nov 22, 2024
1 parent eed7f5b commit 5f2a2d1
Showing 1 changed file with 3 additions and 8 deletions.
11 changes: 3 additions & 8 deletions test/test_gpu/skip_tests_h100_triton_main.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,27 +7,22 @@
flash_attention:
# thunderkittens cannot handle the default input shapes
- tk
# FIXME: triton_tutorial_* kernels are broken
- triton_tutorial_flash_v2
- triton_tutorial_flash_v2_opt
- triton_tutorial_flash_v2_tma
# _ws kernels require Triton with warp specialization
- triton_tutorial_flash_v2_ws
- triton_tutorial_flash_v2_tma_ws
fp8_attention:
# fb-only kernel
- colfax_fmha
# FIXME: triton_flash_v2 kernel is broken
- triton_flash_v2
# fb-only kernels
fp8_fused_quant_gemm_rowwise:
fp8_gemm:
# FIXME: triton_*_persistent kernels are broken
- triton_persistent_fp8_gemm
- triton_tma_persistent_fp8_gemm
gemm:
# out of shared memory
# FIXME: out of shared memory
- triton_tma_persistent_matmul
# out of shared memory
# FIXME: out of shared memory
- triton_tma_persistent_cached_matmul
# internal only kernels
- hstu_triton_matmul
Expand Down

0 comments on commit 5f2a2d1

Please sign in to comment.