Test flash_attention and fp8_attention kernels

pytorch-labs · Nov 22, 2024 · 5f2a2d1
1 parent eed7f5b
commit 5f2a2d1
Showing 1 changed file with 3 additions and 8 deletions.
diff --git a/test/test_gpu/skip_tests_h100_triton_main.yaml b/test/test_gpu/skip_tests_h100_triton_main.yaml
@@ -7,27 +7,22 @@
 flash_attention:
   # thunderkittens cannot handle the default input shapes
   - tk
-  # FIXME: triton_tutorial_* kernels are broken
-  - triton_tutorial_flash_v2
-  - triton_tutorial_flash_v2_opt
-  - triton_tutorial_flash_v2_tma
+  # _ws kernels require Triton with warp specialization
   - triton_tutorial_flash_v2_ws
   - triton_tutorial_flash_v2_tma_ws
 fp8_attention:
   # fb-only kernel
   - colfax_fmha
-  # FIXME: triton_flash_v2 kernel is broken
-  - triton_flash_v2
 # fb-only kernels
 fp8_fused_quant_gemm_rowwise:
 fp8_gemm:
   # FIXME: triton_*_persistent kernels are broken
   - triton_persistent_fp8_gemm
   - triton_tma_persistent_fp8_gemm
 gemm:
-  # out of shared memory
+  # FIXME: out of shared memory
   - triton_tma_persistent_matmul
-  # out of shared memory
+  # FIXME: out of shared memory
   - triton_tma_persistent_cached_matmul
   # internal only kernels
   - hstu_triton_matmul