diff --git a/.github/workflows/_linux-test-h100.yml b/.github/workflows/_linux-test-h100.yml
index 88121b10..7840f2af 100644
--- a/.github/workflows/_linux-test-h100.yml
+++ b/.github/workflows/_linux-test-h100.yml
@@ -22,11 +22,23 @@ jobs:
         uses: actions/checkout@v3
         with:
           submodules: recursive
+      - uses: dorny/paths-filter@v3  # path-filter action; its result gates the flash-attn reinstall step
+        id: flash_attn_updated  # step id that the reinstall step's `if:` refers to
+        with:  # NOTE(review): a submodule bump changes the gitlink path 'submodules/flash-attention' itself - confirm the glob below matches it
+          filters: |
+            src:
+              - 'submodules/flash-attention/**'
       - name: Tune Nvidia GPU
         run: |
           sudo nvidia-smi -pm 1
           sudo ldconfig
           nvidia-smi
+      - name: Reinstall flash-attn (optional)
+        if: steps.flash_attn_updated.outputs.src == 'true'  # run only when the 'src' filter matched; the bare step object is always truthy
+        run: |
+          . "${SETUP_SCRIPT}"
+          python install.py --fa2
+          python install.py --fa3
       - name: Install Tritonbench
         run: |
           # speedup install and skip compile by reusing the docker .so files
diff --git a/submodules/flash-attention b/submodules/flash-attention
index bedf8774..cf0f4c38 160000
--- a/submodules/flash-attention
+++ b/submodules/flash-attention
@@ -1 +1 @@
-Subproject commit bedf8774677315c5eb7e640eca6d7aa15e87775a
+Subproject commit cf0f4c38ef4cfd861caa8173a39ef34e299c5048