From 5e601ff5abea0301d553c072828989602754bb4c Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Fri, 6 Dec 2024 13:23:09 -0500 Subject: [PATCH] Reinstall xformers on demand --- .github/workflows/_linux-test-h100.yml | 7 +++++++ submodules/xformers | 2 +- tritonbench/operators/decoding_attention/operator.py | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux-test-h100.yml b/.github/workflows/_linux-test-h100.yml index 4b2b0e95..d8dfe2d9 100644 --- a/.github/workflows/_linux-test-h100.yml +++ b/.github/workflows/_linux-test-h100.yml @@ -28,6 +28,8 @@ jobs: filters: | fa: - 'submodules/flash-attention/**' + xformers: + - 'submodules/xformers/**' - name: Tune Nvidia GPU run: | sudo nvidia-smi -pm 1 @@ -39,6 +41,11 @@ jobs: . "${SETUP_SCRIPT}" python install.py --fa2 python install.py --fa3 + - name: Reinstall xformers (optional) + if: steps.submodules_changes.outputs.xformers == 'true' + run: | + . "${SETUP_SCRIPT}" + python install.py --xformers - name: Install Tritonbench run: | # speedup install and skip compile by reusing the docker .so files diff --git a/submodules/xformers b/submodules/xformers index a97a1e0c..0320918a 160000 --- a/submodules/xformers +++ b/submodules/xformers @@ -1 +1 @@ -Subproject commit a97a1e0cb4337b0a8a8ee1670618a1f15fb84f54 +Subproject commit 0320918a54864a86a6fb5f6435564d9d51f7c80b diff --git a/tritonbench/operators/decoding_attention/operator.py b/tritonbench/operators/decoding_attention/operator.py index d8552e97..75632be7 100644 --- a/tritonbench/operators/decoding_attention/operator.py +++ b/tritonbench/operators/decoding_attention/operator.py @@ -27,7 +27,7 @@ try: torch_lib_path = os.path.join(os.path.dirname(__file__), "lib") with add_ld_library_path(torch_lib_path): - from flash_attn_interface import flash_attn_func as flash_attn_v3 + import flash_attn_interface as flash_attn_v3 except (ImportError, IOError, AttributeError): try: from ai_codesign.gen_ai.flash_attention_v2.hopper import (