Remove hstu install (#93)
Summary:
The HSTU bug has been fixed upstream (facebookresearch/generative-recommenders#152), so the patch can be removed now.

After the upstream PyTorch pin update, we now use `triton.__file__` to determine whether Triton is installed from main or from torch-triton.
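The detection logic (mirroring the change in `test/test_gpu/main.py`) can be sketched as a pure function; the paths below are illustrative, not taken from the commit:

```python
def skip_file_for(triton_file: str) -> str:
    """Pick the skip list based on where the triton package resolves.

    Wheels (including the triton bundled with a torch install) land under
    site-packages; a development install of Triton main resolves to a
    source checkout instead.
    """
    if "site-packages" in triton_file:
        return "skip_tests_h100_pytorch.yaml"
    return "skip_tests_h100_triton_main.yaml"


# Illustrative paths:
print(skip_file_for("/usr/lib/python3.10/site-packages/triton/__init__.py"))
# skip_tests_h100_pytorch.yaml
print(skip_file_for("/home/user/triton/python/triton/__init__.py"))
# skip_tests_h100_triton_main.yaml
```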

Pull Request resolved: #93

Test Plan:
Internal test:

```
buck2 test -c fbcode.nvcc_arch=h100a -c fbcode.platform010_cuda_version=12.4 'fbcode//mode/opt' \
  fbcode//pytorch/tritonbench/test/test_gpu:test_gpu \
  -- --exact 'pytorch/tritonbench/test/test_gpu:test_gpu - test_gpu_tritonbench_flash_attention (pytorch.tritonbench.test.test_gpu.main.TestTritonbenchGpu)'
```

Cache hits: 98%. Commands: 462286 (cached: 453091, remote: 7877, local: 1318)
Tests finished: Pass 1. Fail 0. Fatal 0. Skip 0. Build failure 0

Reviewed By: plotfi

Differential Revision: D66741231

Pulled By: xuzhao9

fbshipit-source-id: dece3777acc996015682e28d2ec31a21b1795260
xuzhao9 authored and facebook-github-bot committed Dec 4, 2024
1 parent 6b0b478 commit 7543890
Showing 6 changed files with 13 additions and 64 deletions.
5 changes: 1 addition & 4 deletions — .github/workflows/_linux-test-h100.yml

```diff
@@ -29,12 +29,9 @@ jobs:
           nvidia-smi
       - name: Install Tritonbench
         run: |
-          # todo: remove this when the new docker rolls out
-          # speedup install and skip compile by reusing the docker .so files
-          mkdir -p /workspace/tritonbench/.data
+          # speedup install and skip compile
           ln -s /workspace/tritonbench/.data .
           . "${SETUP_SCRIPT}"
-          python install.py --colfax --tk --hstu
       - name: Test Tritonbench operators on H100 GPU
         run: |
           bash ./.ci/tritonbench/test-gpu.sh
```
7 changes: 0 additions & 7 deletions — install.py

```diff
@@ -82,7 +82,6 @@ def setup_hip(args: argparse.Namespace):
     # We have to disable all third-parties that donot support hip/rocm
     args.all = False
     args.liger = True
-    args.hstu = True


 if __name__ == "__main__":
@@ -102,7 +101,6 @@ def setup_hip(args: argparse.Namespace):
     parser.add_argument(
         "--fa3", action="store_true", help="Install optional flash_attention 3 kernels"
     )
-    parser.add_argument("--hstu", action="store_true", help="Install HSTU.")
     parser.add_argument("--jax", action="store_true", help="Install jax nightly")
     parser.add_argument("--tk", action="store_true", help="Install ThunderKittens")
     parser.add_argument("--liger", action="store_true", help="Install Liger-kernel")
@@ -151,11 +149,6 @@ def setup_hip(args: argparse.Namespace):
         from tools.xformers.install import install_xformers

         install_xformers()
-    if args.hstu or args.all:
-        logger.info("[tritonbench] installing hstu...")
-        from tools.hstu.install import install_hstu
-
-        install_hstu()
     logger.info("[tritonbench] installation complete!")
     # run tests to check installation
     if args.test:
```
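For context, the deleted branch followed install.py's flag-per-component pattern: one `store_true` flag per third-party dependency, with `--all` as a catch-all. A minimal, self-contained sketch of that pattern (the component name and installer below are placeholders, not tritonbench APIs):

```python
import argparse
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("tritonbench")


def install_example() -> None:
    # Placeholder for a per-component installer such as
    # tools.<component>.install.install_<component> in the real repo.
    logger.info("[tritonbench] installing example...")


def main(argv=None) -> list:
    parser = argparse.ArgumentParser()
    parser.add_argument("--all", action="store_true", help="Install everything")
    parser.add_argument(
        "--example", action="store_true", help="Install the example component"
    )
    args = parser.parse_args(argv)

    installed = []
    if args.example or args.all:
        install_example()
        installed.append("example")
    return installed


print(main(["--example"]))  # ['example']
print(main([]))             # []
```

Removing a component (as this commit does for HSTU) means deleting its flag, its `if args.<component> or args.all:` branch, and its `tools/<component>/` directory.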
10 changes: 4 additions & 6 deletions — test/test_gpu/main.py

```diff
@@ -18,14 +18,12 @@
     fbcode_skip_file_path = "fb/skip_tests_h100_fbcode.yaml"
     SKIP_FILE = importlib.resources.files(__package__).joinpath(fbcode_skip_file_path)
 else:
-    SKIP_FILE_NAME = "skip_tests_h100_pytorch.yaml"
     try:
-        # test if it is Triton main branch
-        import triton.tools.experimental_descriptor  # @manual # noqa: F401
+        import triton  # @manual

-        SKIP_FILE_NAME = "skip_tests_h100_triton_main.yaml"
+        if "site-packages" in triton.__file__:
+            SKIP_FILE_NAME = "skip_tests_h100_pytorch.yaml"
+        else:
+            SKIP_FILE_NAME = "skip_tests_h100_triton_main.yaml"
     except ModuleNotFoundError:
         pass
     import os

     SKIP_FILE = os.path.abspath(os.path.join(os.path.dirname(__file__), SKIP_FILE_NAME))
```
8 changes: 8 additions & 0 deletions — test/test_gpu/skip_tests_h100_pytorch.yaml

```diff
@@ -10,6 +10,8 @@ bf16xint16_gemm:
 flash_attention:
   # thunderkittens cannot handle the default input shapes
   - tk
+  # triton_op_flash_v2 will segfault on triton-pytorch
+  - triton_op_flash_v2
   # triton_tutorial_* kernels require triton-main
   - triton_tutorial_flash_v2
   - triton_tutorial_flash_v2_opt
@@ -42,5 +44,11 @@ jagged_mean:
 jagged_softmax:
 jagged_sum:
 ragged_attention:
+  # ../../../lib/Tools/LinearLayout.cpp:565: LinearLayout
+  # mlir::triton::LinearLayout::reshapeOuts(ArrayRef<std::pair<StringAttr, int32_t>>) const:
+  # Assertion `getTotalOutDimSize() == std::accumulate( newOutDims.begin(), newOutDims.end(),
+  # 1, [&](int32_t acc, auto &outDim) { return acc * outDim.second; })' failed.
+  - hstu_triton_ragged_attention
+  # presistent kernel is not ready for OSS
+  - hstu_triton_ragged_attention_persistent
 test_op:
```
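The skip file maps each operator to the impls to skip; a key with no value (like `test_op:`) parses to `None` in YAML, which reads naturally as "skip the whole operator". How a harness might consume the parsed file can be sketched as follows (the dict hand-encodes a subset of the file above, and `is_skipped` is an illustrative helper, not the loader in test/test_gpu/main.py):

```python
# Hand-encoded subset of skip_tests_h100_pytorch.yaml after YAML parsing.
SKIP = {
    "flash_attention": ["tk", "triton_op_flash_v2"],
    "ragged_attention": [
        "hstu_triton_ragged_attention",
        "hstu_triton_ragged_attention_persistent",
    ],
    "test_op": None,  # no list -> skip every impl of this operator
}


def is_skipped(op: str, impl: str) -> bool:
    """Return True if the (operator, impl) pair should be skipped."""
    if op not in SKIP:
        return False
    impls = SKIP[op]
    return impls is None or impl in impls


print(is_skipped("flash_attention", "tk"))        # True: impl is listed
print(is_skipped("flash_attention", "triton_flash_v2"))  # False: not listed
print(is_skipped("test_op", "anything"))          # True: whole operator skipped
```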
13 changes: 0 additions & 13 deletions tools/hstu/hstu.patch

This file was deleted.

34 changes: 0 additions & 34 deletions tools/hstu/install.py

This file was deleted.
