Commit
expand search space for hstu gemm
Summary: Before this change we were using just a single config for the HSTU addmm GEMM on AMD; this expands the autotuning search space.

Reviewed By: xuzhao9

Differential Revision: D66213893

fbshipit-source-id: 6d5a149d496b24cb80e9e56a8cb1d78010da5e0b
nmacchioni authored and facebook-github-bot committed Nov 20, 2024
1 parent aa67b62 commit b151b84
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions tritonbench/operators/gemm/operator.py
@@ -40,6 +40,12 @@
)

if IS_FBCODE:
    import hammer.oss.generative_recommenders.ops.triton.triton_addmm as hstu_triton_addmm

    # Without this set we can only pick a single config for AMD; Nvidia has 8.
    # With this set, AMD will pick from 256 different configs (not the actual
    # full tuning space, so some perf may be left on the table).
    hstu_triton_addmm.ENABLE_FULL_TURNING_SPACE = True
    from hammer.ops.triton.triton_matmul import (
        triton_matmul as hstu_triton_matmul_kernel,
    )
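For readers unfamiliar with how such a flag takes effect, here is an illustrative sketch, not the actual hammer/hstu triton_addmm code: it shows how a module-level switch like ENABLE_FULL_TURNING_SPACE can gate whether a Triton kernel autotunes over a single hand-picked config or a larger search space. The helper name addmm_autotune_configs and all tile sizes, stage counts, and warp counts below are made-up placeholders.

# Illustrative sketch only -- not the actual hammer/hstu triton_addmm code.
# A module-level flag switches between one hard-coded config and a larger
# autotuning search space; all values here are placeholders.
import itertools

import triton

ENABLE_FULL_TURNING_SPACE = False  # the benchmark sets this to True before tuning


def addmm_autotune_configs():
    if not ENABLE_FULL_TURNING_SPACE:
        # Default path: a single hand-picked config, i.e. the "single config"
        # behavior the commit summary describes.
        return [
            triton.Config(
                {"BLOCK_M": 128, "BLOCK_N": 128, "BLOCK_K": 32},
                num_stages=2,
                num_warps=8,
            )
        ]
    # Expanded path: a Cartesian product over tile sizes, pipeline stages,
    # and warp counts; @triton.autotune would then time each candidate.
    return [
        triton.Config(
            {"BLOCK_M": bm, "BLOCK_N": bn, "BLOCK_K": bk},
            num_stages=stages,
            num_warps=warps,
        )
        for bm, bn, bk, stages, warps in itertools.product(
            (32, 64, 128, 256),  # BLOCK_M
            (32, 64, 128, 256),  # BLOCK_N
            (32, 64),            # BLOCK_K
            (1, 2),              # num_stages
            (4, 8),              # num_warps
        )
    ]

With these placeholder ranges the expanded path enumerates 4 x 4 x 2 x 2 x 2 = 256 candidates, which happens to match the count cited for AMD in the in-code comment above; the real tuning space is defined inside hammer's triton_addmm and may differ.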
