From f2e9171b63d28be977ccbf56c1d4ea550330345c Mon Sep 17 00:00:00 2001 From: "Liu, Yucheng" Date: Wed, 13 Mar 2024 18:32:56 +0800 Subject: [PATCH] disable MHA --- neural_speed/core/layers/mha_dense.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/neural_speed/core/layers/mha_dense.cpp b/neural_speed/core/layers/mha_dense.cpp index af2953514..b0ab5118b 100644 --- a/neural_speed/core/layers/mha_dense.cpp +++ b/neural_speed/core/layers/mha_dense.cpp @@ -72,7 +72,9 @@ bool bestla_reordered_attn_fp32_support(const attn_shape_t* params) { // TODO(Yi): check K V's layout if (_cd->AMX_BF16()) return true; #endif - return !_cd->AVX512F() || _cd->AVX2(); // use avx2 and f16c on avx2 platforms + // use avx2 and f16c on avx2 platforms + // TODO: check avx2 mha on server + return !_cd->AVX512F() && _cd->AVX2(); } // kv cache sizes in bytes per layer per batch per beam for; void bestla_reordered_attn_fp32_batch_kv_info(const kv_shape_t* params, kv_cache_info_t* out) {