diff --git a/kernel/x86_64/casum_microk_skylakex-2.c b/kernel/x86_64/casum_microk_skylakex-2.c index 5d37fd5414..10b70ff201 100644 --- a/kernel/x86_64/casum_microk_skylakex-2.c +++ b/kernel/x86_64/casum_microk_skylakex-2.c @@ -2,10 +2,9 @@ #ifdef __NVCOMPILER #define NVCOMPVERS ( __NVCOMPILER_MAJOR__ * 100 + __NVCOMPILER_MINOR__ ) #endif -#if ((( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && (__clang_major__ >= 9 &&__clang_major__ !=17)) || ( defined(__NVCOMPILER) && NVCOMPVERS >= 2309))) +#if ((( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9)) || (defined(__NVCOMPILER) && NVCOMPVERS >= 2203)) -#if (!(defined(__NVCOMPILER) )) -//&& NVCOMPVERS < 2309)) +#if (!(defined(__NVCOMPILER) && NVCOMPVERS < 2203)) #define HAVE_CASUM_KERNEL 1 @@ -21,15 +20,14 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x) if (n2 < 64) { __m128 accum_10, accum_11, accum_12, accum_13; - __m128 abs_mask1 = abs_mask1; + __m128 abs_mask1; accum_10 = _mm_setzero_ps(); accum_11 = _mm_setzero_ps(); accum_12 = _mm_setzero_ps(); accum_13 = _mm_setzero_ps(); - abs_mask1 = (__m128)_mm_cmpeq_epi8((__m128i) abs_mask1, (__m128i) abs_mask1); - abs_mask1 = (__m128)_mm_srli_epi32((__m128i) abs_mask1, 1); + abs_mask1 = (__m128)_mm_set1_epi32(0x7fffffff); _mm_prefetch(&x1[0], _MM_HINT_T0); diff --git a/kernel/x86_64/zasum_microk_skylakex-2.c b/kernel/x86_64/zasum_microk_skylakex-2.c index 7260922e79..f6bc8e37b2 100644 --- a/kernel/x86_64/zasum_microk_skylakex-2.c +++ b/kernel/x86_64/zasum_microk_skylakex-2.c @@ -2,10 +2,9 @@ #ifdef __NVCOMPILER #define NVCOMPVERS ( __NVCOMPILER_MAJOR__ * 100 + __NVCOMPILER_MINOR__ ) #endif -#if ((( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && ( __clang_major__ >= 9 && __clang_major__ != 17)) || (defined(__NVCOMPILER) && NVCOMPVERS >= 2309))) +#if ((( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9)) || (defined(__NVCOMPILER) && NVCOMPVERS >= 2203)) -#if (!(defined(__NVCOMPILER) )) -//&& NVCOMPVERS < 2309)) +#if (!(defined(__NVCOMPILER) && NVCOMPVERS < 2203)) #define HAVE_ZASUM_KERNEL 1 @@ -22,16 +21,14 @@ static FLOAT zasum_kernel(BLASLONG n, FLOAT *x) if (n2 < 32) { __m128d accum_10, accum_11, accum_12, accum_13; - __m128d abs_mask1 = abs_mask1; + __m128d abs_mask1; accum_10 = _mm_setzero_pd(); accum_11 = _mm_setzero_pd(); accum_12 = _mm_setzero_pd(); accum_13 = _mm_setzero_pd(); - // abs_mask1 = (__m128d)_mm_set1_epi64x(0x7fffffffffffffff); - abs_mask1 = (__m128d)_mm_cmpeq_epi8((__m128i) abs_mask1, (__m128i) abs_mask1); - abs_mask1 = (__m128d)_mm_srli_epi64((__m128i) abs_mask1, 1); + abs_mask1 = (__m128d)_mm_set1_epi64x(0x7fffffffffffffff); _mm_prefetch(&x1[0], _MM_HINT_T0); if (n2 >= 16){