diff --git a/cmake/cpu_extension.cmake b/cmake/cpu_extension.cmake index 776a0bb11ae64..5912c5c02ede7 100644 --- a/cmake/cpu_extension.cmake +++ b/cmake/cpu_extension.cmake @@ -93,7 +93,7 @@ if (AVX512_FOUND AND NOT AVX512_DISABLED) FetchContent_Declare( oneDNN GIT_REPOSITORY https://github.com/oneapi-src/oneDNN.git - GIT_TAG v3.5.3 + GIT_TAG v3.6 GIT_PROGRESS TRUE GIT_SHALLOW TRUE ) diff --git a/csrc/cpu/cpu_types_x86.hpp b/csrc/cpu/cpu_types_x86.hpp index 12d5757b495be..4bb4eb0f491ac 100644 --- a/csrc/cpu/cpu_types_x86.hpp +++ b/csrc/cpu/cpu_types_x86.hpp @@ -432,6 +432,16 @@ struct FP32Vec16 : public Vec { explicit FP32Vec16(const FP32Vec8 &data) : reg_low(data.reg), reg_high(data.reg) {} + explicit FP32Vec16(const FP16Vec16 &v) { + __m128i low = _mm256_extractf128_si256(v.reg, 0); + __m128i high = _mm256_extractf128_si256(v.reg, 1); + + reg_low = _mm256_cvtph_ps(low); + reg_high = _mm256_cvtph_ps(high); + } + + explicit FP32Vec16(const FP16Vec8 &v) : FP32Vec16(FP32Vec8(v)) {} + explicit FP32Vec16(const BF16Vec16 &v) { __m128i low = _mm256_extractf128_si256(v.reg, 0); __m128i high = _mm256_extractf128_si256(v.reg, 1);