From e6ed9d3fc2821a72cc8ded4a02cac72b9f306f88 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Fri, 8 Nov 2024 16:32:01 -0800 Subject: [PATCH] Fix sse tests with `-mnontrapping-fptoint` (#22893) Turns out we were relying on the explicit llvm bounds check here to give use the defined SSE behavior, but normal C semantics don't give us that so we need explicit bounds checks here. Fixes #22892 --- system/include/compat/emmintrin.h | 37 ++++++++++++++++++++----------- system/include/compat/xmmintrin.h | 18 +++++++++------ test/test_core.py | 28 ++++++++++++++++------- 3 files changed, 55 insertions(+), 28 deletions(-) diff --git a/system/include/compat/emmintrin.h b/system/include/compat/emmintrin.h index 61b69b5bcabe8..7fe6527356720 100644 --- a/system/include/compat/emmintrin.h +++ b/system/include/compat/emmintrin.h @@ -431,9 +431,11 @@ _mm_cvttpd_epi32(__m128d __a) int m[2]; for(int i = 0; i < 2; ++i) { - int x = lrint(__a[i]); - if (x != 0 || fabs(__a[i]) < 2.0) - m[i] = (int)__a[i]; + float elem = __a[i]; + if ((lrint(elem) != 0 || fabs(elem) < 2.0) && !isnanf(elem) && elem <= INT_MAX && elem >= INT_MIN) + // Use the trapping instruction here since we have explicit bounds checks + // above. + m[i] = __builtin_wasm_trunc_s_i32_f32(elem); else m[i] = (int)0x80000000; } @@ -444,9 +446,12 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__)) _mm_cvttsd_si32(__m128d __a) { // TODO: OPTIMIZE! - int x = lrint(__a[0]); - if (x != 0 || fabs(__a[0]) < 2.0) - return (int)__a[0]; + float elem = __a[0]; + if (isnan(elem) || elem > INT_MAX || elem < INT_MIN) return (int)0x80000000; + if (lrint(elem) != 0 || fabs(elem) < 2.0) + // Use the trapping instruction here since we have explicit bounds checks + // above. + return __builtin_wasm_trunc_s_i32_f32(elem); else return (int)0x80000000; } @@ -1013,10 +1018,13 @@ static __inline__ long long __attribute__((__always_inline__, __nodebug__)) _mm_cvttsd_si64(__m128d __a) { // TODO: optimize - if (isnan(__a[0]) || isinf(__a[0])) return 0x8000000000000000LL; - long long x = llrint(__a[0]); - if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(__a[0]) < 2.f)) - return (long long)__a[0]; + float e = __a[0]; + if (isnan(e) || isinf(e) || e > LLONG_MAX || e < LLONG_MIN) return 0x8000000000000000LL; + long long x = llrint(e); + if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f)) + // Use the trapping instruction here since we have explicit bounds checks + // above + return __builtin_wasm_trunc_s_i64_f32(e); else return 0x8000000000000000LL; } @@ -1056,9 +1064,12 @@ _mm_cvttps_epi32(__m128 __a) } u; for(int i = 0; i < 4; ++i) { - int x = lrint(__a[i]); - if (x != 0 || fabs(__a[i]) < 2.0) - u.x[i] = (int)__a[i]; + float e = __a[i]; + int x = lrint(e); + if ((x != 0 || fabs(e) < 2.0) && !isnanf(e) && e <= INT_MAX && e >= INT_MIN) + // Use the trapping instruction here since we have explicit bounds checks + // above. + u.x[i] = __builtin_wasm_trunc_s_i32_f32(e); else u.x[i] = (int)0x80000000; } diff --git a/system/include/compat/xmmintrin.h b/system/include/compat/xmmintrin.h index e6276e84323fa..1363dc36a68cc 100644 --- a/system/include/compat/xmmintrin.h +++ b/system/include/compat/xmmintrin.h @@ -9,6 +9,7 @@ #include +#include #include #include @@ -605,9 +606,11 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SL static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvttss_si32(__m128 __a) { - int x = lrint(((__f32x4)__a)[0]); - if (x != 0 || fabsf(((__f32x4)__a)[0]) < 2.f) - return (int)((__f32x4)__a)[0]; + float e = ((__f32x4)__a)[0]; + if (isnanf(e) || e > INT_MAX || e < INT_MIN) return (int)0x80000000; + int x = lrint(e); + if ((x != 0 || fabsf(e) < 2.f)) + return (int)e; else return (int)0x80000000; } @@ -635,10 +638,11 @@ _mm_cvtss_si64(__m128 __a) static __inline__ long long __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvttss_si64(__m128 __a) { - if (isnan(((__f32x4)__a)[0]) || isinf(((__f32x4)__a)[0])) return 0x8000000000000000LL; - long long x = llrint(((__f32x4)__a)[0]); - if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(((__f32x4)__a)[0]) < 2.f)) - return (long long)((__f32x4)__a)[0]; + float e = ((__f32x4)__a)[0]; + if (isnan(e) || isinf(e) || e > LLONG_MAX || e < LLONG_MIN) return 0x8000000000000000LL; + long long x = llrint(e); + if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f)) + return (long long)e; else return 0x8000000000000000LL; } diff --git a/test/test_core.py b/test/test_core.py index ba8c3d64e2e1e..aa5de85c36a63 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -6511,12 +6511,16 @@ def test_neon_wasm_simd(self): @requires_native_clang @no_safe_heap('has unaligned 64-bit operations in wasm') @no_ubsan('test contains UB') - def test_sse1(self): + @parameterized({ + '': ([],), + 'nontrapping': (['-mnontrapping-fptoint'],) + }) + def test_sse1(self, args): src = test_file('sse/test_sse1.cpp') self.run_process([shared.CLANG_CXX, src, '-msse', '-o', 'test_sse1', '-D_CRT_SECURE_NO_WARNINGS=1'] + clang_native.get_clang_native_args(), stdout=PIPE) native_result = self.run_process('./test_sse1', stdout=PIPE).stdout - self.emcc_args += ['-I' + test_file('sse'), '-msse'] + self.emcc_args += ['-I' + test_file('sse'), '-msse'] + args self.maybe_closure() self.do_runf(src, native_result) @@ -6528,14 +6532,18 @@ def test_sse1(self): @is_slow_test @no_ubsan('https://github.com/emscripten-core/emscripten/issues/19688') @no_asan('local count too large') - def test_sse2(self): + @parameterized({ + '': ([],), + 'nontrapping': (['-mnontrapping-fptoint'],) + }) + def test_sse2(self, args): if self.is_wasm64(): self.require_node_canary() src = test_file('sse/test_sse2.cpp') self.run_process([shared.CLANG_CXX, src, '-msse2', '-Wno-argument-outside-range', '-o', 'test_sse2', '-D_CRT_SECURE_NO_WARNINGS=1'] + clang_native.get_clang_native_args(), stdout=PIPE) native_result = self.run_process('./test_sse2', stdout=PIPE).stdout - self.emcc_args += ['-I' + test_file('sse'), '-msse2', '-Wno-argument-outside-range', '-sSTACK_SIZE=1MB'] + self.emcc_args += ['-I' + test_file('sse'), '-msse2', '-Wno-argument-outside-range', '-sSTACK_SIZE=1MB'] + args self.maybe_closure() self.do_runf(src, native_result) @@ -6587,8 +6595,8 @@ def test_sse4_1(self): @wasm_simd @requires_native_clang @parameterized({ - '': (False,), - '2': (True,) + '': (False,), + '2': (True,) }) def test_sse4(self, use_4_2): msse4 = '-msse4.2' if use_4_2 else '-msse4' @@ -6606,12 +6614,16 @@ def test_sse4(self, use_4_2): @is_slow_test @no_asan('local count too large') @no_ubsan('local count too large') - def test_avx(self): + @parameterized({ + '': ([],), + 'nontrapping': (['-mnontrapping-fptoint'],) + }) + def test_avx(self, args): src = test_file('sse/test_avx.cpp') self.run_process([shared.CLANG_CXX, src, '-mavx', '-Wno-argument-outside-range', '-Wpedantic', '-o', 'test_avx', '-D_CRT_SECURE_NO_WARNINGS=1'] + clang_native.get_clang_native_args(), stdout=PIPE) native_result = self.run_process('./test_avx', stdout=PIPE).stdout - self.emcc_args += ['-I' + test_file('sse'), '-mavx', '-Wno-argument-outside-range', '-sSTACK_SIZE=1MB'] + self.emcc_args += ['-I' + test_file('sse'), '-mavx', '-Wno-argument-outside-range', '-sSTACK_SIZE=1MB'] + args self.maybe_closure() self.do_runf(src, native_result)