From 3f3f64d822f60d6b086bfd94ee0d5085effb83b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Tue, 26 Sep 2023 19:45:06 +0200 Subject: [PATCH] Implement `llvm.x86.sse3.*` intrinsics --- src/shims/x86/mod.rs | 6 ++ src/shims/x86/sse3.rs | 121 ++++++++++++++++++++++++++++++ tests/pass/intrinsics-x86-sse3.rs | 114 ++++++++++++++++++++++++++++ 3 files changed, 241 insertions(+) create mode 100644 src/shims/x86/sse3.rs create mode 100644 tests/pass/intrinsics-x86-sse3.rs diff --git a/src/shims/x86/mod.rs b/src/shims/x86/mod.rs index fbfe00e03d..8b3805ae55 100644 --- a/src/shims/x86/mod.rs +++ b/src/shims/x86/mod.rs @@ -9,6 +9,7 @@ use shims::foreign_items::EmulateByNameResult; mod sse; mod sse2; +mod sse3; impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: @@ -88,6 +89,11 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this, link_name, abi, args, dest, ); } + name if name.starts_with("sse3.") => { + return sse3::EvalContextExt::emulate_x86_sse3_intrinsic( + this, link_name, abi, args, dest, + ); + } _ => return Ok(EmulateByNameResult::NotSupported), } Ok(EmulateByNameResult::NeedsJumping) diff --git a/src/shims/x86/sse3.rs b/src/shims/x86/sse3.rs new file mode 100644 index 0000000000..39842fb2d7 --- /dev/null +++ b/src/shims/x86/sse3.rs @@ -0,0 +1,121 @@ +use rustc_middle::mir; +use rustc_span::Symbol; +use rustc_target::abi::Align; +use rustc_target::spec::abi::Abi; + +use crate::*; +use shims::foreign_items::EmulateByNameResult; + +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} +pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: + crate::MiriInterpCxExt<'mir, 'tcx> +{ + fn emulate_x86_sse3_intrinsic( + &mut self, + link_name: Symbol, + abi: Abi, + args: &[OpTy<'tcx, Provenance>], + dest: &PlaceTy<'tcx, Provenance>, + ) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> { + let this = 
self.eval_context_mut(); + // Prefix should have already been checked. + let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse3.").unwrap(); + + match unprefixed_name { + // Used to implement the _mm_addsub_ps and _mm_addsub_pd functions. + // Alternately subtract and add the floating point (f32 or f64) elements of + // `left` and `right`: even indices get `left - right`, odd indices `left + right`. + "addsub.ps" | "addsub.pd" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (left, left_len) = this.operand_to_simd(left)?; + let (right, right_len) = this.operand_to_simd(right)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, left_len); + assert_eq!(dest_len, right_len); + + for i in 0..dest_len { + let left = this.read_immediate(&this.project_index(&left, i)?)?; + let right = this.read_immediate(&this.project_index(&right, i)?)?; + let dest = this.project_index(&dest, i)?; + + // Even elements are subtracted and odd elements are added. + let op = if i % 2 == 0 { mir::BinOp::Sub } else { mir::BinOp::Add }; + let (res, _overflow) = this.overflowing_binary_op(op, &left, &right)?; + + this.write_immediate(*res, &dest)?; + } + } + // Used to implement the _mm_h{add,sub}_p{s,d} functions. + // Horizontally add/subtract adjacent floating point values + // in `left` and `right`. `left` fills the first half of `dest`, `right` the second. 
+ "hadd.ps" | "hadd.pd" | "hsub.ps" | "hsub.pd" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (left, left_len) = this.operand_to_simd(left)?; + let (right, right_len) = this.operand_to_simd(right)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, left_len); + assert_eq!(dest_len, right_len); + assert_eq!(dest_len % 2, 0); + + let op = match unprefixed_name { + "hadd.ps" | "hadd.pd" => mir::BinOp::Add, + "hsub.ps" | "hsub.pd" => mir::BinOp::Sub, + _ => unreachable!(), + }; + + let middle = dest_len / 2; + for i in 0..dest_len { + let (lhs, rhs) = if i < middle { + let base_i = i.checked_mul(2).unwrap(); + ( + this.read_immediate(&this.project_index(&left, base_i)?)?, + this.read_immediate( + &this.project_index(&left, base_i.checked_add(1).unwrap())?, + )?, + ) + } else { + let base_i = i.checked_sub(middle).unwrap().checked_mul(2).unwrap(); + ( + this.read_immediate(&this.project_index(&right, base_i)?)?, + this.read_immediate( + &this.project_index(&right, base_i.checked_add(1).unwrap())?, + )?, + ) + }; + let (res, _overflow) = this.overflowing_binary_op(op, &lhs, &rhs)?; + + this.write_immediate(*res, &this.project_index(&dest, i)?)?; + } + } + // Used to implement the _mm_lddqu_si128 function. + // Reads a 128-bit vector from an unaligned pointer. This intrinsic + // is expected to perform better than a regular unaligned read when + // the data crosses a cache line, but for Miri this is just a regular + // unaligned read. 
+ "ldu.dq" => { + let [src_ptr] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + let dest = dest.force_mplace(this)?; + + let src_ptr = this.read_pointer(src_ptr)?; + let dest_ptr = this.read_pointer(&this.mplace_to_ref(&dest)?)?; + + this.mem_copy( + src_ptr, + Align::ONE, + dest_ptr, + Align::ONE, + dest.layout.size, + /*nonoverlapping*/ true, + )?; + } + _ => return Ok(EmulateByNameResult::NotSupported), + } + Ok(EmulateByNameResult::NeedsJumping) + } +} diff --git a/tests/pass/intrinsics-x86-sse3.rs b/tests/pass/intrinsics-x86-sse3.rs new file mode 100644 index 0000000000..ea92bf45ac --- /dev/null +++ b/tests/pass/intrinsics-x86-sse3.rs @@ -0,0 +1,114 @@ +// Ignore everything except x86 and x86_64 +// Any new targets that are added to CI should be ignored here. +// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.) +//@ignore-target-aarch64 +//@ignore-target-arm +//@ignore-target-avr +//@ignore-target-s390x +//@ignore-target-thumbv7em +//@ignore-target-wasm32 +//@compile-flags: -C target-feature=+sse3 + +use core::mem::transmute; +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +fn main() { + assert!(is_x86_feature_detected!("sse3")); + + unsafe { + test_sse3(); + } +} + +#[target_feature(enable = "sse3")] +unsafe fn test_sse3() { + // Mostly copied from library/stdarch/crates/core_arch/src/x86/sse3.rs + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_addsub_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_addsub_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(99.0, 25.0, 0.0, -15.0)); + } + test_mm_addsub_ps(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_addsub_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_addsub_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(99.0, 25.0)); + } + test_mm_addsub_pd(); + + 
#[target_feature(enable = "sse3")] + unsafe fn test_mm_hadd_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_hadd_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(4.0, -10.0, -80.0, -5.0)); + } + test_mm_hadd_ps(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_hadd_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_hadd_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(4.0, -80.0)); + } + test_mm_hadd_pd(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_hsub_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_hsub_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(-6.0, 10.0, -120.0, 5.0)); + } + test_mm_hsub_ps(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_hsub_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_hsub_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(-6.0, -120.0)); + } + test_mm_hsub_pd(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_lddqu_si128() { + let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm_lddqu_si128(&a); + assert_eq_m128i(a, r); + } + test_mm_lddqu_si128(); +} + +#[track_caller] +#[target_feature(enable = "sse")] +unsafe fn assert_eq_m128(a: __m128, b: __m128) { + let r = _mm_cmpeq_ps(a, b); + if _mm_movemask_ps(r) != 0b1111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[track_caller] +#[target_feature(enable = "sse2")] +unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { + if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { + panic!("{:?} != {:?}", a, b); + } +} + +#[track_caller] +#[target_feature(enable = "sse2")] +pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { + assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) +}