Skip to content

Commit

Permalink
Implement llvm.x86.ssse3.* intrinsics
Browse files Browse the repository at this point in the history
  • Loading branch information
eduardosm committed Sep 27, 2023
1 parent 3f3f64d commit 184dc14
Show file tree
Hide file tree
Showing 5 changed files with 516 additions and 146 deletions.
45 changes: 45 additions & 0 deletions src/shims/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use shims::foreign_items::EmulateByNameResult;
mod sse;
mod sse2;
mod sse3;
mod ssse3;

impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
Expand Down Expand Up @@ -94,6 +95,11 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
this, link_name, abi, args, dest,
);
}
name if name.starts_with("ssse3.") => {
return ssse3::EvalContextExt::emulate_x86_ssse3_intrinsic(
this, link_name, abi, args, dest,
);
}
_ => return Ok(EmulateByNameResult::NotSupported),
}
Ok(EmulateByNameResult::NeedsJumping)
Expand Down Expand Up @@ -292,3 +298,42 @@ fn bin_op_simd_float_all<'tcx, F: rustc_apfloat::Float>(

Ok(())
}

/// Horizontaly performs `which` operation on adjacent values of
/// `left` and `right` SIMD vectors and stores the result in `dest`.
fn horizontal_bin_op<'tcx>(
this: &mut crate::MiriInterpCx<'_, 'tcx>,
which: mir::BinOp,
saturating: bool,
left: &OpTy<'tcx, Provenance>,
right: &OpTy<'tcx, Provenance>,
dest: &PlaceTy<'tcx, Provenance>,
) -> InterpResult<'tcx, ()> {
let (left, left_len) = this.operand_to_simd(left)?;
let (right, right_len) = this.operand_to_simd(right)?;
let (dest, dest_len) = this.place_to_simd(dest)?;

assert_eq!(dest_len, left_len);
assert_eq!(dest_len, right_len);
assert_eq!(dest_len % 2, 0);

let middle = dest_len / 2;
for i in 0..dest_len {
let (j, src) =
if i < middle { (i, &left) } else { (i.checked_sub(middle).unwrap(), &right) };
let base_i = j.checked_mul(2).unwrap();
let lhs = this.read_immediate(&this.project_index(src, base_i)?)?;
let rhs = this.read_immediate(&this.project_index(src, base_i.checked_add(1).unwrap())?)?;

let res = if saturating {
Immediate::from(this.saturating_arith(which, &lhs, &rhs)?)
} else {
let (res, _overflow) = this.overflowing_binary_op(which, &lhs, &rhs)?;
*res
};

this.write_immediate(res, &this.project_index(&dest, i)?)?;
}

Ok(())
}
35 changes: 3 additions & 32 deletions src/shims/x86/sse3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use rustc_span::Symbol;
use rustc_target::abi::Align;
use rustc_target::spec::abi::Abi;

use super::horizontal_bin_op;
use crate::*;
use shims::foreign_items::EmulateByNameResult;

Expand Down Expand Up @@ -55,43 +56,13 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
let [left, right] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

let (left, left_len) = this.operand_to_simd(left)?;
let (right, right_len) = this.operand_to_simd(right)?;
let (dest, dest_len) = this.place_to_simd(dest)?;

assert_eq!(dest_len, left_len);
assert_eq!(dest_len, right_len);
assert_eq!(dest_len % 2, 0);

let op = match unprefixed_name {
let which = match unprefixed_name {
"hadd.ps" | "hadd.pd" => mir::BinOp::Add,
"hsub.ps" | "hsub.pd" => mir::BinOp::Sub,
_ => unreachable!(),
};

let middle = dest_len / 2;
for i in 0..dest_len {
let (lhs, rhs) = if i < middle {
let base_i = i.checked_mul(2).unwrap();
(
this.read_immediate(&this.project_index(&left, base_i)?)?,
this.read_immediate(
&this.project_index(&left, base_i.checked_add(1).unwrap())?,
)?,
)
} else {
let base_i = i.checked_sub(middle).unwrap().checked_mul(2).unwrap();
(
this.read_immediate(&this.project_index(&right, base_i)?)?,
this.read_immediate(
&this.project_index(&right, base_i.checked_add(1).unwrap())?,
)?,
)
};
let (res, _overflow) = this.overflowing_binary_op(op, &lhs, &rhs)?;

this.write_immediate(*res, &this.project_index(&dest, i)?)?;
}
horizontal_bin_op(this, which, /*saturating*/ false, left, right, dest)?;
}
// Used to implement the _mm_lddqu_si128 function.
// Reads a 128-bit vector from an unaligned pointer. This intrinsic
Expand Down
206 changes: 206 additions & 0 deletions src/shims/x86/ssse3.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
use rustc_middle::mir;
use rustc_span::Symbol;
use rustc_target::spec::abi::Abi;

use super::horizontal_bin_op;
use crate::*;
use shims::foreign_items::EmulateByNameResult;

impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
/// Extension trait providing emulation of the `llvm.x86.ssse3.*` intrinsics.
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
    crate::MiriInterpCxExt<'mir, 'tcx>
{
    /// Emulates the SSSE3 intrinsic named by `link_name`, reading its inputs
    /// from `args` and writing the result to `dest`.
    ///
    /// `link_name` must start with `llvm.x86.ssse3.`; the function panics
    /// otherwise. Returns `EmulateByNameResult::NotSupported` when the
    /// intrinsic is not one of those handled below, and
    /// `EmulateByNameResult::NeedsJumping` after a handled intrinsic has been
    /// emulated.
    fn emulate_x86_ssse3_intrinsic(
        &mut self,
        link_name: Symbol,
        abi: Abi,
        args: &[OpTy<'tcx, Provenance>],
        dest: &PlaceTy<'tcx, Provenance>,
    ) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> {
        let this = self.eval_context_mut();
        // Prefix should have already been checked.
        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.ssse3.").unwrap();

        match unprefixed_name {
            // Used to implement the _mm_abs_epi{8,16,32} functions.
            // Calculates the absolute value of packed 8/16/32-bit integers.
            "pabs.b.128" | "pabs.w.128" | "pabs.d.128" => {
                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                let (op, op_len) = this.operand_to_simd(op)?;
                let (dest, dest_len) = this.place_to_simd(dest)?;

                assert_eq!(op_len, dest_len);

                for i in 0..dest_len {
                    let op = this.read_scalar(&this.project_index(&op, i)?)?;
                    let dest = this.project_index(&dest, i)?;

                    // The absolute value is taken as unsigned so that the minimum
                    // signed value still fits in the element when written back.
                    let res = op.to_int(dest.layout.size)?.unsigned_abs();

                    this.write_scalar(Scalar::from_uint(res, dest.layout.size), &dest)?;
                }
            }
            // Used to implement the _mm_shuffle_epi8 intrinsic.
            // Shuffles bytes from `left` using `right` as pattern.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8
            "pshuf.b.128" => {
                let [left, right] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                let (left, left_len) = this.operand_to_simd(left)?;
                let (right, right_len) = this.operand_to_simd(right)?;
                let (dest, dest_len) = this.place_to_simd(dest)?;

                assert_eq!(dest_len, left_len);
                assert_eq!(dest_len, right_len);

                for i in 0..dest_len {
                    let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u8()?;
                    let dest = this.project_index(&dest, i)?;

                    let res = if right & 0x80 == 0 {
                        let j = right % 16; // index wraps around
                        this.read_scalar(&this.project_index(&left, j.into())?)?
                    } else {
                        // If the highest bit in `right` is 1, write zero.
                        Scalar::from_u8(0)
                    };

                    this.write_scalar(res, &dest)?;
                }
            }
            // Used to implement the _mm_h{add,adds,sub}_epi{16,32} functions.
            // Horizontally add/add with saturation/subtract adjacent 16/32-bit
            // integer values in `left` and `right`.
            "phadd.w.128" | "phadd.sw.128" | "phadd.d.128" | "phsub.w.128" | "phsub.sw.128"
            | "phsub.d.128" => {
                let [left, right] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                let (which, saturating) = match unprefixed_name {
                    "phadd.w.128" | "phadd.d.128" => (mir::BinOp::Add, false),
                    "phadd.sw.128" => (mir::BinOp::Add, true),
                    "phsub.w.128" | "phsub.d.128" => (mir::BinOp::Sub, false),
                    "phsub.sw.128" => (mir::BinOp::Sub, true),
                    _ => unreachable!(),
                };

                horizontal_bin_op(this, which, saturating, left, right, dest)?;
            }
            // Used to implement the _mm_maddubs_epi16 function.
            // Multiplies corresponding pairs of packed 8-bit unsigned integer
            // values contained in the first source operand and packed 8-bit signed
            // integer values contained in the second source operand, add pairs of
            // contiguous products with signed saturation, and writes the 16-bit sums to
            // the corresponding bits in the destination.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16
            "pmadd.ub.sw.128" => {
                let [left, right] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                let (left, left_len) = this.operand_to_simd(left)?;
                let (right, right_len) = this.operand_to_simd(right)?;
                let (dest, dest_len) = this.place_to_simd(dest)?;

                assert_eq!(left_len, right_len);
                assert_eq!(dest_len, left_len / 2);

                for i in 0..dest_len {
                    let j1 = i.checked_mul(2).unwrap();
                    let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_u8()?;
                    let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i8()?;

                    let j2 = j1.checked_add(1).unwrap();
                    let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_u8()?;
                    let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i8()?;

                    let dest = this.project_index(&dest, i)?;

                    // A `u8` times an `i8` always fits in an `i16`, so the checked
                    // multiplications cannot fail; only the sum needs to saturate.
                    let mul1 = i16::from(left1).checked_mul(right1.into()).unwrap();
                    let mul2 = i16::from(left2).checked_mul(right2.into()).unwrap();
                    let res = mul1.saturating_add(mul2);

                    this.write_scalar(Scalar::from_i16(res), &dest)?;
                }
            }
            // Used to implement the _mm_mulhrs_epi16 function.
            // Multiplies packed 16-bit signed integer values, truncate the 32-bit
            // product to the 18 most significant bits by right-shifting, round the
            // truncated value by adding 1, and write bits `[16:1]` to the destination.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16
            "pmul.hr.sw.128" => {
                let [left, right] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                let (left, left_len) = this.operand_to_simd(left)?;
                let (right, right_len) = this.operand_to_simd(right)?;
                let (dest, dest_len) = this.place_to_simd(dest)?;

                assert_eq!(dest_len, left_len);
                assert_eq!(dest_len, right_len);

                for i in 0..dest_len {
                    let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i16()?;
                    let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i16()?;
                    let dest = this.project_index(&dest, i)?;

                    let res = (i32::from(left).checked_mul(right.into()).unwrap() >> 14)
                        .checked_add(1)
                        .unwrap()
                        >> 1;

                    // The result of this operation can overflow a signed 16-bit
                    // integer: when `left` and `right` are both -0x8000, `res` is
                    // 0x8000. Only bits `[16:1]` of the intermediate value are
                    // written to the destination, so truncate (wrapping to -0x8000)
                    // rather than using a checked conversion, which would panic.
                    #[allow(clippy::cast_possible_truncation)]
                    let res = res as i16;

                    this.write_scalar(Scalar::from_i16(res), &dest)?;
                }
            }
            // Used to implement the _mm_sign_epi{8,16,32} functions.
            // Negates elements from `left` when the corresponding element from
            // `right` is negative. If an element from `right` is zero, zero
            // is written to the corresponding output element.
            "psign.b.128" | "psign.w.128" | "psign.d.128" => {
                let [left, right] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                let (left, left_len) = this.operand_to_simd(left)?;
                let (right, right_len) = this.operand_to_simd(right)?;
                let (dest, dest_len) = this.place_to_simd(dest)?;

                assert_eq!(dest_len, left_len);
                assert_eq!(dest_len, right_len);

                for i in 0..dest_len {
                    let dest = this.project_index(&dest, i)?;
                    let left = this
                        .read_scalar(&this.project_index(&left, i)?)?
                        .to_int(dest.layout.size)?;
                    let right = this
                        .read_scalar(&this.project_index(&right, i)?)?
                        .to_int(dest.layout.size)?;

                    let res = if right == 0 {
                        0
                    } else if right < 0 {
                        if left == dest.layout.size.signed_int_min() {
                            // Make sure `Scalar::from_int` does not get an overflowed value.
                            //
                            // For example, with 16-bit value -0x8000, negating after sign
                            // extension to i128 will produce 0x8000. However, `Scalar::from_int`
                            // will fail because 0x8000 is outside the range of signed 16-bit
                            // integers. If the negation had been done directly with wrapping 16-bit
                            // arithmetic, the result would be -0x8000, so we simulate that.
                            left
                        } else {
                            left.checked_neg().unwrap()
                        }
                    } else {
                        left
                    };

                    this.write_scalar(Scalar::from_int(res, dest.layout.size), &dest)?;
                }
            }
            _ => return Ok(EmulateByNameResult::NotSupported),
        }
        Ok(EmulateByNameResult::NeedsJumping)
    }
}
Loading

0 comments on commit 184dc14

Please sign in to comment.