Skip to content

Commit

Permalink
Auto merge of #3492 - eduardosm:intrinsics-x86-avx2, r=oli-obk
Browse files Browse the repository at this point in the history
Implement LLVM x86 AVX2 intrinsics
  • Loading branch information
bors committed Apr 24, 2024
2 parents e989fe7 + a79b1f1 commit 9d6623e
Show file tree
Hide file tree
Showing 8 changed files with 2,474 additions and 257 deletions.
1 change: 1 addition & 0 deletions clippy.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
arithmetic-side-effects-allowed = ["rustc_target::abi::Size"]
71 changes: 2 additions & 69 deletions src/shims/x86/avx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use rustc_target::spec::abi::Abi;

use super::{
bin_op_simd_float_all, conditional_dot_product, convert_float_to_int, horizontal_bin_op,
round_all, test_bits_masked, test_high_bits_masked, unary_op_ps, FloatBinOp, FloatUnaryOp,
mask_load, mask_store, round_all, test_bits_masked, test_high_bits_masked, unary_op_ps,
FloatBinOp, FloatUnaryOp,
};
use crate::*;
use shims::foreign_items::EmulateForeignItemResult;
Expand Down Expand Up @@ -347,71 +348,3 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
Ok(EmulateForeignItemResult::NeedsJumping)
}
}

/// Masked element-wise load from memory into the SIMD place `dest`.
///
/// For every lane `i`, the most significant bit of the `i`-th element of
/// `mask` decides what happens: if it is set, the lane is filled by copying
/// one element from `ptr` offset by `i` elements; if it is clear, the lane
/// is set to zero. `ptr` is not required to be aligned.
///
/// Panics if `mask` and `dest` do not have the same number of lanes.
fn mask_load<'tcx>(
    this: &mut crate::MiriInterpCx<'_, 'tcx>,
    ptr: &OpTy<'tcx, Provenance>,
    mask: &OpTy<'tcx, Provenance>,
    dest: &MPlaceTy<'tcx, Provenance>,
) -> InterpResult<'tcx, ()> {
    let (mask_lanes, mask_len) = this.operand_to_simd(mask)?;
    let (dest_lanes, dest_len) = this.mplace_to_simd(dest)?;

    assert_eq!(dest_len, mask_len);

    // Shifting a mask element right by this amount leaves only its top bit.
    let mask_elem_size = mask_lanes.layout.field(this, 0).size;
    let top_bit_shift = mask_elem_size.bits().checked_sub(1).unwrap();

    let base = this.read_pointer(ptr)?;
    for i in 0..dest_len {
        let mask_lane = this.project_index(&mask_lanes, i)?;
        let dest_lane = this.project_index(&dest_lanes, i)?;

        let mask_bits = this.read_scalar(&mask_lane)?.to_uint(mask_elem_size)?;
        if mask_bits >> top_bit_shift == 0 {
            // Lane not selected by the mask: it is zeroed rather than loaded.
            this.write_scalar(Scalar::from_int(0, dest_lane.layout.size), &dest_lane)?;
            continue;
        }

        // Size * u64 is implemented as always checked
        #[allow(clippy::arithmetic_side_effects)]
        let lane_offset = dest_lane.layout.size * i;
        let src = base.wrapping_offset(lane_offset, &this.tcx);
        // Unaligned copy, which is what we want.
        this.mem_copy(src, dest_lane.ptr(), dest_lane.layout.size, /*nonoverlapping*/ true)?;
    }

    Ok(())
}

/// Masked element-wise store of the SIMD operand `value` into memory.
///
/// For every lane `i`, the most significant bit of the `i`-th element of
/// `mask` decides what happens: if it is set, the lane is copied to `ptr`
/// offset by `i` elements; if it is clear, nothing is written for that lane.
/// `ptr` is not required to be aligned.
///
/// Panics if `mask` and `value` do not have the same number of lanes.
fn mask_store<'tcx>(
    this: &mut crate::MiriInterpCx<'_, 'tcx>,
    ptr: &OpTy<'tcx, Provenance>,
    mask: &OpTy<'tcx, Provenance>,
    value: &OpTy<'tcx, Provenance>,
) -> InterpResult<'tcx, ()> {
    let (mask_lanes, mask_len) = this.operand_to_simd(mask)?;
    let (value_lanes, value_len) = this.operand_to_simd(value)?;

    assert_eq!(value_len, mask_len);

    // Shifting a mask element right by this amount leaves only its top bit.
    let mask_elem_size = mask_lanes.layout.field(this, 0).size;
    let top_bit_shift = mask_elem_size.bits().checked_sub(1).unwrap();

    let base = this.read_pointer(ptr)?;
    for i in 0..value_len {
        let mask_lane = this.project_index(&mask_lanes, i)?;
        let value_lane = this.project_index(&value_lanes, i)?;

        let mask_bits = this.read_scalar(&mask_lane)?.to_uint(mask_elem_size)?;
        if mask_bits >> top_bit_shift == 0 {
            // Lane not selected by the mask: nothing is stored for it.
            continue;
        }

        // Size * u64 is implemented as always checked
        #[allow(clippy::arithmetic_side_effects)]
        let lane_offset = value_lane.layout.size * i;
        let dst = base.wrapping_offset(lane_offset, &this.tcx);
        // Unaligned copy, which is what we want.
        this.mem_copy(value_lane.ptr(), dst, value_lane.layout.size, /*nonoverlapping*/ true)?;
    }

    Ok(())
}
444 changes: 444 additions & 0 deletions src/shims/x86/avx2.rs

Large diffs are not rendered by default.

Loading

0 comments on commit 9d6623e

Please sign in to comment.