diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index 67e3bf075971..6b77be3d8e3f 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -507,8 +507,11 @@ where } fn worst_case_size() -> CodeOffset { - // `Vconst128 { dst, imm }` is 20 bytes (3 byte opcode + dst + 16-byte imm) - 20 + // `VShuffle { dst, src1, src2, mask }` is 22 bytes: + // 3-byte opcode + // dst, src1, src2 (1 byte each) + // 16-byte `mask` immediate + 22 } fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index f158bcc84877..db805603c2b2 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -1396,6 +1396,10 @@ (rule (lower (scalar_to_vector a @ (value_type $F64))) (pulley_vinsertf64 (pulley_vconst128 0) a 0)) +;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I8X16 (shuffle a b (u128_from_immediate mask)))) + (pulley_vshuffle a b mask)) ;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index f51f0a3d336d..aa4bcc9f8896 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -404,11 +404,9 @@ impl WastTest { "misc_testsuite/simd/canonicalize-nan.wast", "misc_testsuite/simd/issue_3327_bnot_lowering.wast", "misc_testsuite/simd/v128-select.wast", - "spec_testsuite/proposals/annotations/simd_lane.wast", "spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast", "spec_testsuite/proposals/relaxed-simd/relaxed_dot_product.wast", "spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast", - "spec_testsuite/proposals/memory64/simd_lane.wast", "spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast", 
"spec_testsuite/proposals/memory64/relaxed_dot_product.wast", "spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast", @@ -423,7 +421,6 @@ impl WastTest { "spec_testsuite/simd_i32x4_extadd_pairwise_i16x8.wast", "spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast", "spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast", - "spec_testsuite/simd_lane.wast", "spec_testsuite/simd_load.wast", "spec_testsuite/simd_splat.wast", ]; diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 8c965bbad1c6..aa05bae4b250 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -4538,6 +4538,20 @@ impl ExtendedOpVisitor for Interpreter<'_> { ControlFlow::Continue(()) } + fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow { + let a = self.state[src1].get_u8x16(); + let b = self.state[src2].get_u8x16(); + let result = mask.to_le_bytes().map(|m| { + if m < 16 { + a[m as usize] + } else { + b[m as usize - 16] + } + }); + self.state[dst].set_u8x16(result); + ControlFlow::Continue(()) + } + fn vswizzlei8x16(&mut self, operands: BinaryOperands) -> ControlFlow { let src1 = self.state[operands.src1].get_i8x16(); let src2 = self.state[operands.src2].get_i8x16(); diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index 1bbbd7f5b3c8..45e666196f25 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -1227,6 +1227,9 @@ macro_rules! for_each_extended_op { /// `dst = ieee_minimum(src1, src2)` vminimumf64x2 = Vminimumf64x2 { operands: BinaryOperands }; + /// `dst = shuffle(src1, src2, mask)`: little-endian byte `i` of `mask` picks `dst` lane `i` from `src1` (values 0..=15) or `src2` (values 16..=31) + vshuffle = VShuffle { dst: VReg, src1: VReg, src2: VReg, mask: u128 }; + /// `dst = swizzle(src1, src2)` vswizzlei8x16 = Vswizzlei8x16 { operands: BinaryOperands };