From d78544e5717f4e4aaf6573670498ae4090769757 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Tue, 31 Dec 2024 02:07:57 +0800 Subject: [PATCH] pulley: Implement iadd_pairwise (#9912) * pulley: Implement iadd_pairwise * access by index Co-authored-by: Xuanwo --------- Co-authored-by: Xuanwo --- .../codegen/src/isa/pulley_shared/lower.isle | 6 ++++- crates/wast-util/src/lib.rs | 5 ---- pulley/src/interp.rs | 25 +++++++++++++++++++ pulley/src/lib.rs | 5 ++++ 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index db805603c2b2..2d815ae4c502 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -215,7 +215,6 @@ (if-let neg_u32 (u32_try_from_u64 neg_u64)) neg_u32) - (rule 1 (lower (has_type $I8X16 (iadd a b))) (pulley_vaddi8x16 a b)) (rule 1 (lower (has_type $I16X8 (iadd a b))) (pulley_vaddi16x8 a b)) (rule 1 (lower (has_type $I32X4 (iadd a b))) (pulley_vaddi32x4 a b)) @@ -226,6 +225,11 @@ (rule 1 (lower (has_type $I16X8 (sadd_sat a b))) (pulley_vaddi16x8_sat a b)) (rule 1 (lower (has_type $I16X8 (uadd_sat a b))) (pulley_vaddu16x8_sat a b)) +;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I16X8 (iadd_pairwise a b))) (pulley_vaddpairwisei16x8_s a b)) +(rule (lower (has_type $I32X4 (iadd_pairwise a b))) (pulley_vaddpairwisei32x4_s a b)) + ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (ty_int (fits_in_32 _)) (isub a b))) (pulley_xsub32 a b)) diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index aa4bcc9f8896..35959d99ac07 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -405,10 +405,8 @@ impl WastTest { "misc_testsuite/simd/issue_3327_bnot_lowering.wast", "misc_testsuite/simd/v128-select.wast", "spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast", - "spec_testsuite/proposals/relaxed-simd/relaxed_dot_product.wast", "spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast", "spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast", - "spec_testsuite/proposals/memory64/relaxed_dot_product.wast", "spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast", "spec_testsuite/simd_f32x4_arith.wast", "spec_testsuite/simd_f32x4_cmp.wast", @@ -416,9 +414,6 @@ impl WastTest { "spec_testsuite/simd_f64x2_arith.wast", "spec_testsuite/simd_f64x2_cmp.wast", "spec_testsuite/simd_f64x2_pmin_pmax.wast", - "spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast", - "spec_testsuite/simd_i32x4_dot_i16x8.wast", - "spec_testsuite/simd_i32x4_extadd_pairwise_i16x8.wast", "spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast", "spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast", "spec_testsuite/simd_load.wast", diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index aa05bae4b250..07f70cbf7596 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -3339,6 +3339,31 @@ impl ExtendedOpVisitor for Interpreter<'_> { ControlFlow::Continue(()) } + fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i16x8(); + let b = self.state[operands.src2].get_i16x8(); + let mut result = [0i16; 8]; + let half = result.len() / 2; + for i in 0..half { + result[i] = a[2 * i].wrapping_add(a[2 * i + 1]); + result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]); + } + self.state[operands.dst].set_i16x8(result); + ControlFlow::Continue(()) + } + + fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i32x4(); + let b = self.state[operands.src2].get_i32x4(); + let mut result = [0i32; 4]; + result[0] = a[0].wrapping_add(a[1]); + result[1] = a[2].wrapping_add(a[3]); + result[2] = b[0].wrapping_add(b[1]); + result[3] = b[2].wrapping_add(b[3]); + self.state[operands.dst].set_i32x4(result); + ControlFlow::Continue(()) + } + fn vshli8x16(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_i8x16(); let b = self.state[operands.src2].get_u32(); diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index 45e666196f25..1a37bd970e53 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -908,6 +908,11 @@ macro_rules! for_each_extended_op { /// `dst = satruating_add(src1, src2)` vaddu16x8_sat = VAddU16x8Sat { operands: BinaryOperands }; + /// `dst = [src1[0] + src1[1], ..., src2[6] + src2[7]]` + vaddpairwisei16x8_s = VAddpairwiseI16x8S { operands: BinaryOperands }; + /// `dst = [src1[0] + src1[1], ..., src2[2] + src2[3]]` + vaddpairwisei32x4_s = VAddpairwiseI32x4S { operands: BinaryOperands }; + /// `dst = src1 << src2` vshli8x16 = VShlI8x16 { operands: BinaryOperands }; /// `dst = src1 << src2`