Skip to content

Commit

Permalink
[WebAssembly] Explicitly add {z,s}ext so extends are selected
Browse files Browse the repository at this point in the history
During DAG legalization, {u,s}itofp instructions on v2i8, v2i16, v4i8
and v4i16 types ended up being legalized into scalar instructions, when
they could just be extended to v2i32/v4i32 instead.

Fixes #57182

Differential Revision: https://reviews.llvm.org/D140916
  • Loading branch information
lukel97 committed Jan 6, 2023
1 parent 4b455a7 commit fb66026
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 125 deletions.
29 changes: 29 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2356,6 +2356,32 @@ performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return DAG.getBitcast(DstType, NewShuffle);
}

/// Combine ({u,s}itofp vec) --> ({u,s}itofp ({z,s}ext vec)) for narrow integer
/// vectors.
///
/// Handles v4i8/v4i16 sources converted to v4f32 and v2i8/v2i16 sources
/// converted to v2f64 by first widening the integer lanes to i32 (zero-extend
/// for UINT_TO_FP, sign-extend for SINT_TO_FP). Without the explicit extend the
/// conversion gets split up into scalar instructions during legalization; with
/// it, the vector extending instructions are selected in
/// performVectorExtendCombine below.
///
/// \param N   The UINT_TO_FP or SINT_TO_FP node to combine.
/// \param DCI Combiner state; only its SelectionDAG is used here.
/// \returns The replacement conversion node, or an empty SDValue when the
///          source/result type pair is not one of the handled combinations.
static SDValue
performVectorExtendToFPCombine(SDNode *N,
                               TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;
  const unsigned ConvOpc = N->getOpcode();
  assert(ConvOpc == ISD::UINT_TO_FP || ConvOpc == ISD::SINT_TO_FP);

  // Ask the operand SDValue itself for its type. Going through operator-> and
  // getValueType(0) would report result 0 of the defining node, which is not
  // necessarily the result this conversion consumes.
  SDValue Src = N->getOperand(0);
  EVT InVT = Src.getValueType();
  EVT ResVT = N->getValueType(0);

  MVT ExtVT;
  if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
    ExtVT = MVT::v4i32;
  else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
    ExtVT = MVT::v2i32;
  else
    return SDValue();

  // Unsigned conversions need zero-extension and signed ones sign-extension so
  // the widened lanes keep the same numeric value.
  SDLoc DL(N);
  unsigned ExtOpc =
      ConvOpc == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
  SDValue Ext = DAG.getNode(ExtOpc, DL, ExtVT, Src);
  return DAG.getNode(ConvOpc, DL, ResVT, Ext);
}

static SDValue
performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
auto &DAG = DCI.DAG;
Expand Down Expand Up @@ -2641,6 +2667,9 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return performVectorExtendCombine(N, DCI);
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
return performVectorExtendToFPCombine(N, DCI);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
case ISD::FP_ROUND:
Expand Down
154 changes: 29 additions & 125 deletions llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,8 @@ define <4 x float> @extend_to_float_low_i16x8_u(<8 x i16> %x) {
; CHECK: .functype extend_to_float_low_i16x8_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_u 0
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_u 1
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.replace_lane 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_u 2
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.replace_lane 2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_u 3
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.replace_lane 3
; CHECK-NEXT: i32x4.extend_low_i16x8_u
; CHECK-NEXT: f32x4.convert_i32x4_u
; CHECK-NEXT: # fallthrough-return
%low = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%extended = uitofp <4 x i16> %low to <4 x float>
Expand All @@ -37,21 +24,8 @@ define <4 x float> @extend_to_float_high_i16x8_u(<8 x i16> %x) {
; CHECK: .functype extend_to_float_high_i16x8_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_u 4
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_u 5
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.replace_lane 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_u 6
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.replace_lane 2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_u 7
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.replace_lane 3
; CHECK-NEXT: i32x4.extend_high_i16x8_u
; CHECK-NEXT: f32x4.convert_i32x4_u
; CHECK-NEXT: # fallthrough-return
%high = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%extended = uitofp <4 x i16> %high to <4 x float>
Expand All @@ -62,22 +36,10 @@ define <4 x float> @extend_to_float_low_i8x16_u(<8 x i8> %x) {
; CHECK-LABEL: extend_to_float_low_i8x16_u:
; CHECK: .functype extend_to_float_low_i8x16_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 0
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 1
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.replace_lane 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 2
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.replace_lane 2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 3
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.replace_lane 3
; CHECK-NEXT: i8x16.shuffle 16, 1, 2, 3, 17, 5, 6, 7, 18, 9, 10, 11, 19, 13, 14, 15
; CHECK-NEXT: f32x4.convert_i32x4_u
; CHECK-NEXT: # fallthrough-return
%low = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%extended = uitofp <4 x i8> %low to <4 x float>
Expand All @@ -89,21 +51,9 @@ define <4 x float> @extend_to_float_high_i8x16_u(<8 x i8> %x) {
; CHECK: .functype extend_to_float_high_i8x16_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 4
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 5
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.replace_lane 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 6
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.replace_lane 2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 7
; CHECK-NEXT: f32.convert_i32_u
; CHECK-NEXT: f32x4.replace_lane 3
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: i8x16.shuffle 4, 17, 18, 19, 5, 21, 22, 23, 6, 25, 26, 27, 7, 29, 30, 31
; CHECK-NEXT: f32x4.convert_i32x4_u
; CHECK-NEXT: # fallthrough-return
%high = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%extended = uitofp <4 x i8> %high to <4 x float>
Expand All @@ -115,21 +65,8 @@ define <4 x float> @extend_to_float_low_i16x8_s(<8 x i16> %x) {
; CHECK: .functype extend_to_float_low_i16x8_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_s 0
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_s 1
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.replace_lane 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_s 2
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.replace_lane 2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_s 3
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.replace_lane 3
; CHECK-NEXT: i32x4.extend_low_i16x8_s
; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
%low = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%extended = sitofp <4 x i16> %low to <4 x float>
Expand All @@ -141,21 +78,8 @@ define <4 x float> @extend_to_float_high_i16x8_s(<8 x i16> %x) {
; CHECK: .functype extend_to_float_high_i16x8_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_s 4
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_s 5
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.replace_lane 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_s 6
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.replace_lane 2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_s 7
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.replace_lane 3
; CHECK-NEXT: i32x4.extend_high_i16x8_s
; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
%high = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%extended = sitofp <4 x i16> %high to <4 x float>
Expand All @@ -167,21 +91,13 @@ define <4 x float> @extend_to_float_low_i8x16_s(<8 x i8> %x) {
; CHECK: .functype extend_to_float_low_i8x16_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_s 0
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_s 1
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.replace_lane 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_s 2
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.replace_lane 2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_s 3
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.replace_lane 3
; CHECK-NEXT: i8x16.shuffle 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32x4.shl
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32x4.shr_s
; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
%low = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%extended = sitofp <4 x i8> %low to <4 x float>
Expand All @@ -193,21 +109,13 @@ define <4 x float> @extend_to_float_high_i8x16_s(<8 x i8> %x) {
; CHECK: .functype extend_to_float_high_i8x16_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_s 4
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_s 5
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.replace_lane 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_s 6
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.replace_lane 2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_s 7
; CHECK-NEXT: f32.convert_i32_s
; CHECK-NEXT: f32x4.replace_lane 3
; CHECK-NEXT: i8x16.shuffle 4, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32x4.shl
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32x4.shr_s
; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
%high = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%extended = sitofp <4 x i8> %high to <4 x float>
Expand All @@ -230,14 +138,10 @@ define <2 x double> @extend_to_double_low_i16x4_u(<4 x i16> %x) {
; CHECK-LABEL: extend_to_double_low_i16x4_u:
; CHECK: .functype extend_to_double_low_i16x4_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_u 0
; CHECK-NEXT: f64.convert_i32_u
; CHECK-NEXT: f64x2.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extract_lane_u 1
; CHECK-NEXT: f64.convert_i32_u
; CHECK-NEXT: f64x2.replace_lane 1
; CHECK-NEXT: i8x16.shuffle 16, 17, 2, 3, 18, 19, 6, 7, 20, 21, 10, 11, 22, 23, 14, 15
; CHECK-NEXT: f64x2.convert_low_i32x4_u
; CHECK-NEXT: # fallthrough-return
%low = shufflevector <4 x i16> %x, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
%extended = uitofp <2 x i16> %low to <2 x double>
Expand Down

0 comments on commit fb66026

Please sign in to comment.