diff --git a/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll b/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll
new file mode 100644
index 000000000000..b42754ad6517
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll
@@ -0,0 +1,255 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mattr=+simd128 | FileCheck %s
+
+; TODO: These tests should check that floating point conversions select
+; extending instructions where possible
+
+target triple = "wasm32-unknown-unknown"
+
+; uitofp of lanes 0-3 of an <8 x i16> argument to <4 x float>.
+define <4 x float> @extend_to_float_low_i16x8_u(<8 x i16> %x) {
+; CHECK-LABEL: extend_to_float_low_i16x8_u:
+; CHECK:         .functype extend_to_float_low_i16x8_u (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_u 0
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.splat
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_u 1
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.replace_lane 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_u 2
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.replace_lane 2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_u 3
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.replace_lane 3
+; CHECK-NEXT:    # fallthrough-return
+  %low = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %extended = uitofp <4 x i16> %low to <4 x float>
+  ret <4 x float> %extended
+}
+
+; uitofp of lanes 4-7 of an <8 x i16> argument to <4 x float>.
+define <4 x float> @extend_to_float_high_i16x8_u(<8 x i16> %x) {
+; CHECK-LABEL: extend_to_float_high_i16x8_u:
+; CHECK:         .functype extend_to_float_high_i16x8_u (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_u 4
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.splat
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_u 5
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.replace_lane 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_u 6
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.replace_lane 2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_u 7
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.replace_lane 3
+; CHECK-NEXT:    # fallthrough-return
+  %high = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %extended = uitofp <4 x i16> %high to <4 x float>
+  ret <4 x float> %extended
+}
+
+; uitofp of lanes 0-3 of an <8 x i8> argument to <4 x float>.
+define <4 x float> @extend_to_float_low_i8x16_u(<8 x i8> %x) {
+; CHECK-LABEL: extend_to_float_low_i8x16_u:
+; CHECK:         .functype extend_to_float_low_i8x16_u (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_u 0
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.splat
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_u 1
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.replace_lane 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_u 2
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.replace_lane 2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_u 3
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.replace_lane 3
+; CHECK-NEXT:    # fallthrough-return
+  %low = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %extended = uitofp <4 x i8> %low to <4 x float>
+  ret <4 x float> %extended
+}
+
+; uitofp of lanes 4-7 of an <8 x i8> argument to <4 x float>.
+define <4 x float> @extend_to_float_high_i8x16_u(<8 x i8> %x) {
+; CHECK-LABEL: extend_to_float_high_i8x16_u:
+; CHECK:         .functype extend_to_float_high_i8x16_u (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_u 4
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.splat
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_u 5
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.replace_lane 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_u 6
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.replace_lane 2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_u 7
+; CHECK-NEXT:    f32.convert_i32_u
+; CHECK-NEXT:    f32x4.replace_lane 3
+; CHECK-NEXT:    # fallthrough-return
+  %high = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %extended = uitofp <4 x i8> %high to <4 x float>
+  ret <4 x float> %extended
+}
+
+; sitofp of lanes 0-3 of an <8 x i16> argument to <4 x float>.
+define <4 x float> @extend_to_float_low_i16x8_s(<8 x i16> %x) {
+; CHECK-LABEL: extend_to_float_low_i16x8_s:
+; CHECK:         .functype extend_to_float_low_i16x8_s (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_s 0
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.splat
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_s 1
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.replace_lane 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_s 2
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.replace_lane 2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_s 3
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.replace_lane 3
+; CHECK-NEXT:    # fallthrough-return
+  %low = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %extended = sitofp <4 x i16> %low to <4 x float>
+  ret <4 x float> %extended
+}
+
+; sitofp of lanes 4-7 of an <8 x i16> argument to <4 x float>.
+define <4 x float> @extend_to_float_high_i16x8_s(<8 x i16> %x) {
+; CHECK-LABEL: extend_to_float_high_i16x8_s:
+; CHECK:         .functype extend_to_float_high_i16x8_s (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_s 4
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.splat
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_s 5
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.replace_lane 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_s 6
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.replace_lane 2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_s 7
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.replace_lane 3
+; CHECK-NEXT:    # fallthrough-return
+  %high = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %extended = sitofp <4 x i16> %high to <4 x float>
+  ret <4 x float> %extended
+}
+
+; sitofp of lanes 0-3 of an <8 x i8> argument to <4 x float>.
+define <4 x float> @extend_to_float_low_i8x16_s(<8 x i8> %x) {
+; CHECK-LABEL: extend_to_float_low_i8x16_s:
+; CHECK:         .functype extend_to_float_low_i8x16_s (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_s 0
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.splat
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_s 1
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.replace_lane 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_s 2
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.replace_lane 2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_s 3
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.replace_lane 3
+; CHECK-NEXT:    # fallthrough-return
+  %low = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %extended = sitofp <4 x i8> %low to <4 x float>
+  ret <4 x float> %extended
+}
+
+; sitofp of lanes 4-7 of an <8 x i8> argument to <4 x float>.
+define <4 x float> @extend_to_float_high_i8x16_s(<8 x i8> %x) {
+; CHECK-LABEL: extend_to_float_high_i8x16_s:
+; CHECK:         .functype extend_to_float_high_i8x16_s (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_s 4
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.splat
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_s 5
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.replace_lane 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_s 6
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.replace_lane 2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i8x16.extract_lane_s 7
+; CHECK-NEXT:    f32.convert_i32_s
+; CHECK-NEXT:    f32x4.replace_lane 3
+; CHECK-NEXT:    # fallthrough-return
+  %high = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %extended = sitofp <4 x i8> %high to <4 x float>
+  ret <4 x float> %extended
+}
+
+; uitofp of lanes 0-1 of a <4 x i32> argument to <2 x double>.
+define <2 x double> @extend_to_double_low_i32x4_u(<4 x i32> %x) {
+; CHECK-LABEL: extend_to_double_low_i32x4_u:
+; CHECK:         .functype extend_to_double_low_i32x4_u (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    f64x2.convert_low_i32x4_u
+; CHECK-NEXT:    # fallthrough-return
+  %low = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %extended = uitofp <2 x i32> %low to <2 x double>
+  ret <2 x double> %extended
+}
+
+; uitofp of lanes 0-1 of a <4 x i16> argument to <2 x double>.
+define <2 x double> @extend_to_double_low_i16x4_u(<4 x i16> %x) {
+; CHECK-LABEL: extend_to_double_low_i16x4_u:
+; CHECK:         .functype extend_to_double_low_i16x4_u (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_u 0
+; CHECK-NEXT:    f64.convert_i32_u
+; CHECK-NEXT:    f64x2.splat
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i16x8.extract_lane_u 1
+; CHECK-NEXT:    f64.convert_i32_u
+; CHECK-NEXT:    f64x2.replace_lane 1
+; CHECK-NEXT:    # fallthrough-return
+  %low = shufflevector <4 x i16> %x, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
+  %extended = uitofp <2 x i16> %low to <2 x double>
+  ret <2 x double> %extended
+}