diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 6bac2d2b590ffa..5978f5b0bbae5f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1599,7 +1599,6 @@ def FeatureISAVersion9_5_Common : FeatureSet< [FeatureAddressableLocalMemorySize163840, FeatureFP8Insts, FeatureFP8ConversionInsts, - FeatureCvtFP8VOP1Bug, FeatureGFX950Insts, FeaturePrngInst, FeatureBF16ConversionInsts, diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll index bd35ee3f009736..3f418ee80f8771 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX940 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX940 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s @@ -22,6 +22,12 @@ define float @test_cvt_f32_bf8_byte0(i32 %a) { ; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_0 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; +; GFX950-LABEL: test_cvt_f32_bf8_byte0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_cvt_f32_bf8_e32 v0, v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_cvt_f32_bf8_byte0: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -36,11 +42,11 @@ define float @test_cvt_f32_bf8_byte0(i32 %a) { } define float @test_cvt_f32_bf8_byte1(i32 %a) { -; GFX940-LABEL: test_cvt_f32_bf8_byte1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_f32_bf8_byte1: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_f32_bf8_byte1: ; GFX12: ; %bb.0: @@ -56,11 +62,11 @@ define float @test_cvt_f32_bf8_byte1(i32 %a) { } define float @test_cvt_f32_bf8_byte2(i32 %a) { -; GFX940-LABEL: test_cvt_f32_bf8_byte2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_f32_bf8_byte2: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_f32_bf8_byte2: ; GFX12: ; %bb.0: @@ -76,11 +82,11 @@ define float @test_cvt_f32_bf8_byte2(i32 %a) { } define float @test_cvt_f32_bf8_byte3(i32 %a) { -; GFX940-LABEL: test_cvt_f32_bf8_byte3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_f32_bf8_byte3: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_3 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_f32_bf8_byte3: ; GFX12: ; %bb.0: @@ -102,6 +108,12 @@ define float @test_cvt_f32_fp8_byte0(i32 %a) { ; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_0 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; +; GFX950-LABEL: test_cvt_f32_fp8_byte0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_cvt_f32_fp8_e32 v0, v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_cvt_f32_fp8_byte0: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -116,11 +128,11 @@ define float @test_cvt_f32_fp8_byte0(i32 %a) { } define float @test_cvt_f32_fp8_byte1(i32 %a) { -; GFX940-LABEL: test_cvt_f32_fp8_byte1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_f32_fp8_byte1: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_f32_fp8_byte1: ; GFX12: ; %bb.0: @@ -136,11 +148,11 @@ define float @test_cvt_f32_fp8_byte1(i32 %a) { } define float @test_cvt_f32_fp8_byte2(i32 %a) { -; GFX940-LABEL: test_cvt_f32_fp8_byte2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_f32_fp8_byte2: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_f32_fp8_byte2: ; GFX12: ; %bb.0: @@ -156,11 +168,11 @@ define float @test_cvt_f32_fp8_byte2(i32 %a) { } define float @test_cvt_f32_fp8_byte3(i32 %a) { -; GFX940-LABEL: test_cvt_f32_fp8_byte3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_f32_fp8_byte3: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_3 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_f32_fp8_byte3: ; GFX12: ; %bb.0: @@ -176,11 +188,11 @@ define float @test_cvt_f32_fp8_byte3(i32 %a) { } define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) { -; GFX940-LABEL: test_cvt_pk_f32_bf8_word0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_pk_f32_bf8_word0: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_pk_f32_bf8_word0: ; GFX12: ; %bb.0: @@ -196,11 +208,11 @@ define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) { } define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) { -; GFX940-LABEL: test_cvt_pk_f32_bf8_word1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_pk_f32_bf8_word1: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_pk_f32_bf8_word1: ; GFX12: ; %bb.0: @@ -216,11 +228,11 @@ define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) { } define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) { -; GFX940-LABEL: test_cvt_pk_f32_fp8_word0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_pk_f32_fp8_word0: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_pk_f32_fp8_word0: ; GFX12: ; %bb.0: @@ -236,11 +248,11 @@ define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) { } define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) { -; GFX940-LABEL: test_cvt_pk_f32_fp8_word1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_pk_f32_fp8_sdwa v[0:1], v0 src0_sel:WORD_1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_pk_f32_fp8_word1: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_pk_f32_fp8_sdwa v[0:1], v0 src0_sel:WORD_1 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_pk_f32_fp8_word1: ; GFX12: ; %bb.0: @@ -256,12 +268,12 @@ define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) { } define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) { -; GFX940-LABEL: test_cvt_pk_bf8_f32_word0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_pk_bf8_f32_word0: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 +; GFX9X-NEXT: v_mov_b32_e32 v0, v2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_pk_bf8_f32_word0: ; GFX12: ; %bb.0: @@ -279,13 +291,13 @@ define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) { } define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) { -; GFX940-LABEL: test_cvt_pk_bf8_f32_word1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_pk_bf8_f32_word1: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1] +; GFX9X-NEXT: s_nop 0 +; GFX9X-NEXT: v_mov_b32_e32 v0, v2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_pk_bf8_f32_word1: ; GFX12: ; %bb.0: @@ -303,12 +315,12 @@ define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) { } define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) { -; GFX940-LABEL: test_cvt_pk_fp8_f32_word0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_pk_fp8_f32_word0: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 +; GFX9X-NEXT: v_mov_b32_e32 v0, v2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_pk_fp8_f32_word0: ; GFX12: ; %bb.0: @@ -326,13 +338,13 @@ define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) { } define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) { -; GFX940-LABEL: test_cvt_pk_fp8_f32_word1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_pk_fp8_f32_word1: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1] +; GFX9X-NEXT: s_nop 0 +; GFX9X-NEXT: v_mov_b32_e32 v0, v2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_pk_fp8_f32_word1: ; GFX12: ; %bb.0: @@ -350,12 +362,12 @@ define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) { } define i32 @test_cvt_sr_bf8_f32_byte0(float %x, i32 %r, i32 %old) { -; GFX940-LABEL: test_cvt_sr_bf8_f32_byte0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte0: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 +; GFX9X-NEXT: v_mov_b32_e32 v0, v2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte0: ; GFX12: ; %bb.0: @@ -373,13 +385,13 @@ define i32 @test_cvt_sr_bf8_f32_byte0(float %x, i32 %r, i32 %old) { } define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) { -; GFX940-LABEL: test_cvt_sr_bf8_f32_byte1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte1: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0] +; GFX9X-NEXT: s_nop 0 +; GFX9X-NEXT: v_mov_b32_e32 v0, v2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte1: ; GFX12: ; %bb.0: @@ -397,13 +409,13 @@ define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) { } define i32 @test_cvt_sr_bf8_f32_byte2(float %x, i32 %r, i32 %old) { -; GFX940-LABEL: test_cvt_sr_bf8_f32_byte2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,0,1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte2: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,0,1] +; GFX9X-NEXT: s_nop 0 +; GFX9X-NEXT: v_mov_b32_e32 v0, v2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte2: ; GFX12: ; %bb.0: @@ -421,13 +433,13 @@ define i32 @test_cvt_sr_bf8_f32_byte2(float %x, i32 %r, i32 %old) { } define i32 @test_cvt_sr_bf8_f32_byte3(float %x, i32 %r, i32 %old) { -; GFX940-LABEL: test_cvt_sr_bf8_f32_byte3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte3: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,1] +; GFX9X-NEXT: s_nop 0 +; GFX9X-NEXT: v_mov_b32_e32 v0, v2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte3: ; GFX12: ; %bb.0: @@ -445,12 +457,12 @@ define i32 @test_cvt_sr_bf8_f32_byte3(float %x, i32 %r, i32 %old) { } define i32 @test_cvt_sr_fp8_f32_byte0(float %x, i32 %r, i32 %old) { -; GFX940-LABEL: test_cvt_sr_fp8_f32_byte0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte0: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 +; GFX9X-NEXT: v_mov_b32_e32 v0, v2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte0: ; GFX12: ; %bb.0: @@ -468,13 +480,13 @@ define i32 @test_cvt_sr_fp8_f32_byte0(float %x, i32 %r, i32 %old) { } define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) { -; GFX940-LABEL: test_cvt_sr_fp8_f32_byte1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte1: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0] +; GFX9X-NEXT: s_nop 0 +; GFX9X-NEXT: v_mov_b32_e32 v0, v2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte1: ; GFX12: ; %bb.0: @@ -492,13 +504,13 @@ define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) { } define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) { -; GFX940-LABEL: test_cvt_sr_fp8_f32_byte2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte2: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1] +; GFX9X-NEXT: s_nop 0 +; GFX9X-NEXT: v_mov_b32_e32 v0, v2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte2: ; GFX12: ; %bb.0: @@ -516,13 +528,13 @@ define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) { } define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) { -; GFX940-LABEL: test_cvt_sr_fp8_f32_byte3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte3: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,1] +; GFX9X-NEXT: s_nop 0 +; GFX9X-NEXT: v_mov_b32_e32 v0, v2 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte3: ; GFX12: ; %bb.0: @@ -540,12 +552,12 @@ define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) { } define float @test_sext_cvt_f32_fp8(i16 %a) { -; GFX940-LABEL: test_sext_cvt_f32_fp8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_sext_cvt_f32_fp8: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_sext_cvt_f32_fp8: ; GFX12: ; %bb.0: @@ -564,12 +576,12 @@ define float @test_sext_cvt_f32_fp8(i16 %a) { } define float @test_sext_cvt_f32_bf8(i16 %a) { -; GFX940-LABEL: test_sext_cvt_f32_bf8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_sext_cvt_f32_bf8: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_sext_cvt_f32_bf8: ; GFX12: ; %bb.0: @@ -588,12 +600,12 @@ define float @test_sext_cvt_f32_bf8(i16 %a) { } define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) { -; GFX940-LABEL: test_sext_cvt_pk_f32_bf8_word1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX940-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_sext_cvt_pk_f32_bf8_word1: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9X-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_sext_cvt_pk_f32_bf8_word1: ; GFX12: ; %bb.0: @@ -612,12 +624,12 @@ define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) { } define <2 x float> @test_sext_cvt_pk_f32_fp8_word0(i16 %a) { -; GFX940-LABEL: test_sext_cvt_pk_f32_fp8_word0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX940-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9X-LABEL: test_sext_cvt_pk_f32_fp8_word0: +; GFX9X: ; %bb.0: +; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9X-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9X-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0 +; GFX9X-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_sext_cvt_pk_f32_fp8_word0: ; GFX12: ; %bb.0: