diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 0569f70077df4a..83a74b4a435909 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -149,6 +149,12 @@ def FeatureMinimum3Maximum3F16 : SubtargetFeature<"minimum3-maximum3-f16", "Has v_minimum3_f16 and v_maximum3_f16 instructions" >; +def FeatureMinimum3Maximum3PKF16 : SubtargetFeature<"minimum3-maximum3-pkf16", + "HasMinimum3Maximum3PKF16", + "true", + "Has v_pk_minimum3_f16 and v_pk_maximum3_f16 instructions" +>; + def FeatureSupportsXNACK : SubtargetFeature<"xnack-support", "SupportsXNACK", "true", @@ -432,7 +438,8 @@ def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts", FeatureFP4ConversionScaleInsts, FeatureFP6BF6ConversionScaleInsts, FeatureF16BF16ToFP6BF6ConversionScaleInsts, - FeatureMinimum3Maximum3F32 + FeatureMinimum3Maximum3F32, + FeatureMinimum3Maximum3PKF16 ] >; @@ -2146,6 +2153,10 @@ def HasMinimum3Maximum3F16 : Predicate<"Subtarget->hasMinimum3Maximum3F16()">, AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>; +def HasMinimum3Maximum3PKF16 : + Predicate<"Subtarget->hasMinimum3Maximum3PKF16()">, + AssemblerPredicate<(all_of FeatureMinimum3Maximum3PKF16)>; + def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index cdc5e1a66afa2c..ea5e159fdd8363 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -250,6 +250,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAshrPkInsts = false; bool HasMinimum3Maximum3F32 = false; bool HasMinimum3Maximum3F16 = false; + bool HasMinimum3Maximum3PKF16 = false; bool RequiresCOV6 = false; @@ -1348,6 +1349,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return HasMinimum3Maximum3F16; } + bool hasMinimum3Maximum3PKF16() const { + return HasMinimum3Maximum3PKF16; + } + /// \returns The maximum number of instructions that can be enclosed in an /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that /// instruction. diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index ee68eb32d9173a..ae5a6581a3b200 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -144,6 +144,11 @@ def : VOP3PSatPat; def : VOP3PSatPat; } // End SubtargetPredicate = HasVOP3PInsts +let SubtargetPredicate = HasMinimum3Maximum3PKF16, FPDPRounding = 1 in { +defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", VOP3P_Profile>; +defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile>; +} + // TODO: Make sure we're doing the right thing with denormals. Note // that FMA and MAD will differ. multiclass MadFmaMixPats; defm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>; defm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>; +defm V_PK_MINIMUM3_F16 : VOP3P_Real_vi <0x1b>; +defm V_PK_MAXIMUM3_F16 : VOP3P_Real_vi <0x1c>; + defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>; defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>; defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>; diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s index 68d93b4abf5a72..75022d8cf0cdd0 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s @@ -1182,3 +1182,100 @@ v_maximum3_f32 v1, v2, s8, v3 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: // GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04] v_minimum3_f32 v0, v1, v2, v3 + + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c] +v_pk_minimum3_f16 v1, v2, v3, v4 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b] +v_pk_minimum3_f16 v1, v2, v3, 2.0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c] +v_pk_minimum3_f16 v1, v2, 2.0, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c] +v_pk_minimum3_f16 v1, 2.0, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c] +v_pk_minimum3_f16 v1, v2, v3, v4 clamp + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c] +v_pk_minimum3_f16 v8, v0, s8, v1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18] +v_pk_minimum3_f16 v8, v0, v1, s8 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c] +v_pk_minimum3_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c] +v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c] +v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04] +v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04] +v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c] +v_pk_maximum3_f16 v1, v2, v3, v4 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b] +v_pk_maximum3_f16 v1, v2, v3, 2.0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c] +v_pk_maximum3_f16 v1, v2, 2.0, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c] +v_pk_maximum3_f16 v1, 2.0, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c] +v_pk_maximum3_f16 v1, v2, v3, v4 clamp + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c] +v_pk_maximum3_f16 v8, v0, s8, v1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18] +v_pk_maximum3_f16 v8, v0, v1, s8 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c] +v_pk_maximum3_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c] +v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c] +v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04] +v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04] +v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s index 03b651260b2886..c5450e48558bfd 100644 --- a/llvm/test/MC/AMDGPU/gfx950_err.s +++ b/llvm/test/MC/AMDGPU/gfx950_err.s @@ -386,3 +386,9 @@ v_minimum3_f32 v0, s1, s2, v3 // GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported v_minimum3_f32 v0, v1, v2, 0xdeadbeef + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +v_pk_minimum3_f16 v0, s1, s2, v3 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +v_pk_maximum3_f16 v0, s1, s2, v3 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt index f7cb738375d224..adb4f78942503e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt @@ -881,3 +881,64 @@ # GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04] 0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04 + + +# GFX950: v_pk_maximum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c] +0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c + +# GFX950: v_pk_maximum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c] +0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c + +# GFX950: v_pk_maximum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b] +0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b + +# GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c] +0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c + +# GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c] +0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c + +# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c] +0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c + +# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04] +0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04 + +# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04] +0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04 + +# GFX950: v_pk_maximum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c] +0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c + +# GFX950: v_pk_maximum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18] +0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18 + +# GFX950: v_pk_minimum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c] +0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c + +# GFX950: v_pk_minimum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c] +0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c + +# GFX950: v_pk_minimum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b] +0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b + +# GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c] +0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c + +# GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c] +0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c + +# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c] +0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c + +# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04] +0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04 + +# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04] +0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04 + +# GFX950: v_pk_minimum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c] +0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c + +# GFX950: v_pk_minimum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18] +0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18