Skip to content

Commit

Permalink
AMDGPU: Make v2f16 minimum/maximum legal for gfx950
Browse files Browse the repository at this point in the history
  • Loading branch information
arsenm committed Nov 26, 2024
1 parent f5e92eb commit 7f94002
Show file tree
Hide file tree
Showing 6 changed files with 1,850 additions and 1,782 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// FIXME: For nnan fmaximum, emit the fmaximum3 instead of fmaxnum
if (Subtarget->hasMinimum3Maximum3F32())
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);

if (Subtarget->hasMinimum3Maximum3PKF16())
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::v2f16, Legal);
}

setOperationAction(ISD::INTRINSIC_WO_CHAIN,
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,17 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
>;
}

// Selection pattern that implements a two-operand packed v2f16 operation
// `node` using the three-operand packed instruction `inst`.
//
// Template arguments:
//   node - the two-operand DAG operator being matched.
//   inst - the three-source instruction emitted for it.
//
// The second matched source ($src1, together with its modifiers) is passed as
// both the second and the third source of `inst`.
// NOTE(review): this presumably relies on op3(a, b, b) being equivalent to
// op(a, b) for the minimum/maximum instructions this is instantiated with --
// confirm against the ISA documentation.
class MinimumMaximumByMinimum3Maximum3VOP3P<SDPatternOperator node,
Instruction inst> : GCNPat<
// Source pattern: both v2f16 operands are matched through VOP3PMods, which
// yields the value plus an i32 modifier operand for each source.
(v2f16 (node (VOP3PMods v2f16:$src0, i32:$src0_mods), (VOP3PMods v2f16:$src1, i32:$src1_mods))),
// Result: operands are given as (modifiers, value) pairs; the $src1 pair is
// intentionally repeated to fill the third source slot.
(inst $src0_mods, $src0, $src1_mods, $src1, $src1_mods, $src1)
>;

// Instantiate the v2f16 fminimum/fmaximum selection patterns. They are only
// available when the HasMinimum3Maximum3PKF16 subtarget predicate holds.
let SubtargetPredicate = HasMinimum3Maximum3PKF16 in {
// fminimum on v2f16 is selected to V_PK_MINIMUM3_F16.
def : MinimumMaximumByMinimum3Maximum3VOP3P<fminimum, V_PK_MINIMUM3_F16>;
// fmaximum on v2f16 is selected to V_PK_MAXIMUM3_F16.
def : MinimumMaximumByMinimum3Maximum3VOP3P<fmaximum, V_PK_MAXIMUM3_F16>;
}

let SubtargetPredicate = HasMadMixInsts, OtherPredicates = [NoFP32Denormals] in {

// These are VOP3a-like opcodes which accept no omod.
Expand Down
Loading

0 comments on commit 7f94002

Please sign in to comment.