Skip to content

Commit

Permalink
AMDGPU: MC support for v_cvt_scale_[f16|f32]_fp8 of gfx950.
Browse files Browse the repository at this point in the history
OPSEL ASM Syntax: opsel:[x,y,z]
where,
    opsel[x] = Inst{11} = src0_modifier{2}
    opsel[y] = Inst{12} = src1_modifier{2}
    opsel[z] = Inst{14} = src0_modifier{3}
Note: Conventional Inst{13} i.e. OPSEL[2] is ignored in asm syntax.

Co-authored-by: Pravin Jagtap <[email protected]>
  • Loading branch information
pravinjagtap authored and arsenm committed Nov 22, 2024
1 parent 2bb646c commit d34c2cc
Show file tree
Hide file tree
Showing 5 changed files with 373 additions and 2 deletions.
14 changes: 12 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -384,11 +384,17 @@ def FeaturePermlane32Swap : SubtargetFeature<"permlane32-swap",
"Has v_permlane32_swap_b32 instructions"
>;

def FeatureFP8ConversionScaleInsts : SubtargetFeature<"fp8-cvt-scale-insts",
"HasFP8ConversionScaleInsts",
"true",
"Has fp8 conversion scale instructions"
>;

def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
"GFX950Insts",
"true",
"Additional instructions for GFX950+",
[FeaturePermlane16Swap, FeaturePermlane32Swap]
[FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts]
>;

def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
Expand Down Expand Up @@ -1531,7 +1537,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureGFX950Insts,
FeaturePrngInst,
FeatureBF16ConversionInsts,
FeatureBitOp3Insts
FeatureBitOp3Insts,
FeatureFP8ConversionScaleInsts
])>;

def FeatureISAVersion9_4_0 : FeatureSet<
Expand Down Expand Up @@ -2405,6 +2412,9 @@ def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">,
def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
AssemblerPredicate<(all_of FeaturePrngInst)>;

def HasFP8ConversionScaleInsts : Predicate<"Subtarget->hasFP8ConversionScaleInsts()">,
AssemblerPredicate<(all_of FeatureFP8ConversionScaleInsts)>;

def HasGDS : Predicate<"Subtarget->hasGDS()">;

def HasGWS : Predicate<"Subtarget->hasGWS()">;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class AMDGPUSubtarget {
bool GCN3Encoding = false;
bool Has16BitInsts = false;
bool HasTrue16BitInsts = false;
bool HasFP8ConversionScaleInsts = false;
bool EnableRealTrue16Insts = false;
bool HasBF16ConversionInsts = false;
bool HasMadMixInsts = false;
Expand Down Expand Up @@ -175,6 +176,8 @@ class AMDGPUSubtarget {
return HasMadMixInsts;
}

bool hasFP8ConversionScaleInsts() const { return HasFP8ConversionScaleInsts; }

bool hasMadMacF32Insts() const {
return HasMadMacF32Insts || !isGCN();
}
Expand Down
22 changes: 22 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,24 @@ class VOP3_BITOP3_Profile<VOPProfile pfl, VOP3Features f> : VOP3_Profile<pfl, f>
let AsmVOP3OpSel = !subst("$op_sel", "$bitop3$op_sel", getAsmVOP3OpSel<3, 0, 0, 0, 0, 0>.ret);
}

class VOP3_CVT_SCALE_F1632_FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
VOP3_OPSEL> {
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
op_sel0:$op_sel);
let HasClamp = 0;
let HasSrc2 = 0;
let HasSrc2Mods = 0;
let HasExtVOP3DPP = 0;
let HasOpSel = 1;
let HasOMod = 0;
}

let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
}

let SubtargetPredicate = isGFX10Plus in {
let isCommutable = 1, isReMaterializable = 1 in {
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
Expand Down Expand Up @@ -1796,3 +1814,7 @@ defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;

defm V_BITOP3_B16 : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">;
defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">;
let OtherPredicates = [HasFP8ConversionScaleInsts] in {
defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>;
defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>;
}
192 changes: 192 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_asm_features.s
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,195 @@ v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1

// FIXME: Swapped order not accepted
// v_permlane32_swap_b32 v1, v2 fi:1 bound_ctrl:1

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, v2, v3

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4a,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,0,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, s1, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x01,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, s1, v3

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x02,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0x03,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0x04,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4a,0xd2,0x01,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, s1, v3 op_sel:[0,0,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x02,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0x03,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0x04,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, 11, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x8b,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, 11, v3

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x96,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0xa1,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0xac,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, 11, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0x8b,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, 11, v3 op_sel:[0,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x96,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0xa1,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0xac,0x06,0x02,0x00]
v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, v2, v3

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,0,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0x02,0x07,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, s1, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x01,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, s1, v3

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x02,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0x03,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0x04,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x01,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, s1, v3 op_sel:[0,0,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x02,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0x03,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0x04,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, 11, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x8b,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, 11, v3

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x96,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0xa1,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0xac,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, 11, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x8b,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, 11, v3 op_sel:[0,0,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x96,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0xa1,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0xac,0x06,0x02,0x00]
v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,1]
Loading

0 comments on commit d34c2cc

Please sign in to comment.