From b970a78335a3d46a6dbdaa762e7285e9a90c969c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 7 Nov 2024 10:40:14 -0800 Subject: [PATCH 01/40] [RISCV][GISel] Remove s32 support for G_CTPOP/CTLZ/CTTZ on RV64. (#115101) I plan to make i32 an illegal type for RV64 to match SelectionDAG and to remove i32 from the GPR register class. I've added 2 custom nodes for CTZW and CLZW to match SelectionDAG. For cpopw we pattern match G_AND+G_CTPOP in isel. --- .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 28 ++++++++-- llvm/lib/Target/RISCV/RISCVGISel.td | 4 -- llvm/lib/Target/RISCV/RISCVInstrGISel.td | 16 ++++++ .../instruction-select/ctlz-rv64.mir | 6 +-- .../instruction-select/ctpop-rv64.mir | 19 ------- .../instruction-select/cttz-rv64.mir | 6 +-- .../legalizer/legalize-ctlz-rv64.mir | 52 +++++++++---------- .../legalizer/legalize-ctpop-rv64.mir | 33 ++++++------ .../legalizer/legalize-cttz-rv64.mir | 40 ++++++-------- llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll | 4 +- 10 files changed, 101 insertions(+), 107 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 34742394a291ed..24528c5cdbdb0b 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -225,7 +225,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) auto &CountZerosUndefActions = getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF}); if (ST.hasStdExtZbb()) { - CountZerosActions.legalFor({{s32, s32}, {sXLen, sXLen}}) + CountZerosActions.legalFor({{sXLen, sXLen}}) + .customFor({{s32, s32}}) .clampScalar(0, s32, sXLen) .widenScalarToNextPow2(0) .scalarSameSizeAs(1, 0); @@ -237,9 +238,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP); if (ST.hasStdExtZbb()) { - CTPOPActions.legalFor({{s32, s32}, {sXLen, sXLen}}) - .clampScalar(0, s32, sXLen) - .widenScalarToNextPow2(0) + CTPOPActions.legalFor({{sXLen, sXLen}}) + .clampScalar(0, sXLen, sXLen) .scalarSameSizeAs(1, 0); } else { CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower(); @@ -1158,6 +1158,17 @@ bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI, return true; } +static unsigned getRISCVWOpcode(unsigned Opcode) { + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_CTLZ: + return RISCV::G_CLZW; + case TargetOpcode::G_CTTZ: + return RISCV::G_CTZW; + } +} + bool RISCVLegalizerInfo::legalizeCustom( LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const { @@ -1194,6 +1205,15 @@ bool RISCVLegalizerInfo::legalizeCustom( return Helper.lower(MI, 0, /* Unused hint type */ LLT()) == LegalizerHelper::Legalized; } + case TargetOpcode::G_CTLZ: + case TargetOpcode::G_CTTZ: { + Helper.Observer.changingInstr(MI); + Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT); + Helper.widenScalarDst(MI, sXLen); + MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode()))); + Helper.Observer.changedInstr(MI); + return true; + } case TargetOpcode::G_IS_FPCLASS: { Register GISFPCLASS = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 36881b02da2e40..f312ab1a70ea06 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -264,10 +264,6 @@ def : PatGprGpr; 
//===----------------------------------------------------------------------===// let Predicates = [HasStdExtZbb, IsRV64] in { -def : PatGpr; -def : PatGpr; -def : PatGpr; - def : Pat<(i32 (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>; def : Pat<(i32 (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td index 763aead84dd8f4..927811eef6618b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td +++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td @@ -17,6 +17,22 @@ class RISCVGenericInstruction : GenericInstruction { let Namespace = "RISCV"; } +// Pseudo equivalent to a RISCVISD::CLZW. +def G_CLZW : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src); + let hasSideEffects = false; +} +def : GINodeEquiv; + +// Pseudo equivalent to a RISCVISD::CTZW. +def G_CTZW : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src); + let hasSideEffects = false; +} +def : GINodeEquiv; + // Pseudo equivalent to a RISCVISD::FCLASS. def G_FCLASS : RISCVGenericInstruction { let OutOperandList = (outs type0:$dst); diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctlz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctlz-rv64.mir index 8c75bdd38d732a..f6e04a9999dbf8 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctlz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctlz-rv64.mir @@ -15,10 +15,8 @@ body: | ; RV64I-NEXT: $x10 = COPY [[CLZW]] ; RV64I-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0 - %2:gprb(s32) = G_CTLZ %1 - %3:gprb(s64) = G_ANYEXT %2 - $x10 = COPY %3(s64) + %1:gprb(s64) = G_CLZW %0 + $x10 = COPY %1(s64) PseudoRET implicit $x10 ... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctpop-rv64.mir index 7d584a8589b901..f91f029209220f 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctpop-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctpop-rv64.mir @@ -3,25 +3,6 @@ # RUN: -simplify-mir -verify-machineinstrs %s -o - \ # RUN: | FileCheck -check-prefix=RV64I %s ---- -name: ctpop_s32 -legalized: true -regBankSelected: true -body: | - bb.0.entry: - ; RV64I-LABEL: name: ctpop_s32 - ; RV64I: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; RV64I-NEXT: [[CPOPW:%[0-9]+]]:gpr = CPOPW [[COPY]] - ; RV64I-NEXT: $x10 = COPY [[CPOPW]] - ; RV64I-NEXT: PseudoRET implicit $x10 - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0 - %2:gprb(s32) = G_CTPOP %1 - %3:gprb(s64) = G_ANYEXT %2 - $x10 = COPY %3(s64) - PseudoRET implicit $x10 - -... --- name: ctpop_s64 legalized: true diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/cttz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/cttz-rv64.mir index b56d45f0993ada..17fb381da6cdbb 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/cttz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/cttz-rv64.mir @@ -15,10 +15,8 @@ body: | ; RV64I-NEXT: $x10 = COPY [[CTZW]] ; RV64I-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0 - %2:gprb(s32) = G_CTTZ %1 - %3:gprb(s64) = G_ANYEXT %2 - $x10 = COPY %3(s64) + %1:gprb(s64) = G_CTZW %0 + $x10 = COPY %1(s64) PseudoRET implicit $x10 ... 
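A note on the G_CLZW/G_CTZW pseudos exercised by the instruction-select tests above and the legalizer tests below: the custom legalization can use G_ANYEXT on the s32 source because, assuming the standard Zbb semantics, clzw and ctzw read only the low 32 bits of the source register and return 32 when that word is zero. A minimal standalone sketch of those semantics in plain C++ with GCC/Clang builtins (the helper names are illustrative, not LLVM API):

```cpp
#include <cstdint>

// Reference semantics of RISC-V Zbb clzw/ctzw on RV64, for illustration.
// Only the low 32 bits of rs1 participate, so any-extended (undefined)
// upper bits cannot change the result; the count is returned zero-extended
// to XLen.
uint64_t clzw(uint64_t Rs1) {
  uint32_t Lo = static_cast<uint32_t>(Rs1); // upper 32 bits ignored
  return Lo == 0 ? 32 : __builtin_clz(Lo);  // leading zeros of the low word
}

uint64_t ctzw(uint64_t Rs1) {
  uint32_t Lo = static_cast<uint32_t>(Rs1);
  return Lo == 0 ? 32 : __builtin_ctz(Lo);  // trailing zeros of the low word
}
```

This is also why the pseudos can define an sXLen result directly, with a G_TRUNC back to s32 only where the narrow value is still needed, as the updated tests below show. For ctpop there is no new pseudo; the s32 case becomes a G_AND mask plus an sXLen G_CTPOP, which instruction selection later matches to cpopw.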
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir index bc6aafb1e3b2cc..f4ea4f5eb43aa3 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir @@ -57,12 +57,12 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[AND]] + ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[CLZW]](s64) ; RV64ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) @@ -133,12 +133,12 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[AND]] + ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[CLZW]](s64) ; RV64ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) @@ -204,10 +204,8 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[TRUNC]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[CTLZ]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[COPY]] + ; RV64ZBB-NEXT: $x10 = COPY [[CLZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %1(s64) @@ -333,12 +331,12 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[AND]] + ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC 
[[CLZW]](s64) ; RV64ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) @@ -409,12 +407,12 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[AND]] + ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[CLZW]](s64) ; RV64ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) @@ -480,10 +478,8 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[TRUNC]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[CTLZ]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[COPY]] + ; RV64ZBB-NEXT: $x10 = COPY [[CLZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %1(s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir index ec885c170b5b60..48595dc9809c74 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir @@ -46,13 +46,11 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64) + ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64) + ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s8) = G_TRUNC %1(s64) @@ -106,13 +104,11 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64ZBB-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64) + ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64) + ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s16) = G_TRUNC %1(s64) @@ -161,10 +157,11 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[TRUNC]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[CTPOP]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64) + ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64) + ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %1(s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir index f8285d609875ba..c3b6d357d241d7 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir @@ -52,10 +52,9 @@ body: | ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; RV64ZBB-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) - ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; RV64ZBB-NEXT: [[CTZW:%[0-9]+]]:_(s64) = G_CTZW [[ANYEXT]] + ; RV64ZBB-NEXT: $x10 = COPY [[CTZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s8) = G_TRUNC %1(s64) @@ -115,10 +114,9 @@ body: | ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 ; RV64ZBB-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) - ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; RV64ZBB-NEXT: [[CTZW:%[0-9]+]]:_(s64) = G_CTZW [[ANYEXT]] + ; RV64ZBB-NEXT: $x10 = COPY [[CTZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s16) = G_TRUNC %1(s64) @@ -171,10 +169,8 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[TRUNC]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) 
= G_ANYEXT [[CTTZ]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[CTZW:%[0-9]+]]:_(s64) = G_CTZW [[COPY]] + ; RV64ZBB-NEXT: $x10 = COPY [[CTZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %1(s64) @@ -282,10 +278,9 @@ body: | ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; RV64ZBB-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) - ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; RV64ZBB-NEXT: [[CTZW:%[0-9]+]]:_(s64) = G_CTZW [[ANYEXT]] + ; RV64ZBB-NEXT: $x10 = COPY [[CTZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s8) = G_TRUNC %1(s64) @@ -345,10 +340,9 @@ body: | ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 ; RV64ZBB-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) - ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; RV64ZBB-NEXT: [[CTZW:%[0-9]+]]:_(s64) = G_CTZW [[ANYEXT]] + ; RV64ZBB-NEXT: $x10 = COPY [[CTZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s16) = G_TRUNC %1(s64) @@ -401,10 +395,8 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[TRUNC]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[CTTZ]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[CTZW:%[0-9]+]]:_(s64) = G_CTZW [[COPY]] + ; RV64ZBB-NEXT: $x10 = COPY [[CTZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %1(s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll index 61c96b325f17fd..77e57e6714d7ba 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll @@ -752,8 +752,6 @@ define i1 @ctpop_i32_ult_two(i32 signext %a) nounwind { ; RV64ZBB-LABEL: ctpop_i32_ult_two: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: cpopw a0, a0 -; RV64ZBB-NEXT: slli a0, a0, 32 -; RV64ZBB-NEXT: srli a0, a0, 32 ; RV64ZBB-NEXT: sltiu a0, a0, 2 ; RV64ZBB-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) @@ -794,6 +792,8 @@ define signext i32 @ctpop_i32_load(ptr %p) nounwind { ; RV64ZBB-LABEL: ctpop_i32_load: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: lw a0, 0(a0) +; RV64ZBB-NEXT: slli a0, a0, 32 +; RV64ZBB-NEXT: srli a0, a0, 32 ; RV64ZBB-NEXT: cpopw a0, a0 ; RV64ZBB-NEXT: ret %a = load i32, ptr %p From 22b4b1ab1050b4210f3c5dae54c0503ef7ad85f3 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 7 Nov 2024 10:43:11 -0800 Subject: [PATCH 02/40] Revert "[SLP][REVEC] Make GetMinMaxCost support FixedVectorType when REVEC is enabled. (#114946)" This reverts commit f58757b8dc167809b69ec00f9b5ab59281df0902. 
Failing buildbots: https://lab.llvm.org/buildbot/#/builders/174/builds/8058 https://lab.llvm.org/buildbot/#/builders/127/builds/1357 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 3 +- .../Transforms/SLPVectorizer/RISCV/revec.ll | 40 ------------------- 2 files changed, 1 insertion(+), 42 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2d34623b8ec5b5..184413b420089a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10980,8 +10980,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, // If the selects are the only uses of the compares, they will be // dead and we can adjust the cost by removing their cost. if (VI && SelectOnly) { - assert((!Ty->isVectorTy() || SLPReVec) && - "Expected only for scalar type."); + assert(!Ty->isVectorTy() && "Expected only for scalar type."); auto *CI = cast(VI->getOperand(0)); IntrinsicCost -= TTI->getCmpSelInstrCost( CI->getOpcode(), Ty, Builder.getInt1Ty(), CI->getPredicate(), diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll index e4dbc5829f6115..3d00ddf89aaa3b 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll @@ -94,43 +94,3 @@ entry: %23 = fcmp ogt <8 x float> zeroinitializer, %19 ret void } - -define void @test3(float %0) { -; CHECK-LABEL: @test3( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]] -; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP1]], <2 x float> zeroinitializer, i64 2) -; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr null, align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> , i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2) -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> [[TMP2]] -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; -entry: - br label %for.body.lr.ph - -for.body.lr.ph: - br i1 false, label %for.cond.cleanup, label %for.body - -for.cond.cleanup: ; preds = %for.body, %for.body.lr.ph - %1 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %5, %for.body ] - %2 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %6, %for.body ] - ret void - -for.body: - %3 = load <2 x float>, ptr null, align 4 - %4 = fcmp olt <2 x float> zeroinitializer, %3 - %5 = select <2 x i1> , <2 x float> %3, <2 x float> zeroinitializer - %6 = select <2 x i1> %4, <2 x float> %3, <2 x float> zeroinitializer - br label %for.cond.cleanup -} From ef73533f36d5f2132630e88899b5e64999cb8364 Mon Sep 17 
00:00:00 2001
From: Zibi Sarbinowski
Date: Thu, 7 Nov 2024 13:44:49 -0500
Subject: [PATCH 03/40] [z/OS][libc++] Remove `align_val_t` dependency in
 small_buffer.h (#114396)

Rewriting `__alloc()` and `__dealloc()` template functions to avoid errors
when `small_buffer.h` is included in the modules LIT tests. For example:
```
test-suite-install/include/c++/v1/__utility/small_buffer.h:69:81: error: use of undeclared identifier 'align_val_t'
# |    69 |     byte* __allocation = static_cast<byte*>(::operator new[](sizeof(_Stored), align_val_t{alignof(_Stored)}));
# |       |                                                                                ^
```
---
 libcxx/include/__utility/small_buffer.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/include/__utility/small_buffer.h b/libcxx/include/__utility/small_buffer.h
index 70e068f89f62ed..b44b37e90e7653 100644
--- a/libcxx/include/__utility/small_buffer.h
+++ b/libcxx/include/__utility/small_buffer.h
@@ -66,7 +66,7 @@ class __small_buffer {
     if constexpr (__fits_in_buffer<_Stored>) {
       return std::launder(reinterpret_cast<_Stored*>(__buffer_));
     } else {
-      byte* __allocation = static_cast<byte*>(::operator new[](sizeof(_Stored), align_val_t{alignof(_Stored)}));
+      byte* __allocation = static_cast<byte*>(std::__libcpp_allocate(sizeof(_Stored), alignof(_Stored)));
       std::construct_at(reinterpret_cast<byte**>(__buffer_), __allocation);
       return std::launder(reinterpret_cast<_Stored*>(__allocation));
     }
@@ -75,7 +75,7 @@ class __small_buffer {
   template <class _Stored>
   _LIBCPP_HIDE_FROM_ABI void __dealloc() noexcept {
     if constexpr (!__fits_in_buffer<_Stored>)
-      ::operator delete[](*reinterpret_cast<byte**>(__buffer_), sizeof(_Stored), align_val_t{alignof(_Stored)});
+      std::__libcpp_deallocate(*reinterpret_cast<byte**>(__buffer_), sizeof(_Stored), alignof(_Stored));
   }

   template <class _Stored>

From 7bd9be2e0a74e6d17ec3f95ff364a4461dec4dbe Mon Sep 17 00:00:00 2001
From: Kazu Hirata
Date: Thu, 7 Nov 2024 10:54:12 -0800
Subject: [PATCH 04/40] [Driver] Use heterogeneous lookups with std::set (NFC)
 (#115259)

Heterogeneous lookups allow us to call find with StringRef, avoiding a
temporary heap allocation of std::string.
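For reference, a self-contained sketch of the heterogeneous-lookup idiom this patch applies, in plain C++17 with std::string_view standing in for llvm::StringRef (the set contents are made up for illustration):

```cpp
#include <cassert>
#include <set>
#include <string>
#include <string_view>

int main() {
  // std::less<> is a transparent comparator, which enables the template
  // overload of find(): the key is compared against the stored std::strings
  // directly, so no temporary std::string is constructed per lookup.
  std::set<std::string, std::less<>> Defined = {"__hip_fatbin",
                                                "__hip_gpubin_handle"};
  std::string_view Name = "__hip_fatbin";
  assert(Defined.find(Name) != Defined.end());
  return 0;
}
```

With the previous plain std::set<std::string>, each query had to spell find(Name.str()), materializing a heap-allocated std::string just to throw it away; that is the allocation this change removes.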
---
 clang/lib/Driver/ToolChains/HIPUtility.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp
index c8075cbfe36b35..3f81c3cb0f80e8 100644
--- a/clang/lib/Driver/ToolChains/HIPUtility.cpp
+++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp
@@ -148,8 +148,8 @@ class HIPUndefinedFatBinSymbols {
   bool Verbose;
   std::set<std::string> FatBinSymbols;
   std::set<std::string> GPUBinHandleSymbols;
-  std::set<std::string> DefinedFatBinSymbols;
-  std::set<std::string> DefinedGPUBinHandleSymbols;
+  std::set<std::string, std::less<>> DefinedFatBinSymbols;
+  std::set<std::string, std::less<>> DefinedGPUBinHandleSymbols;
   const std::string FatBinPrefix = "__hip_fatbin";
   const std::string GPUBinHandlePrefix = "__hip_gpubin_handle";
@@ -260,11 +260,10 @@ class HIPUndefinedFatBinSymbols {
       // Add undefined symbols if they are not in the defined sets
       if (isFatBinSymbol &&
-          DefinedFatBinSymbols.find(Name.str()) == DefinedFatBinSymbols.end())
+          DefinedFatBinSymbols.find(Name) == DefinedFatBinSymbols.end())
         FatBinSymbols.insert(Name.str());
-      else if (isGPUBinHandleSymbol &&
-               DefinedGPUBinHandleSymbols.find(Name.str()) ==
-                   DefinedGPUBinHandleSymbols.end())
+      else if (isGPUBinHandleSymbol && DefinedGPUBinHandleSymbols.find(Name) ==
+                                           DefinedGPUBinHandleSymbols.end())
         GPUBinHandleSymbols.insert(Name.str());
     }
   }

From c714f928b2f9ab3dd481f272a2aa72b83fd0562e Mon Sep 17 00:00:00 2001
From: Kazu Hirata
Date: Thu, 7 Nov 2024 10:54:35 -0800
Subject: [PATCH 05/40] [InstallAPI] Call DenseMap::find without constructing
 std::string (NFC) (#115260)

KnownIncludes is a DenseMap keyed on StringRef, so we don't need to
allocate a temporary instance of std::string.
---
 clang/lib/InstallAPI/Frontend.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/InstallAPI/Frontend.cpp b/clang/lib/InstallAPI/Frontend.cpp
index 2ebe72bf021cf9..9e8c60fbda3d00 100644
--- a/clang/lib/InstallAPI/Frontend.cpp
+++ b/clang/lib/InstallAPI/Frontend.cpp
@@ -94,7 +94,7 @@ InstallAPIContext::findAndRecordFile(const FileEntry *FE,
   // included. This is primarily to resolve headers found
   // in a different location than what passed directly as input.
  StringRef IncludeName = PP.getHeaderSearchInfo().getIncludeNameForHeader(FE);
-  auto BackupIt = KnownIncludes.find(IncludeName.str());
+  auto BackupIt = KnownIncludes.find(IncludeName);
   if (BackupIt != KnownIncludes.end()) {
     KnownFiles[FE] = BackupIt->second;
     return BackupIt->second;

From 937e5069a740837ea3cb466df8e75a53f6d48254 Mon Sep 17 00:00:00 2001
From: Kazu Hirata
Date: Thu, 7 Nov 2024 10:55:05 -0800
Subject: [PATCH 06/40] [IPO] Simplify code with DenseMap::operator[] (NFC)
 (#115261)

---
 llvm/lib/Transforms/IPO/Internalize.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp
index 0b8fde6489f8e7..4cdd1fa6110627 100644
--- a/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -176,7 +176,7 @@ void InternalizePass::checkComdat(
   if (!C)
     return;

-  ComdatInfo &Info = ComdatMap.try_emplace(C).first->second;
+  ComdatInfo &Info = ComdatMap[C];
   ++Info.Size;
   if (shouldPreserveGV(GV))
     Info.External = true;

From 1ae5ecca4afb5134899d79e446afd0296d1ed5ef Mon Sep 17 00:00:00 2001
From: Kazu Hirata
Date: Thu, 7 Nov 2024 10:55:23 -0800
Subject: [PATCH 07/40] [Utils] Avoid repeated hash lookups (NFC) (#115262)

---
 llvm/lib/Transforms/Utils/Local.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 47a70492559610..768765b6c1e632 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -2178,11 +2178,9 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
       auto V = DbgValueMap.find(VI);
       if (V != DbgValueMap.end()) {
         auto *DbgII = cast<DbgVariableIntrinsic>(V->second);
-        auto NewDI = NewDbgValueMap.find({Parent, DbgII});
-        if (NewDI == NewDbgValueMap.end()) {
-          auto *NewDbgII = cast<DbgVariableIntrinsic>(DbgII->clone());
-          NewDI = NewDbgValueMap.insert({{Parent, DbgII}, NewDbgII}).first;
-        }
+        auto [NewDI, Inserted] = NewDbgValueMap.try_emplace({Parent, DbgII});
+        if (Inserted)
+          NewDI->second = cast<DbgVariableIntrinsic>(DbgII->clone());
         DbgVariableIntrinsic *NewDbgII = NewDI->second;
         // If PHI contains VI as an operand more than once, we may
         // replaced it in NewDbgII; confirm that it is present.

From b02e5bc5b1be9d94689ebe1cf1244b7da540fb19 Mon Sep 17 00:00:00 2001
From: Kazu Hirata
Date: Thu, 7 Nov 2024 10:58:58 -0800
Subject: [PATCH 08/40] [Transforms] Remove unused includes (NFC) (#115263)

Identified with misc-include-cleaner.
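The two DenseMap cleanups above (#115261 and #115262) lean on the same find-or-insert idiom; here is a runnable sketch of it using std::map, whose try_emplace behaves analogously to llvm::DenseMap's (keys and values here are made up for illustration):

```cpp
#include <cassert>
#include <map>
#include <string>

int main() {
  std::map<std::string, int> Counts;

  // Double lookup: find() walks the container once, insert() walks it again.
  if (Counts.find("a") == Counts.end())
    Counts.insert({"a", 1});

  // Single lookup: try_emplace() finds or default-constructs the mapped
  // value in one walk and reports via the bool whether it inserted; this is
  // the `auto [NewDI, Inserted] = Map.try_emplace(Key);` shape used in the
  // Local.cpp change above.
  auto [It, Inserted] = Counts.try_emplace("b");
  if (Inserted)
    It->second = 2; // initialize only on first insertion
  assert(It->second == 2);

  // When a default-constructed value is the right initial state, operator[]
  // is the same single walk, which is what the Internalize.cpp change uses.
  ++Counts["c"];
  assert(Counts["c"] == 1);
  return 0;
}
```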
--- llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp | 2 -- llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 1 - llvm/lib/Transforms/HipStdPar/HipStdPar.cpp | 3 +-- llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp | 1 - llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 1 - llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp | 1 - llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 1 - llvm/lib/Transforms/Scalar/LoopPredication.cpp | 1 - llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp | 2 -- .../Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp | 2 -- 10 files changed, 1 insertion(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp index 5f19d600a983aa..5e82ed2e98184e 100644 --- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp @@ -20,11 +20,9 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/IR/Analysis.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Utils/CallGraphUpdater.h" #include diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index bb6126026d9058..441a83310c6326 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -16,7 +16,6 @@ //===----------------------------------------------------------------------===// #include "CoroInternal.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/StackLifetime.h" diff --git a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp index b909bf5b2d7b61..92042ddab38dc7 100644 --- a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp +++ b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp @@ -41,13 +41,12 @@ #include "llvm/Transforms/HipStdPar/HipStdPar.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/ModuleUtils.h" diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 2c8b4e76312a0d..1d213e2aeae5a5 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -40,7 +40,6 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/ObjCARC.h" diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index a2434675a7b5ab..5bfbe95fafa05e 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -36,7 +36,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ObjCARCAliasAnalysis.h" #include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/Analysis/ObjCARCUtil.h" diff --git 
a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp index 23855231c5b988..155c9493e838f6 100644 --- a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp +++ b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp @@ -28,7 +28,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index dd37fe2b454138..889c432eef8466 100644 --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -64,7 +64,6 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SizeOpts.h" #include -#include #include #include #include diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index 31694ad1fa508a..1797a2f2366afa 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -192,7 +192,6 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ProfDataUtils.h" -#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp b/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp index ea2b419b17a59c..d3e0b807716ddb 100644 --- a/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp +++ b/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp @@ -14,10 +14,8 @@ #include "llvm/Transforms/Scalar/LowerWidenableCondition.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Scalar.h" diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp index 0d928af1902073..1dbdd80117563c 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp @@ -7,10 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Type.h" #include "llvm/SandboxIR/Instruction.h" #include "llvm/SandboxIR/Utils.h" From 4ac891cdd286b251a445c1e3a77d56f55a29858b Mon Sep 17 00:00:00 2001 From: Job Henandez Lara Date: Thu, 7 Nov 2024 11:02:37 -0800 Subject: [PATCH 09/40] [libc] Add the `src/stdlib/_Exit.h` header to `at_quick_exit_test.cpp` and `atexit_test.cpp` (#115351) Hello, I merged this https://github.com/llvm/llvm-project/pull/114904 a few mins ago and the tests failed because i did not add the header `src/stdlib/_Exit.h` in `at_quick_exit_test.cpp` and `atexit_test.cpp`. I ran both builds/tests and everything was good. 
Thanks.
---
 libc/test/src/stdlib/at_quick_exit_test.cpp | 1 +
 libc/test/src/stdlib/atexit_test.cpp        | 1 +
 2 files changed, 2 insertions(+)

diff --git a/libc/test/src/stdlib/at_quick_exit_test.cpp b/libc/test/src/stdlib/at_quick_exit_test.cpp
index 1ed5a83a61b8d0..c0aac4d20d92cc 100644
--- a/libc/test/src/stdlib/at_quick_exit_test.cpp
+++ b/libc/test/src/stdlib/at_quick_exit_test.cpp
@@ -8,6 +8,7 @@
 #include "src/__support/CPP/array.h"
 #include "src/__support/CPP/utility.h"
+#include "src/stdlib/_Exit.h"
 #include "src/stdlib/at_quick_exit.h"
 #include "src/stdlib/quick_exit.h"
 #include "test/UnitTest/Test.h"
diff --git a/libc/test/src/stdlib/atexit_test.cpp b/libc/test/src/stdlib/atexit_test.cpp
index 24f8b0451f3636..c25202ff54f696 100644
--- a/libc/test/src/stdlib/atexit_test.cpp
+++ b/libc/test/src/stdlib/atexit_test.cpp
@@ -8,6 +8,7 @@
 #include "src/__support/CPP/array.h"
 #include "src/__support/CPP/utility.h"
+#include "src/stdlib/_Exit.h"
 #include "src/stdlib/atexit.h"
 #include "src/stdlib/exit.h"
 #include "test/UnitTest/Test.h"

From b7a8f5f4c978856852bc39dc3d29265756e37cfe Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Thu, 7 Nov 2024 10:41:23 -0800
Subject: [PATCH 10/40] [SLP][NFC] Exit early from attempt-to-reorder, if it is
 useless

Adds early exits, which just save compile time. The function can exit early
if the total number of scalars is 2, all scalars are constant, or the opcode
is the same and not alternate. In these cases reordering will not happen and
the compiler can exit early, saving compile time.
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 184413b420089a..be7ddeb89e789f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9362,6 +9362,12 @@ void BoUpSLP::reorderGatherNode(TreeEntry &TE) {
   DenseMap<std::pair<size_t, Value *>, SmallVector<LoadInst *>> LoadsMap;
   SmallSet<size_t, 2> LoadKeyUsed;

+  // Do not reorder nodes if it is small (just 2 elements), all-constant, or
+  // if all instructions already have the same opcode.
+  if (TE.Scalars.size() == 2 || (TE.getOpcode() && !TE.isAltShuffle()) ||
+      all_of(TE.Scalars, isConstant))
+    return;
+
   if (any_of(seq<unsigned>(TE.Idx), [&](unsigned Idx) {
         return VectorizableTree[Idx]->isSame(TE.Scalars);
       }))

From 200afcf6128911892d61c2a331186fe9a4da2a3e Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 7 Nov 2024 11:10:01 -0800
Subject: [PATCH 11/40] [RISCV] Add combines_for_extload to
 RISCVPostLegalizerCombiner.

---
 llvm/lib/Target/RISCV/RISCVCombine.td         | 4 ++--
 llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll | 4 +---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td
index a2e67eef03561b..60d942957c8861 100644
--- a/llvm/lib/Target/RISCV/RISCVCombine.td
+++ b/llvm/lib/Target/RISCV/RISCVCombine.td
@@ -23,6 +23,6 @@ def RISCVO0PreLegalizerCombiner: GICombiner<
 // TODO: Add more combines.
def RISCVPostLegalizerCombiner : GICombiner<"RISCVPostLegalizerCombinerImpl", - [redundant_and, identity_combines, commute_constant_to_rhs, - constant_fold_cast_op]> { + [combines_for_extload, redundant_and, identity_combines, + commute_constant_to_rhs, constant_fold_cast_op]> { } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll index 77e57e6714d7ba..835b4e32ae3206 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll @@ -791,9 +791,7 @@ define signext i32 @ctpop_i32_load(ptr %p) nounwind { ; ; RV64ZBB-LABEL: ctpop_i32_load: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: lw a0, 0(a0) -; RV64ZBB-NEXT: slli a0, a0, 32 -; RV64ZBB-NEXT: srli a0, a0, 32 +; RV64ZBB-NEXT: lwu a0, 0(a0) ; RV64ZBB-NEXT: cpopw a0, a0 ; RV64ZBB-NEXT: ret %a = load i32, ptr %p From 60e3a81c4299baf80e7b80db9cb8368223ee9546 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Thu, 7 Nov 2024 11:24:00 -0800 Subject: [PATCH 12/40] [lldb] Add builtin e alias for expression (#115359) The changes in 461f859a72 (llvm/llvm-project#65974) resulted in a change in behavior not just for completion, but also for selection of inexect commands. Since many use `e` to mean `expression`, this change adds an alias for `e`. Note that the referenced change similarly aliases `h` to `help`. --- lldb/source/Interpreter/CommandInterpreter.cpp | 2 ++ .../API/functionalities/abbreviation/TestAbbreviations.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index c990972ca64bcf..227ed802aa933c 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -441,6 +441,8 @@ void CommandInterpreter::Initialize() { cmd_obj_sp = GetCommandSPExact("expression"); if (cmd_obj_sp) { + // Ensure `e` runs `expression`. 
+ AddAlias("e", cmd_obj_sp); AddAlias("call", cmd_obj_sp, "--")->SetHelpLong(""); CommandAlias *parray_alias = AddAlias("parray", cmd_obj_sp, "--element-count %1 --"); diff --git a/lldb/test/API/functionalities/abbreviation/TestAbbreviations.py b/lldb/test/API/functionalities/abbreviation/TestAbbreviations.py index 02ee581da516d4..a8cbffbb7ba4a5 100644 --- a/lldb/test/API/functionalities/abbreviation/TestAbbreviations.py +++ b/lldb/test/API/functionalities/abbreviation/TestAbbreviations.py @@ -20,6 +20,10 @@ def test_command_abbreviations_and_aliases(self): self.assertTrue(result.Succeeded()) self.assertEqual("apropos script", result.GetOutput()) + command_interpreter.ResolveCommand("e", result) + self.assertTrue(result.Succeeded()) + self.assertEqual("expression", result.GetOutput()) + command_interpreter.ResolveCommand("h", result) self.assertTrue(result.Succeeded()) self.assertEqual("help", result.GetOutput()) From 3f4df523152054224709ba88e9afd4efa22021c9 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 7 Nov 2024 19:25:18 +0000 Subject: [PATCH 13/40] [gn build] Port 427a5cf105c4 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 8a9aff9c05d185..dc62280d12c666 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -561,6 +561,7 @@ if (current_toolchain == default_toolchain) { "__iterator/segmented_iterator.h", "__iterator/size.h", "__iterator/sortable.h", + "__iterator/static_bounded_iter.h", "__iterator/unreachable_sentinel.h", "__iterator/wrap_iter.h", "__locale", From e9cb9285ced8d914048e0ccaf1900ffc75bdeee4 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 7 Nov 2024 19:25:19 +0000 Subject: [PATCH 14/40] [gn build] Port cacbe71af7b1 --- llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 1dc4c3f14b930a..ba78c2cf9e75fc 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -68,6 +68,7 @@ static_library("Analysis") { "InstructionPrecedenceTracking.cpp", "InstructionSimplify.cpp", "InteractiveModelRunner.cpp", + "LastRunTrackingAnalysis.cpp", "LazyBlockFrequencyInfo.cpp", "LazyBranchProbabilityInfo.cpp", "LazyCallGraph.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn index 38ff30f3fab7d1..2e4fde266c77b8 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn @@ -33,6 +33,7 @@ unittest("AnalysisTests") { "IRSimilarityIdentifierTest.cpp", "IVDescriptorsTest.cpp", "InlineCostTest.cpp", + "LastRunTrackingAnalysisTest.cpp", "LazyCallGraphTest.cpp", "LoadsTest.cpp", "LoopInfoTest.cpp", From 64c921875a833136e7417c9077f55cc0c37773b7 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 7 Nov 2024 19:25:20 +0000 Subject: [PATCH 15/40] [gn build] Port d2aff182d379 --- llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn index 
e297aa9da4e50d..28efe0db6a82c9 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn
@@ -88,7 +88,6 @@ static_library("Scalar") {
     "SpeculativeExecution.cpp",
     "StraightLineStrengthReduce.cpp",
     "StructurizeCFG.cpp",
-    "TLSVariableHoist.cpp",
     "TailRecursionElimination.cpp",
     "WarnMissedTransforms.cpp",
   ]

From 3deee235986802694175259e078dfad0edcb40ed Mon Sep 17 00:00:00 2001
From: Ian Wood <75152913+IanWood1@users.noreply.github.com>
Date: Thu, 7 Nov 2024 12:14:29 -0800
Subject: [PATCH 16/40] [mlir] IntegerRangeAnalysis: don't loop over splat
 attr (#115229)

If the `DenseIntElementsAttr` is a splat value, there is no need to loop
over the entire attr. Instead, just update with the splat value.
---
 mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp b/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp
index 8682294c8a6972..59c9759d35393f 100644
--- a/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp
+++ b/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp
@@ -40,6 +40,11 @@ void arith::ConstantOp::inferResultRanges(ArrayRef<ConstantIntRanges> argRanges,
     setResultRange(getResult(), ConstantIntRanges::constant(value));
     return;
   }
+  if (auto splatAttr = llvm::dyn_cast_or_null<SplatElementsAttr>(getValue())) {
+    setResultRange(getResult(), ConstantIntRanges::constant(
+                                    splatAttr.getSplatValue<APInt>()));
+    return;
+  }
   if (auto arrayCstAttr =
           llvm::dyn_cast_or_null<DenseIntElementsAttr>(getValue())) {
     std::optional<ConstantIntRanges> result;

From 1fef4ad188dfad0e39f93e4b0330780118f27305 Mon Sep 17 00:00:00 2001
From: Brox Chen
Date: Thu, 7 Nov 2024 15:18:32 -0500
Subject: [PATCH 17/40] [AMDGPU][True16][MC] update true16 flag on vinterp
 test (#115356)

A non-functional change.
update true16 flag on vinterp dasm test --- .../{vinterp-fake16.txt => vinterp.txt} | 173 +++++++++--------- 1 file changed, 88 insertions(+), 85 deletions(-) rename llvm/test/MC/Disassembler/AMDGPU/{vinterp-fake16.txt => vinterp.txt} (60%) diff --git a/llvm/test/MC/Disassembler/AMDGPU/vinterp-fake16.txt b/llvm/test/MC/Disassembler/AMDGPU/vinterp.txt similarity index 60% rename from llvm/test/MC/Disassembler/AMDGPU/vinterp-fake16.txt rename to llvm/test/MC/Disassembler/AMDGPU/vinterp.txt index 239f1d8b3058da..0e19f39764e7f8 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/vinterp-fake16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/vinterp.txt @@ -1,252 +1,255 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefix=CHECK %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefix=CHECK %s +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefixes=CHECK %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefixes=CHECK %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefixes=CHECK %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefixes=CHECK %s -# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0 # Check that unused bits in the encoding are ignored. 
-# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x80,0xcd,0x01,0x05,0x0e,0x1c +# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p10_f32 v1, v10, v20, v30 wait_exp:0{{$}} 0x01,0x00,0x00,0xcd,0x0a,0x29,0x7a,0x04 +# CHECK: v_interp_p10_f32 v1, v10, v20, v30 wait_exp:0 -# CHECK: v_interp_p10_f32 v2, v11, v21, v31 wait_exp:0{{$}} 0x02,0x00,0x00,0xcd,0x0b,0x2b,0x7e,0x04 +# CHECK: v_interp_p10_f32 v2, v11, v21, v31 wait_exp:0 -# CHECK: v_interp_p10_f32 v3, v12, v22, v32 wait_exp:0{{$}} 0x03,0x00,0x00,0xcd,0x0c,0x2d,0x82,0x04 +# CHECK: v_interp_p10_f32 v3, v12, v22, v32 wait_exp:0 -# CHECK: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:0{{$}} 0x00,0x80,0x00,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:0 -# CHECK: v_interp_p10_f32 v0, -v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x24 +# CHECK: v_interp_p10_f32 v0, -v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p10_f32 v0, v1, -v2, v3 wait_exp:0{{$}} 0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x44 +# CHECK: v_interp_p10_f32 v0, v1, -v2, v3 wait_exp:0 -# CHECK: v_interp_p10_f32 v0, v1, v2, -v3 wait_exp:0{{$}} 0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x84 +# CHECK: v_interp_p10_f32 v0, v1, v2, -v3 wait_exp:0 -# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:1{{$}} 0x00,0x01,0x00,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:1 -# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:7{{$}} 0x00,0x07,0x00,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:7 -# CHECK: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:7{{$}} 0x00,0x87,0x00,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:7 -# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p2_f32 v1, v10, v20, v30 wait_exp:0{{$}} 0x01,0x00,0x01,0xcd,0x0a,0x29,0x7a,0x04 +# CHECK: v_interp_p2_f32 v1, v10, v20, v30 wait_exp:0 -# CHECK: v_interp_p2_f32 v2, v11, v21, v31 wait_exp:0{{$}} 0x02,0x00,0x01,0xcd,0x0b,0x2b,0x7e,0x04 +# CHECK: v_interp_p2_f32 v2, v11, v21, v31 wait_exp:0 -# CHECK: v_interp_p2_f32 v3, v12, v22, v32 wait_exp:0{{$}} 0x03,0x00,0x01,0xcd,0x0c,0x2d,0x82,0x04 +# CHECK: v_interp_p2_f32 v3, v12, v22, v32 wait_exp:0 -# CHECK: v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:0{{$}} 0x00,0x80,0x01,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:0 -# CHECK: v_interp_p2_f32 v0, -v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x24 +# CHECK: v_interp_p2_f32 v0, -v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p2_f32 v0, v1, -v2, v3 wait_exp:0{{$}} 0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x44 +# CHECK: v_interp_p2_f32 v0, v1, -v2, v3 wait_exp:0 -# CHECK: v_interp_p2_f32 v0, v1, v2, -v3 wait_exp:0{{$}} 0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x84 +# CHECK: v_interp_p2_f32 v0, v1, v2, -v3 wait_exp:0 -# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:1{{$}} 0x00,0x01,0x01,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:1 -# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:7{{$}} 0x00,0x07,0x01,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:7 -# CHECK: v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:7{{$}} 0x00,0x87,0x01,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:7 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:0 
-# CHECK: v_interp_p10_f16_f32 v0, -v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x24 +# CHECK: v_interp_p10_f16_f32 v0, -v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, -v2, v3 wait_exp:0{{$}} 0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x44 +# CHECK: v_interp_p10_f16_f32 v0, v1, -v2, v3 wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, -v3 wait_exp:0{{$}} 0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x84 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, -v3 wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp wait_exp:0{{$}} 0x00,0x80,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:1{{$}} 0x00,0x01,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:1 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:7{{$}} 0x00,0x07,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:7 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0{{$}} 0x00,0x08,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0{{$}} 0x00,0x10,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0{{$}} 0x00,0x20,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0{{$}} 0x00,0x40,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0{{$}} 0x00,0x78,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0x4d,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p10_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x02,0xcd,0x01,0x05,0x0e,0xe4 +# CHECK: v_interp_p10_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, -v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x24 +# CHECK: v_interp_p2_f16_f32 v0, -v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, -v2, v3 wait_exp:0{{$}} 0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x44 +# CHECK: v_interp_p2_f16_f32 v0, v1, -v2, v3 wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, -v3 wait_exp:0{{$}} 0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x84 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, -v3 wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 clamp wait_exp:0{{$}} 0x00,0x80,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 clamp wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:1{{$}} 0x00,0x01,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:1 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 
wait_exp:7{{$}} 0x00,0x07,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:7 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0{{$}} 0x00,0x08,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0{{$}} 0x00,0x10,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0{{$}} 0x00,0x20,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0{{$}} 0x00,0x40,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0{{$}} 0x00,0x78,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0x4d,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p2_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0xe4 +# CHECK: v_interp_p2_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, -v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x24 +# CHECK: v_interp_p10_rtz_f16_f32 v0, -v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, -v2, v3 wait_exp:0{{$}} 0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x44 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, -v2, v3 wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, -v3 wait_exp:0{{$}} 0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x84 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, -v3 wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp wait_exp:0{{$}} 0x00,0x80,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1{{$}} 0x00,0x01,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7{{$}} 0x00,0x07,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0{{$}} 0x00,0x08,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0{{$}} 0x00,0x10,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0{{$}} 0x00,0x20,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0{{$}} 
0x00,0x40,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0{{$}} 0x00,0x78,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0x4d,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p10_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x04,0xcd,0x01,0x05,0x0e,0xe4 +# CHECK: v_interp_p10_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, -v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x24 +# CHECK: v_interp_p2_rtz_f16_f32 v0, -v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, -v2, v3 wait_exp:0{{$}} 0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x44 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, -v2, v3 wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, -v3 wait_exp:0{{$}} 0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x84 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, -v3 wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp wait_exp:0{{$}} 0x00,0x80,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1{{$}} 0x00,0x01,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7{{$}} 0x00,0x07,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0{{$}} 0x00,0x08,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0{{$}} 0x00,0x10,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0{{$}} 0x00,0x20,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0{{$}} 0x00,0x40,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0{{$}} 0x00,0x78,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0x4d,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: 
v_interp_p2_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0xe4 +# CHECK: v_interp_p2_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 From 09fb01a5e564a0cb7c121e1cc529e9aa30d95108 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 7 Nov 2024 15:32:42 -0500 Subject: [PATCH 18/40] [gn build] Enable hwasan for aarch64 Android (#115219) --- llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn | 3 +++ .../gn/secondary/compiler-rt/lib/hwasan/BUILD.gn | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn index 57b86f53254f57..58f76d3def3db3 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn @@ -10,6 +10,9 @@ group("lib") { if (current_os == "linux" || current_os == "android") { deps += [ "//compiler-rt/lib/ubsan_minimal" ] } + if (current_os == "android" && current_cpu == "arm64") { + deps += [ "//compiler-rt/lib/hwasan" ] + } if (current_os != "baremetal") { deps += [ "//compiler-rt/lib/asan", diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn index e39d8114d1f473..0f1d3d2a50e34c 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn @@ -11,7 +11,7 @@ gen_version_script("version_script") { extra = "hwasan.syms.extra" output = "$target_gen_dir/hwasan.vers" libs = [ - ":hwasan", + ":hwasan_static", ":hwasan_cxx", ] lib_names = [ @@ -88,7 +88,7 @@ source_set("cxx_sources") { sources = [ "hwasan_new_delete.cpp" ] } -static_library("hwasan") { +static_library("hwasan_static") { output_dir = crt_current_out_dir output_name = "clang_rt.$hwasan_name$crt_current_target_suffix" complete_static_lib = true @@ -140,3 +140,13 @@ static_library("hwasan_preinit") { configs += [ "//llvm/utils/gn/build:crt_code" ] sources = [ "hwasan_preinit.cpp" ] } + +group("hwasan") { + deps = [ + ":hwasan_preinit", + ":hwasan_shared", + ":hwasan_static", + ":hwasan_cxx", + ":version_script", + ] +} From 15d1560ea4047a2b4b14c826767089f538ddda70 Mon Sep 17 00:00:00 2001 From: Chinmay Deshpande Date: Thu, 7 Nov 2024 13:01:58 -0800 Subject: [PATCH 19/40] [Clang] Improve EmitClangAttrSpellingListIndex (#114899) `EmitClangAttrSpellingListIndex()` performs a lot of unnecessary string comparisons which is wasteful in time and stack space. This commit attempts to refactor this method to be more performant. 
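For illustration, the emitted matcher changes shape roughly as sketched below (this sketch is hand-written, not generated output; the attribute name, syntax, scope, and index are placeholders):

    // Previously emitted: a full string comparison for every spelling.
    if (Name == "likely" && getSyntax() == AttributeCommonInfo::AS_CXX11 &&
        Scope == "clang")
      return 1;
    // Now emitted: the scope is matched against a precomputed enum, and when a
    // spelling's length is unique among the attribute's spellings, only the
    // length of the name is compared instead of its contents.
    if (Name.size() == 6 && getSyntax() == AttributeCommonInfo::AS_CXX11 &&
        ComputedScope == AttributeCommonInfo::Scope::CLANG)
      return 1;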
--- .../include/clang/Basic/AttributeCommonInfo.h | 1 + clang/lib/Basic/Attributes.cpp | 31 +++++++++- clang/utils/TableGen/ClangAttrEmitter.cpp | 59 ++++++++++++++++--- 3 files changed, 80 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h index 5f024b4b5fd782..11c64547721739 100644 --- a/clang/include/clang/Basic/AttributeCommonInfo.h +++ b/clang/include/clang/Basic/AttributeCommonInfo.h @@ -67,6 +67,7 @@ class AttributeCommonInfo { IgnoredAttribute, UnknownAttribute, }; + enum class Scope { NONE, CLANG, GNU, MSVC, OMP, HLSL, GSL, RISCV }; private: const IdentifierInfo *AttrName = nullptr; diff --git a/clang/lib/Basic/Attributes.cpp b/clang/lib/Basic/Attributes.cpp index 867d241a2cf847..2d18fb3f9d5bb2 100644 --- a/clang/lib/Basic/Attributes.cpp +++ b/clang/lib/Basic/Attributes.cpp @@ -17,6 +17,8 @@ #include "clang/Basic/ParsedAttrInfo.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/StringMap.h" + using namespace clang; static int hasAttributeImpl(AttributeCommonInfo::Syntax Syntax, StringRef Name, @@ -153,12 +155,37 @@ std::string AttributeCommonInfo::getNormalizedFullName() const { normalizeName(getAttrName(), getScopeName(), getSyntax())); } +// Sorted list of attribute scope names +static constexpr std::pair<StringRef, AttributeCommonInfo::Scope> ScopeList[] = +{{"", AttributeCommonInfo::Scope::NONE}, + {"clang", AttributeCommonInfo::Scope::CLANG}, + {"gnu", AttributeCommonInfo::Scope::GNU}, + {"gsl", AttributeCommonInfo::Scope::GSL}, + {"hlsl", AttributeCommonInfo::Scope::HLSL}, + {"msvc", AttributeCommonInfo::Scope::MSVC}, + {"omp", AttributeCommonInfo::Scope::OMP}, + {"riscv", AttributeCommonInfo::Scope::RISCV}}; + +AttributeCommonInfo::Scope +getScopeFromNormalizedScopeName(StringRef ScopeName) { + auto It = std::lower_bound( + std::begin(ScopeList), std::end(ScopeList), ScopeName, + [](const std::pair<StringRef, AttributeCommonInfo::Scope> &Element, + StringRef Value) { return Element.first < Value; }); + assert(It != std::end(ScopeList) && It->first == ScopeName); + + return It->second; +} + unsigned AttributeCommonInfo::calculateAttributeSpellingListIndex() const { // Both variables will be used in tablegen generated // attribute spell list index matching code.
auto Syntax = static_cast<AttributeCommonInfo::Syntax>(getSyntax()); - StringRef Scope = normalizeAttrScopeName(getScopeName(), Syntax); - StringRef Name = normalizeAttrName(getAttrName(), Scope, Syntax); + StringRef ScopeName = normalizeAttrScopeName(getScopeName(), Syntax); + StringRef Name = normalizeAttrName(getAttrName(), ScopeName, Syntax); + + AttributeCommonInfo::Scope ComputedScope = + getScopeFromNormalizedScopeName(ScopeName); #include "clang/Sema/AttrSpellingListIndex.inc" } diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index 5a80c8c0b7ad36..932cf25f6a7c26 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringSwitch.h" @@ -3843,19 +3844,59 @@ void EmitClangAttrSpellingListIndex(const RecordKeeper &Records, const Record &R = *I.second; std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(R); OS << " case AT_" << I.first << ": {\n"; - for (unsigned I = 0; I < Spellings.size(); ++ I) { - OS << " if (Name == \"" << Spellings[I].name() << "\" && " << "getSyntax() == AttributeCommonInfo::AS_" << Spellings[I].variety() << " && Scope == \"" << Spellings[I].nameSpace() << "\")\n" << " return " << I << ";\n"; + + // If there are none or one spelling to check, resort to the default + // behavior of returning index as 0. + if (Spellings.size() <= 1) { + OS << " return 0;\n" + << " break;\n" + << " }\n"; + continue; } - OS << " break;\n"; - OS << " }\n"; + std::vector<std::string> Names; + llvm::transform(Spellings, std::back_inserter(Names), + [](const FlattenedSpelling &FS) { return FS.name(); }); + llvm::sort(Names); + Names.erase(llvm::unique(Names), Names.end()); + + for (const auto &[Idx, FS] : enumerate(Spellings)) { + OS << " if ("; + if (Names.size() > 1) { + SmallVector<StringRef> SameLenNames; + llvm::copy_if( + Names, std::back_inserter(SameLenNames), + [&](StringRef N) { return N.size() == FS.name().size(); }); + + if (SameLenNames.size() == 1) { + OS << "Name.size() == " << FS.name().size() << " && "; + } else { + // FIXME: We currently fall back to comparing entire strings if there + // are 2 or more spelling names with the same length. This can be + // optimized to check only for the first differing character + // between them instead.
+ OS << "Name == \"" << FS.name() << "\"" + << " && "; + } + } + + OS << "getSyntax() == AttributeCommonInfo::AS_" << FS.variety() + << " && ComputedScope == "; + if (FS.nameSpace() == "") + OS << "AttributeCommonInfo::Scope::NONE"; + else + OS << "AttributeCommonInfo::Scope::" + FS.nameSpace().upper(); + + OS << ")\n" + << " return " << Idx << ";\n"; + } + + OS << " break;\n" + << " }\n"; } - OS << " }\n"; - OS << " return 0;\n"; + OS << " }\n" + << " return 0;\n"; } // Emits code used by RecursiveASTVisitor to visit attributes From dd1c99bac4dc1d5ceeadc79dd31fa12f3e615f18 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 7 Nov 2024 13:12:40 -0800 Subject: [PATCH 20/40] [TableGen] Fix a warning This patch fixes: clang/utils/TableGen/ClangAttrEmitter.cpp:3869:51: error: captured structured bindings are a C++20 extension [-Werror,-Wc++20-extensions] --- clang/utils/TableGen/ClangAttrEmitter.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index 932cf25f6a7c26..b450989aeeddca 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -3864,9 +3864,10 @@ void EmitClangAttrSpellingListIndex(const RecordKeeper &Records, OS << " if ("; if (Names.size() > 1) { SmallVector SameLenNames; + StringRef FSName = FS.name(); llvm::copy_if( Names, std::back_inserter(SameLenNames), - [&](StringRef N) { return N.size() == FS.name().size(); }); + [&](StringRef N) { return N.size() == FSName.size(); }); if (SameLenNames.size() == 1) { OS << "Name.size() == " << FS.name().size() << " && "; From 1f2509993e6e0717b547b5214b06550af4f3008f Mon Sep 17 00:00:00 2001 From: joaosaffran <126493771+joaosaffran@users.noreply.github.com> Date: Thu, 7 Nov 2024 13:15:58 -0800 Subject: [PATCH 21/40] [DirectX] introducing lowering for `bufferUpdateCounter` (#115041) - Adding custom lowering for `bufferUpdateCounter` - introduces llvm intrinsic `int_dx_updateCounter` - adds tests Closes #92147 --------- Co-authored-by: Joao Saffran --- llvm/include/llvm/IR/IntrinsicsDirectX.td | 3 ++ llvm/lib/Target/DirectX/DXIL.td | 7 ++++ llvm/lib/Target/DirectX/DXILOpLowering.cpp | 25 +++++++++++++ llvm/test/CodeGen/DirectX/updateCounter.ll | 41 ++++++++++++++++++++++ 4 files changed, 76 insertions(+) create mode 100644 llvm/test/CodeGen/DirectX/updateCounter.ll diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 43267033f024a7..c181424a6e95bf 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -35,6 +35,9 @@ def int_dx_typedBufferLoad_checkbit def int_dx_typedBufferStore : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>; +def int_dx_updateCounter + : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i8_ty]>; + // Cast between target extension handle types and dxil-style opaque handles def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>; diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 1aabff90e5ec6e..efaf96112c75d6 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -754,6 +754,13 @@ def BufferStore : DXILOp<69, bufferStore> { let stages = [Stages]; } +def UpdateCounter : DXILOp<70, bufferUpdateCounter> { + let Doc = "increments/decrements a buffer counter"; + let arguments = [HandleTy, Int8Ty]; + let result = VoidTy; + let stages = [Stages]; +} + def CheckAccessFullyMapped : 
DXILOp<71, checkAccessFullyMapped> { let Doc = "checks whether a Sample, Gather, or Load operation " "accessed mapped tiles in a tiled resource"; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 8acc9c1efa08c0..0dd3a8dc1ad4ce 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -463,6 +463,28 @@ class OpLowerer { }); } + [[nodiscard]] bool lowerUpdateCounter(Function &F) { + IRBuilder<> &IRB = OpBuilder.getIRB(); + + return replaceFunction(F, [&](CallInst *CI) -> Error { + IRB.SetInsertPoint(CI); + Value *Handle = + createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); + Value *Op1 = CI->getArgOperand(1); + + std::array<Value *, 2> Args{Handle, Op1}; + + Expected<CallInst *> OpCall = + OpBuilder.tryCreateOp(OpCode::UpdateCounter, Args, CI->getName()); + + if (Error E = OpCall.takeError()) + return E; + + CI->eraseFromParent(); + return Error::success(); + }); + } + [[nodiscard]] bool lowerTypedBufferStore(Function &F) { IRBuilder<> &IRB = OpBuilder.getIRB(); Type *Int8Ty = IRB.getInt8Ty(); @@ -600,6 +622,9 @@ class OpLowerer { case Intrinsic::dx_typedBufferStore: HasErrors |= lowerTypedBufferStore(F); break; + case Intrinsic::dx_updateCounter: + HasErrors |= lowerUpdateCounter(F); + break; // TODO: this can be removed when // https://github.com/llvm/llvm-project/issues/113192 is fixed case Intrinsic::dx_splitdouble: diff --git a/llvm/test/CodeGen/DirectX/updateCounter.ll b/llvm/test/CodeGen/DirectX/updateCounter.ll new file mode 100644 index 00000000000000..68ea1e9eac9d50 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/updateCounter.ll @@ -0,0 +1,41 @@ +; RUN: opt -S -dxil-op-lower %s | FileCheck %s + + +target triple = "dxil-pc-shadermodel6.6-compute" + + ; CHECK-LABEL: define void @update_counter_decrement_vector() { +define void @update_counter_decrement_vector() { + ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, + %buffer = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]] + ; CHECK-NEXT: call void @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1) + call void @llvm.dx.updateCounter(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i8 -1) + ret void +} + + ; CHECK-LABEL: define void @update_counter_increment_vector() { +define void @update_counter_increment_vector() { + ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, + %buffer = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]] + ; CHECK-NEXT: call void @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 1) + call void @llvm.dx.updateCounter(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i8 1) + ret void +} + +; CHECK-LABEL: define void @update_counter_decrement_scalar() { +define void @update_counter_decrement_scalar() { + ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, + %buffer = call target("dx.RawBuffer", i8, 0, 0) + @llvm.dx.handle.fromBinding.tdx.RawBuffer_i8_0_0t( + i32 1, i32 8, i32 1, i32 0, i1 false) + ; CHECK-NEXT: [[BUFFANOT:%.*]] =
call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]] + ; CHECK-NEXT: call void @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1) + call void @llvm.dx.updateCounter(target("dx.RawBuffer", i8, 0, 0) %buffer, i8 -1) + ret void +} From 87feafc391ab1e35997994ad378af727e4947c67 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 7 Nov 2024 13:24:22 -0800 Subject: [PATCH 22/40] [RISCV][GISel] Custom promote s32 G_ROTL/ROTR on RV64. (#115107) I plan to make i32 an illegal type for RV64 to match SelectionDAG and to remove i32 from the GPR register class. RORW/ROLW target opcodes are added to match SelectionDAG. The regression in rv64zbb-zbkb.ll requires factoring isSExtCheaperThanZExt into the G_ANYEXT constant folder. That requires some interface changes so I didn't do it in this patch. --- .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 19 +++++++++-- llvm/lib/Target/RISCV/RISCVGISel.td | 8 ----- llvm/lib/Target/RISCV/RISCVInstrGISel.td | 16 +++++++++ .../instruction-select/rotate-rv64.mir | 34 +++++++------------ .../legalizer/legalize-rotate-rv64.mir | 14 +++----- .../CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll | 8 +++-- 6 files changed, 55 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 24528c5cdbdb0b..632c549abca52d 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -203,8 +203,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower(); getActionDefinitionsBuilder({G_ROTL, G_ROTR}) - .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), - {{s32, s32}, {sXLen, sXLen}}) + .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), {{sXLen, sXLen}}) + .customFor(ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()), + {{s32, s32}}) .lower(); getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower(); @@ -1162,6 +1163,10 @@ static unsigned getRISCVWOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_ROTL: + return RISCV::G_ROLW; + case TargetOpcode::G_ROTR: + return RISCV::G_RORW; case TargetOpcode::G_CTLZ: return RISCV::G_CLZW; case TargetOpcode::G_CTTZ: @@ -1205,6 +1210,16 @@ bool RISCVLegalizerInfo::legalizeCustom( return Helper.lower(MI, 0, /* Unused hint type */ LLT()) == LegalizerHelper::Legalized; } + case TargetOpcode::G_ROTL: + case TargetOpcode::G_ROTR: { + Helper.Observer.changingInstr(MI); + Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT); + Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT); + Helper.widenScalarDst(MI, sXLen); + MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode()))); + Helper.Observer.changedInstr(MI); + return true; + } case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTTZ: { Helper.Observer.changingInstr(MI); diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index f312ab1a70ea06..e3267642ceeed0 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -274,14 +274,6 @@ let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in { def : Pat<(i32 (and GPR:$rs1, (not GPR:$rs2))), (ANDN GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (or GPR:$rs1, (not GPR:$rs2))), (ORN GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (xor GPR:$rs1, (not GPR:$rs2))), (XNOR GPR:$rs1, GPR:$rs2)>; - -def : PatGprGpr; -def : PatGprGpr; -def : Pat<(i32 (rotr 
GPR:$rs1, uimm5i32:$imm)), - (RORIW GPR:$rs1, (i64 (as_i64imm $imm)))>; - -def : Pat<(i32 (rotl GPR:$rs1, uimm5i32:$rs2)), - (RORIW GPR:$rs1, (ImmSubFrom32 uimm5i32:$rs2))>; } // Predicates = [HasStdExtZbbOrZbkb, IsRV64] let Predicates = [HasStdExtZba, IsRV64] in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td index 927811eef6618b..424623360d2556 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td +++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td @@ -17,6 +17,22 @@ class RISCVGenericInstruction : GenericInstruction { let Namespace = "RISCV"; } +// Pseudo equivalent to a RISCVISD::RORW. +def G_RORW : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2); + let hasSideEffects = false; +} +def : GINodeEquiv; + +// Pseudo equivalent to a RISCVISD::ROLW. +def G_ROLW : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2); + let hasSideEffects = false; +} +def : GINodeEquiv; + // Pseudo equivalent to a RISCVISD::CLZW. def G_CLZW : RISCVGenericInstruction { let OutOperandList = (outs type0:$dst); diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rotate-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rotate-rv64.mir index 50b96e0ee972e6..edf7ef2203cbff 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rotate-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rotate-rv64.mir @@ -22,12 +22,9 @@ body: | ; CHECK-NEXT: $x10 = COPY [[ROLW]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:gprb(s64) = COPY $x11 - %6:gprb(s32) = G_TRUNC %2(s64) - %4:gprb(s32) = G_ROTL %1, %6(s32) - %5:gprb(s64) = G_ANYEXT %4(s32) - $x10 = COPY %5(s64) + %1:gprb(s64) = COPY $x11 + %2:gprb(s64) = G_ROLW %0, %1(s64) + $x10 = COPY %2(s64) PseudoRET implicit $x10 ... @@ -72,12 +69,9 @@ body: | ; CHECK-NEXT: $x10 = COPY [[RORW]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:gprb(s64) = COPY $x11 - %6:gprb(s32) = G_TRUNC %2(s64) - %4:gprb(s32) = G_ROTR %1, %6(s32) - %5:gprb(s64) = G_ANYEXT %4(s32) - $x10 = COPY %5(s64) + %1:gprb(s64) = COPY $x11 + %2:gprb(s64) = G_RORW %0, %1(s64) + $x10 = COPY %2(s64) PseudoRET implicit $x10 ... @@ -121,11 +115,9 @@ body: | ; CHECK-NEXT: $x10 = COPY [[RORIW]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:gprb(s32) = G_CONSTANT i32 15 - %3:gprb(s32) = G_ROTL %1, %2(s32) - %4:gprb(s64) = G_ANYEXT %3(s32) - $x10 = COPY %4(s64) + %1:gprb(s64) = G_CONSTANT i64 15 + %2:gprb(s64) = G_ROLW %0, %1(s64) + $x10 = COPY %2(s64) PseudoRET implicit $x10 ... @@ -169,11 +161,9 @@ body: | ; CHECK-NEXT: $x10 = COPY [[RORIW]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:gprb(s32) = G_CONSTANT i32 15 - %3:gprb(s32) = G_ROTR %1, %2(s32) - %4:gprb(s64) = G_ANYEXT %3(s32) - $x10 = COPY %4(s64) + %1:gprb(s64) = G_CONSTANT i64 15 + %2:gprb(s64) = G_RORW %0, %1(s64) + $x10 = COPY %2(s64) PseudoRET implicit $x10 ... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-rotate-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-rotate-rv64.mir index 2334fe1015e2f6..a0d23d891b14a4 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-rotate-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-rotate-rv64.mir @@ -109,12 +109,9 @@ body: | ; RV64ZBB_OR_RV64ZBKB: liveins: $x10, $x11 ; RV64ZBB_OR_RV64ZBKB-NEXT: {{ $}} ; RV64ZBB_OR_RV64ZBKB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; RV64ZBB_OR_RV64ZBKB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[ROTL:%[0-9]+]]:_(s32) = G_ROTL [[TRUNC]], [[TRUNC1]](s32) - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ROTL]](s32) - ; RV64ZBB_OR_RV64ZBKB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB_OR_RV64ZBKB-NEXT: [[ROLW:%[0-9]+]]:_(s64) = G_ROLW [[COPY]], [[COPY1]] + ; RV64ZBB_OR_RV64ZBKB-NEXT: $x10 = COPY [[ROLW]](s64) ; RV64ZBB_OR_RV64ZBKB-NEXT: PseudoRET implicit $x10 %2:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %2(s64) @@ -268,12 +265,9 @@ body: | ; RV64ZBB_OR_RV64ZBKB: liveins: $x10, $x11 ; RV64ZBB_OR_RV64ZBKB-NEXT: {{ $}} ; RV64ZBB_OR_RV64ZBKB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; RV64ZBB_OR_RV64ZBKB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[TRUNC]], [[TRUNC1]](s32) - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ROTR]](s32) - ; RV64ZBB_OR_RV64ZBKB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB_OR_RV64ZBKB-NEXT: [[RORW:%[0-9]+]]:_(s64) = G_RORW [[COPY]], [[COPY1]] + ; RV64ZBB_OR_RV64ZBKB-NEXT: $x10 = COPY [[RORW]](s64) ; RV64ZBB_OR_RV64ZBKB-NEXT: PseudoRET implicit $x10 %2:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %2(s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll index 3d78d15057ba41..d9b7f16131c352 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll @@ -166,7 +166,9 @@ define signext i32 @rol_i32_neg_constant_rhs(i32 signext %a) nounwind { ; ; RV64ZBB-ZBKB-LABEL: rol_i32_neg_constant_rhs: ; RV64ZBB-ZBKB: # %bb.0: -; RV64ZBB-ZBKB-NEXT: li a1, -2 +; RV64ZBB-ZBKB-NEXT: li a1, 1 +; RV64ZBB-ZBKB-NEXT: slli a1, a1, 32 +; RV64ZBB-ZBKB-NEXT: addi a1, a1, -2 ; RV64ZBB-ZBKB-NEXT: rolw a0, a1, a0 ; RV64ZBB-ZBKB-NEXT: ret %1 = tail call i32 @llvm.fshl.i32(i32 -2, i32 -2, i32 %a) @@ -250,7 +252,9 @@ define signext i32 @ror_i32_neg_constant_rhs(i32 signext %a) nounwind { ; ; RV64ZBB-ZBKB-LABEL: ror_i32_neg_constant_rhs: ; RV64ZBB-ZBKB: # %bb.0: -; RV64ZBB-ZBKB-NEXT: li a1, -2 +; RV64ZBB-ZBKB-NEXT: li a1, 1 +; RV64ZBB-ZBKB-NEXT: slli a1, a1, 32 +; RV64ZBB-ZBKB-NEXT: addi a1, a1, -2 ; RV64ZBB-ZBKB-NEXT: rorw a0, a1, a0 ; RV64ZBB-ZBKB-NEXT: ret %1 = tail call i32 @llvm.fshr.i32(i32 -2, i32 -2, i32 %a) From de41b137ddb68b5172f1ab042b0b0b495afbb490 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 7 Nov 2024 13:35:29 -0800 Subject: [PATCH 23/40] [Offload] Provide a CMake cache file to easily build offloading (#115074) Summary: This patch adds a cache file that will automatically enable OpenMP, offload, and all the fancy GPU libraries.
--- offload/cmake/caches/Offload.cmake | 12 ++++++++++++ openmp/docs/SupportAndFAQ.rst | 18 +++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 offload/cmake/caches/Offload.cmake diff --git a/offload/cmake/caches/Offload.cmake b/offload/cmake/caches/Offload.cmake new file mode 100644 index 00000000000000..57363e99b10afc --- /dev/null +++ b/offload/cmake/caches/Offload.cmake @@ -0,0 +1,12 @@ +set(LLVM_ENABLE_PROJECTS "clang;clang-tools-extra;compiler-rt;lld" CACHE STRING "") +set(LLVM_ENABLE_RUNTIMES "libunwind;libcxx;libcxxabi;openmp;offload" CACHE STRING "") +set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "") + +set(CLANG_DEFAULT_CXX_STDLIB "libc++" CACHE STRING "") +set(CLANG_DEFAULT_LINKER "lld" CACHE STRING "") + +set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "") +set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "") +set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "") +set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;libcxx;libcxxabi" CACHE STRING "") +set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;libcxx;libcxxabi" CACHE STRING "") diff --git a/openmp/docs/SupportAndFAQ.rst b/openmp/docs/SupportAndFAQ.rst index dee707cf50f919..b645723dcfd5ec 100644 --- a/openmp/docs/SupportAndFAQ.rst +++ b/openmp/docs/SupportAndFAQ.rst @@ -51,7 +51,23 @@ All patches go through the regular `LLVM review process Q: How to build an OpenMP GPU offload capable compiler? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To build an *effective* OpenMP offload capable compiler, only one extra CMake + +The easiest way to create an offload capable compiler is to use the provided +CMake cache file. This will enable the projects and runtimes necessary for +offloading as well as some extra options. + +.. code-block:: sh + + $> cd llvm-project # The llvm-project checkout + $> mkdir build + $> cd build + $> cmake ../llvm -G Ninja \ + -C ../offload/cmake/caches/Offload.cmake \ # The preset cache file + -DCMAKE_BUILD_TYPE=<Debug|Release> \ # Select build type + -DCMAKE_INSTALL_PREFIX=<PATH> \ # Where the libraries will live + $> ninja install + +To manually build an *effective* OpenMP offload capable compiler, only one extra CMake option, ``LLVM_ENABLE_RUNTIMES="openmp;offload"``, is needed when building LLVM (Generic information about building LLVM is available `here <https://llvm.org/docs/GettingStarted.html>`__.). Make sure all backends that From e109c493210572535de25950e7b83f74b8d11a6a Mon Sep 17 00:00:00 2001 From: Pranav Kant Date: Thu, 7 Nov 2024 13:53:14 -0800 Subject: [PATCH 24/40] =?UTF-8?q?Revert=20"Add=20clang::lifetimebound=20an?= =?UTF-8?q?notation=20to=20llvm::function=5Fref=20(#1=E2=80=A6=20(#115376)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …15019)" This reverts commit 9f796159f28775b3f93d77e173c1fd3413c2e60e. This is breaking compiler-rt/lib/sanitizer_common/... Author knows about the breakage.
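For context, the reverted annotation was meant to flag dangling uses like the minimal hand-written sketch below (not part of this change): `function_ref` is non-owning, so binding it to a temporary callable leaves it dangling once the full expression ends.

    // The lambda temporary dies at the end of this declaration, so F dangles;
    // with the lifetimebound annotation Clang can diagnose the dangling use.
    llvm::function_ref<int()> F = [] { return 42; };
    int R = F(); // use of a destroyed callable: undefined behavior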
--- llvm/include/llvm/ADT/STLFunctionalExtras.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/include/llvm/ADT/STLFunctionalExtras.h b/llvm/include/llvm/ADT/STLFunctionalExtras.h index 3b9d40959d7142..6f172504b3c167 100644 --- a/llvm/include/llvm/ADT/STLFunctionalExtras.h +++ b/llvm/include/llvm/ADT/STLFunctionalExtras.h @@ -16,7 +16,6 @@ #define LLVM_ADT_STLFUNCTIONALEXTRAS_H #include "llvm/ADT/STLForwardCompat.h" -#include "llvm/Support/Compiler.h" #include #include @@ -53,7 +52,7 @@ class function_ref { template function_ref( - Callable &&callable LLVM_LIFETIME_BOUND, + Callable &&callable, // This is not the copy-constructor. std::enable_if_t, function_ref>::value> * = nullptr, From dec38399795a7f238508ee100e5b057165724a60 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 7 Nov 2024 13:36:20 -0800 Subject: [PATCH 25/40] [SLP][NFC]Add a test with the missed vectorization opportunity for stores with same address --- .../RISCV/repeated-address-store.ll | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/RISCV/repeated-address-store.ll diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/repeated-address-store.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/repeated-address-store.ll new file mode 100644 index 00000000000000..f126192271cd95 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/repeated-address-store.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux -mattr=+v < %s | FileCheck %s + +define void @test(ptr %dest) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[DEST:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[INC3:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 3 +; CHECK-NEXT: store i32 1, ptr [[INC3]], align 2 +; CHECK-NEXT: store i32 1, ptr [[DEST]], align 4 +; CHECK-NEXT: [[INC1:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 1 +; CHECK-NEXT: store i32 1, ptr [[INC1]], align 2 +; CHECK-NEXT: [[INC2:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 2 +; CHECK-NEXT: store i32 1, ptr [[INC2]], align 2 +; CHECK-NEXT: store i32 2, ptr [[DEST]], align 2 +; CHECK-NEXT: store i32 1, ptr [[INC3]], align 2 +; CHECK-NEXT: ret void +; +entry: + %inc3 = getelementptr inbounds i32, ptr %dest, i64 3 + store i32 1, ptr %inc3, align 2 + + store i32 1, ptr %dest, align 4 + %inc1 = getelementptr inbounds i32, ptr %dest, i64 1 + store i32 1, ptr %inc1, align 2 + %inc2 = getelementptr inbounds i32, ptr %dest, i64 2 + store i32 1, ptr %inc2, align 2 + store i32 2, ptr %dest, align 2 + store i32 1, ptr %inc3, align 2 + ret void +} From ae9d0623ad65d84022bb4ed8446b6491451ae575 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 7 Nov 2024 13:57:11 -0800 Subject: [PATCH 26/40] [RISCV][GISel] Remove s32 input support for G_SITOFP/UITOFP on RV64. (#115236) I plan to make i32 an illegal type for RV64 to match SelectionDAG and to remove i32 from the GPR register class. I've added a sexti32 ComplexPattern to select sext.w+fcvt.s.l as fcvt.s.w. The recently added zexti32 handles selecting and+fcvt.s.lu as fcvt.s.wu. There are still some regressions that suggest we should match g_zero_extend in zexti32. 
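As a rough illustration of the intended folding (hand-written example, not taken from the patch's tests):

    // C++ source: converting a sign-extended i32 on RV64.
    float itofp(int X) { return static_cast<float>(X); }
    // before: sext.w   a0, a0
    //         fcvt.s.l fa0, a0
    // after:  fcvt.s.w fa0, a0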
--- .../RISCV/GISel/RISCVInstructionSelector.cpp | 27 +++++ .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 4 +- llvm/lib/Target/RISCV/RISCVGISel.td | 3 + .../RISCV/GlobalISel/double-convert.ll | 20 ++- .../CodeGen/RISCV/GlobalISel/float-convert.ll | 20 ++- .../instruction-select/itofp-f16-rv64.mir | 46 ------- .../instruction-select/itofp-rv64.mir | 92 -------------- .../legalizer/legalize-itofp-f16-rv64.mir | 57 ++++----- .../legalizer/legalize-itofp-rv64.mir | 114 ++++++++---------- .../regbankselect/itofp-f16-rv64.mir | 46 ------- .../GlobalISel/regbankselect/itofp-rv64.mir | 92 -------------- 11 files changed, 138 insertions(+), 383 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 389bdbe6d5e912..d11647b78d7417 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -87,6 +87,12 @@ class RISCVInstructionSelector : public InstructionSelector { ComplexRendererFns selectShiftMask(MachineOperand &Root) const; ComplexRendererFns selectAddrRegImm(MachineOperand &Root) const; + ComplexRendererFns selectSExtBits(MachineOperand &Root, unsigned Bits) const; + template + ComplexRendererFns selectSExtBits(MachineOperand &Root) const { + return selectSExtBits(Root, Bits); + } + ComplexRendererFns selectZExtBits(MachineOperand &Root, unsigned Bits) const; template ComplexRendererFns selectZExtBits(MachineOperand &Root) const { @@ -248,6 +254,27 @@ RISCVInstructionSelector::selectShiftMask(MachineOperand &Root) const { return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(ShAmtReg); }}}; } +InstructionSelector::ComplexRendererFns +RISCVInstructionSelector::selectSExtBits(MachineOperand &Root, + unsigned Bits) const { + if (!Root.isReg()) + return std::nullopt; + Register RootReg = Root.getReg(); + MachineInstr *RootDef = MRI->getVRegDef(RootReg); + + if (RootDef->getOpcode() == TargetOpcode::G_SEXT_INREG && + RootDef->getOperand(2).getImm() == Bits) { + return { + {[=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }}}; + } + + unsigned Size = MRI->getType(RootReg).getScalarSizeInBits(); + if ((Size - KB->computeNumSignBits(RootReg)) < Bits) + return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}}; + + return std::nullopt; +} + InstructionSelector::ComplexRendererFns RISCVInstructionSelector::selectZExtBits(MachineOperand &Root, unsigned Bits) const { diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 632c549abca52d..0704b57ff95650 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -542,9 +542,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}); getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) - .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {s32, sXLen}))) + .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {sXLen}))) .widenScalarToNextPow2(1) - .minScalar(1, s32) + .minScalar(1, sXLen) .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}}) .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}); diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index e3267642ceeed0..10906aebf1bf84 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -96,6 +96,9 @@ def gi_sh2add_uw_op : GIComplexOperandMatcher">, def 
gi_sh3add_uw_op : GIComplexOperandMatcher">, GIComplexPatternEquiv; +def gi_sexti32 : GIComplexOperandMatcher">, + GIComplexPatternEquiv; + def gi_zexti32 : GIComplexOperandMatcher">, GIComplexPatternEquiv; def gi_zexti16 : GIComplexOperandMatcher">, diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll index 785cc2aafde11b..0e5cbe63004b62 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll @@ -98,11 +98,17 @@ define double @fcvt_d_wu(i32 %a) nounwind { } define double @fcvt_d_wu_load(ptr %p) nounwind { -; CHECKIFD-LABEL: fcvt_d_wu_load: -; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: lw a0, 0(a0) -; CHECKIFD-NEXT: fcvt.d.wu fa0, a0 -; CHECKIFD-NEXT: ret +; RV32IFD-LABEL: fcvt_d_wu_load: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: lw a0, 0(a0) +; RV32IFD-NEXT: fcvt.d.wu fa0, a0 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_load: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lwu a0, 0(a0) +; RV64IFD-NEXT: fcvt.d.wu fa0, a0 +; RV64IFD-NEXT: ret %a = load i32, ptr %p %1 = uitofp i32 %a to double ret double %1 @@ -294,7 +300,9 @@ define signext i32 @fcvt_d_wu_demanded_bits(i32 signext %0, ptr %1) nounwind { ; RV64IFD-LABEL: fcvt_d_wu_demanded_bits: ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: addiw a0, a0, 1 -; RV64IFD-NEXT: fcvt.d.wu fa5, a0 +; RV64IFD-NEXT: slli a2, a0, 32 +; RV64IFD-NEXT: srli a2, a2, 32 +; RV64IFD-NEXT: fcvt.d.wu fa5, a2 ; RV64IFD-NEXT: fsd fa5, 0(a1) ; RV64IFD-NEXT: ret %3 = add i32 %0, 1 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll index d6a36c5a702ac8..c5a36d063c0ad6 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll @@ -101,11 +101,17 @@ define float @fcvt_s_wu(i32 %a) nounwind { } define float @fcvt_s_wu_load(ptr %p) nounwind { -; CHECKIF-LABEL: fcvt_s_wu_load: -; CHECKIF: # %bb.0: -; CHECKIF-NEXT: lw a0, 0(a0) -; CHECKIF-NEXT: fcvt.s.wu fa0, a0 -; CHECKIF-NEXT: ret +; RV32IF-LABEL: fcvt_s_wu_load: +; RV32IF: # %bb.0: +; RV32IF-NEXT: lw a0, 0(a0) +; RV32IF-NEXT: fcvt.s.wu fa0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_load: +; RV64IF: # %bb.0: +; RV64IF-NEXT: lwu a0, 0(a0) +; RV64IF-NEXT: fcvt.s.wu fa0, a0 +; RV64IF-NEXT: ret %a = load i32, ptr %p %1 = uitofp i32 %a to float ret float %1 @@ -266,7 +272,9 @@ define signext i32 @fcvt_s_wu_demanded_bits(i32 signext %0, ptr %1) nounwind { ; RV64IF-LABEL: fcvt_s_wu_demanded_bits: ; RV64IF: # %bb.0: ; RV64IF-NEXT: addiw a0, a0, 1 -; RV64IF-NEXT: fcvt.s.wu fa5, a0 +; RV64IF-NEXT: slli a2, a0, 32 +; RV64IF-NEXT: srli a2, a2, 32 +; RV64IF-NEXT: fcvt.s.wu fa5, a2 ; RV64IF-NEXT: fsw fa5, 0(a1) ; RV64IF-NEXT: ret %3 = add i32 %0, 1 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-f16-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-f16-rv64.mir index 1afb1d9be6a099..b813a79c339ec5 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-f16-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-f16-rv64.mir @@ -2,52 +2,6 @@ # RUN: llc -mtriple=riscv64 -mattr=+zfh -run-pass=instruction-select \ # RUN: -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s ---- -name: sitofp_s64_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: sitofp_s64_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[FCVT_H_W:%[0-9]+]]:fpr16 = nofpexcept FCVT_H_W [[COPY]], 7 - ; CHECK-NEXT: $f10_h = COPY [[FCVT_H_W]] - ; CHECK-NEXT: PseudoRET implicit $f10_h - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:fprb(s16) = G_SITOFP %1(s32) - $f10_h = COPY %2(s16) - PseudoRET implicit $f10_h - -... ---- -name: uitofp_s64_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: uitofp_s64_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[FCVT_H_WU:%[0-9]+]]:fpr16 = nofpexcept FCVT_H_WU [[COPY]], 7 - ; CHECK-NEXT: $f10_h = COPY [[FCVT_H_WU]] - ; CHECK-NEXT: PseudoRET implicit $f10_h - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:fprb(s16) = G_UITOFP %1(s32) - $f10_h = COPY %2(s16) - PseudoRET implicit $f10_h - -... --- name: sitofp_s64_s64 legalized: true diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-rv64.mir index 31175d7af93f98..f99a15a850517f 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-rv64.mir @@ -2,52 +2,6 @@ # RUN: llc -mtriple=riscv64 -mattr=+d -run-pass=instruction-select \ # RUN: -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s ---- -name: sitofp_s32_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: sitofp_s32_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[FCVT_S_W:%[0-9]+]]:fpr32 = nofpexcept FCVT_S_W [[COPY]], 7 - ; CHECK-NEXT: $f10_f = COPY [[FCVT_S_W]] - ; CHECK-NEXT: PseudoRET implicit $f10_f - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:fprb(s32) = G_SITOFP %1(s32) - $f10_f = COPY %2(s32) - PseudoRET implicit $f10_f - -... ---- -name: uitofp_s32_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: uitofp_s32_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[FCVT_S_WU:%[0-9]+]]:fpr32 = nofpexcept FCVT_S_WU [[COPY]], 7 - ; CHECK-NEXT: $f10_f = COPY [[FCVT_S_WU]] - ; CHECK-NEXT: PseudoRET implicit $f10_f - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:fprb(s32) = G_UITOFP %1(s32) - $f10_f = COPY %2(s32) - PseudoRET implicit $f10_f - -... --- name: sitofp_s32_s64 legalized: true @@ -91,52 +45,6 @@ body: | $f10_f = COPY %1(s32) PseudoRET implicit $f10_f -... ---- -name: sitofp_s64_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: sitofp_s64_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[FCVT_D_W:%[0-9]+]]:fpr64 = nofpexcept FCVT_D_W [[COPY]], 0 - ; CHECK-NEXT: $f10_d = COPY [[FCVT_D_W]] - ; CHECK-NEXT: PseudoRET implicit $f10_d - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:fprb(s64) = G_SITOFP %1(s32) - $f10_d = COPY %2(s64) - PseudoRET implicit $f10_d - -... 
---- -name: uitofp_s64_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: uitofp_s64_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[FCVT_D_WU:%[0-9]+]]:fpr64 = nofpexcept FCVT_D_WU [[COPY]], 0 - ; CHECK-NEXT: $f10_d = COPY [[FCVT_D_WU]] - ; CHECK-NEXT: PseudoRET implicit $f10_d - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:fprb(s64) = G_UITOFP %1(s32) - $f10_d = COPY %2(s64) - PseudoRET implicit $f10_d - ... --- name: sitofp_s64_s64 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-f16-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-f16-rv64.mir index 52c69d1acbffc7..6a70a331a02c8c 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-f16-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-f16-rv64.mir @@ -13,11 +13,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_h = COPY [[SITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -38,10 +37,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_h = COPY [[UITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -62,11 +60,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_h = COPY [[SITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -87,10 +84,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_h = COPY [[UITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -111,11 +107,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_h = COPY [[SITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -136,10 +131,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_h = COPY [[UITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -160,8 +154,8 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[TRUNC]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[SEXT_INREG]](s64) ; CHECK-NEXT: $f10_h = COPY [[SITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -182,8 +176,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[TRUNC]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_h = COPY [[UITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-rv64.mir index bc09a44dee2e09..2d6ee6250cf328 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-rv64.mir @@ -13,11 +13,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_f = COPY [[SITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -38,10 +37,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_f = COPY [[UITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -62,11 +60,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_f = COPY [[SITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -87,10 +84,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_f = COPY [[UITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -111,11 +107,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_f = COPY [[SITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -136,10 +131,9 @@ body: | ; 
CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_f = COPY [[UITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -160,8 +154,8 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[TRUNC]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[SEXT_INREG]](s64) ; CHECK-NEXT: $f10_f = COPY [[SITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -182,8 +176,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[TRUNC]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_f = COPY [[UITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -244,11 +239,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_d = COPY [[SITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -269,10 +263,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_d = COPY [[UITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -293,11 +286,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR 
[[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_d = COPY [[SITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -318,10 +310,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_d = COPY [[UITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -342,11 +333,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_d = COPY [[SITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -367,10 +357,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_d = COPY [[UITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -391,8 +380,8 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[TRUNC]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[SEXT_INREG]](s64) ; CHECK-NEXT: $f10_d = COPY [[SITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -413,8 +402,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[TRUNC]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = 
G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_d = COPY [[UITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-f16-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-f16-rv64.mir index 10da1f8fc9fb30..5b48a7e947ae67 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-f16-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-f16-rv64.mir @@ -3,52 +3,6 @@ # RUN: -simplify-mir -verify-machineinstrs %s \ # RUN: -o - | FileCheck %s ---- -name: sitofp_s16_s32 -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: sitofp_s16_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gprb(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fprb(s16) = G_SITOFP [[TRUNC]](s32) - ; CHECK-NEXT: $f10_h = COPY [[SITOFP]](s16) - ; CHECK-NEXT: PseudoRET implicit $f10_h - %0:_(s64) = COPY $x10 - %1:_(s32) = G_TRUNC %0(s64) - %2:_(s16) = G_SITOFP %1(s32) - $f10_h = COPY %2(s16) - PseudoRET implicit $f10_h - -... ---- -name: uitofp_s16_s32 -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: uitofp_s16_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gprb(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:fprb(s16) = G_UITOFP [[TRUNC]](s32) - ; CHECK-NEXT: $f10_h = COPY [[UITOFP]](s16) - ; CHECK-NEXT: PseudoRET implicit $f10_h - %0:_(s64) = COPY $x10 - %1:_(s32) = G_TRUNC %0(s64) - %2:_(s16) = G_UITOFP %1(s32) - $f10_h = COPY %2(s16) - PseudoRET implicit $f10_h - -... --- name: sitofp_s16_s64 legalized: true diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-rv64.mir index e0f039d5983ee8..6cb38cf38a6a56 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-rv64.mir @@ -3,52 +3,6 @@ # RUN: -simplify-mir -verify-machineinstrs %s \ # RUN: -o - | FileCheck %s ---- -name: sitofp_s32_s32 -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: sitofp_s32_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gprb(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fprb(s32) = G_SITOFP [[TRUNC]](s32) - ; CHECK-NEXT: $f10_f = COPY [[SITOFP]](s32) - ; CHECK-NEXT: PseudoRET implicit $f10_f - %0:_(s64) = COPY $x10 - %1:_(s32) = G_TRUNC %0(s64) - %2:_(s32) = G_SITOFP %1(s32) - $f10_f = COPY %2(s32) - PseudoRET implicit $f10_f - -... ---- -name: uitofp_s32_s32 -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: uitofp_s32_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gprb(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:fprb(s32) = G_UITOFP [[TRUNC]](s32) - ; CHECK-NEXT: $f10_f = COPY [[UITOFP]](s32) - ; CHECK-NEXT: PseudoRET implicit $f10_f - %0:_(s64) = COPY $x10 - %1:_(s32) = G_TRUNC %0(s64) - %2:_(s32) = G_UITOFP %1(s32) - $f10_f = COPY %2(s32) - PseudoRET implicit $f10_f - -... 
--- name: sitofp_s32_s64 legalized: true @@ -90,52 +44,6 @@ body: | $f10_f = COPY %1(s32) PseudoRET implicit $f10_f -... ---- -name: sitofp_s64_s32 -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: sitofp_s64_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gprb(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fprb(s64) = G_SITOFP [[TRUNC]](s32) - ; CHECK-NEXT: $f10_d = COPY [[SITOFP]](s64) - ; CHECK-NEXT: PseudoRET implicit $f10_d - %0:_(s64) = COPY $x10 - %1:_(s32) = G_TRUNC %0(s64) - %2:_(s64) = G_SITOFP %1(s32) - $f10_d = COPY %2(s64) - PseudoRET implicit $f10_d - -... ---- -name: uitofp_s64_s32 -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: uitofp_s64_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gprb(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:fprb(s64) = G_UITOFP [[TRUNC]](s32) - ; CHECK-NEXT: $f10_d = COPY [[UITOFP]](s64) - ; CHECK-NEXT: PseudoRET implicit $f10_d - %0:_(s64) = COPY $x10 - %1:_(s32) = G_TRUNC %0(s64) - %2:_(s64) = G_UITOFP %1(s32) - $f10_d = COPY %2(s64) - PseudoRET implicit $f10_d - ... --- name: sitofp_s64_s64 From 7475156d49406785a974b1205d11fe3de9c1553e Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 7 Nov 2024 22:03:55 +0000 Subject: [PATCH 27/40] [Clang] Add __builtin_counted_by_ref builtin (#114495) The __builtin_counted_by_ref builtin is used on a flexible array pointer and returns a pointer to the "counted_by" attribute's COUNT argument, which is a field in the same non-anonymous struct as the flexible array member. This is useful for automatically setting the count field without needing the programmer's intervention. Otherwise it's possible to get this anti-pattern: ptr = alloc(..., COUNT); ptr->FAM[9] = 42; /* <<< Sanitizer will complain */ ptr->count = COUNT; To prevent this anti-pattern, the user can create an allocator that automatically performs the assignment: #define alloc(TY, FAM, COUNT) ({ \ TY __p = alloc(get_size(TY, COUNT)); \ if (__builtin_counted_by_ref(__p->FAM)) \ *__builtin_counted_by_ref(__p->FAM) = COUNT; \ __p; \ }) The builtin's behavior is heavily dependent upon the "counted_by" attribute existing. Its main utility is during allocation to avoid the above anti-pattern. If the flexible array member doesn't have that attribute, the builtin becomes a no-op. Therefore, if the flexible array member has a "count" field not referenced by "counted_by", it must be set explicitly after the allocation as this builtin will return a "nullptr" and the assignment will most likely be elided.
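For readers skimming the patch, here is a minimal non-macro sketch of the intended usage; the struct, field, and function names are illustrative only, not part of this change, and it assumes n > 0:

    #include <stdlib.h>

    struct s {
      int dummy;
      short count;                                    /* the counter field */
      long fam[] __attribute__((counted_by(count)));  /* flexible array member */
    };

    struct s *make(int n) {
      struct s *p = malloc(sizeof(*p) + sizeof(long) * n);
      if (!p)
        return NULL;
      /* The builtin yields &p->count because fam carries counted_by(count);
         it would yield (void *)0 if the attribute were absent. */
      if (__builtin_counted_by_ref(p->fam))
        *__builtin_counted_by_ref(p->fam) = n;        /* set the count first */
      p->fam[0] = 42;                                 /* now in-bounds for the sanitizer */
      return p;
    }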
--------- Co-authored-by: Bill Wendling Co-authored-by: Aaron Ballman --- clang/docs/LanguageExtensions.rst | 68 +++++++ clang/docs/ReleaseNotes.rst | 23 +++ clang/include/clang/Basic/Builtins.td | 6 + .../clang/Basic/DiagnosticSemaKinds.td | 12 ++ clang/include/clang/Sema/Sema.h | 2 + clang/lib/AST/Decl.cpp | 4 + clang/lib/CodeGen/CGBuiltin.cpp | 29 +++ clang/lib/CodeGen/CGExpr.cpp | 29 +-- clang/lib/CodeGen/CodeGenFunction.h | 4 + clang/lib/Sema/SemaChecking.cpp | 53 ++++++ clang/lib/Sema/SemaExpr.cpp | 69 +++++++ .../AST/ast-print-builtin-counted-by-ref.c | 23 +++ clang/test/CodeGen/builtin-counted-by-ref.c | 177 ++++++++++++++++++ clang/test/Sema/builtin-counted-by-ref.c | 123 ++++++++++++ clang/test/Sema/builtin-counted-by-ref.cpp | 8 + 15 files changed, 618 insertions(+), 12 deletions(-) create mode 100644 clang/test/AST/ast-print-builtin-counted-by-ref.c create mode 100644 clang/test/CodeGen/builtin-counted-by-ref.c create mode 100644 clang/test/Sema/builtin-counted-by-ref.c create mode 100644 clang/test/Sema/builtin-counted-by-ref.cpp diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index f00422cd8b8045..f7285352b9deb9 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -3774,6 +3774,74 @@ type-generic alternative to the ``__builtin_clz{,l,ll}`` (respectively ``__builtin_ctz{,l,ll}``) builtins, with support for other integer types, such as ``unsigned __int128`` and C23 ``unsigned _BitInt(N)``. +``__builtin_counted_by_ref`` +---------------------------- + +``__builtin_counted_by_ref`` returns a pointer to the count field from the +``counted_by`` attribute. + +The argument must be a flexible array member. If the argument isn't a flexible +array member or doesn't have the ``counted_by`` attribute, the builtin returns +``(void *)0``. + +**Syntax**: + +.. code-block:: c + + T *__builtin_counted_by_ref(void *array) + +**Examples**: + +.. code-block:: c + + #define alloc(P, FAM, COUNT) ({ \ + size_t __ignored_assignment; \ + typeof(P) __p = NULL; \ + __p = malloc(MAX(sizeof(*__p), \ + sizeof(*__p) + sizeof(*__p->FAM) * COUNT)); \ + \ + *_Generic( \ + __builtin_counted_by_ref(__p->FAM), \ + void *: &__ignored_assignment, \ + default: __builtin_counted_by_ref(__p->FAM)) = COUNT; \ + \ + __p; \ + }) + +**Description**: + +The ``__builtin_counted_by_ref`` builtin allows the programmer to prevent a +common error associated with the ``counted_by`` attribute. When using the +``counted_by`` attribute, the ``count`` field **must** be set before the +flexible array member can be accessed. Otherwise, the sanitizers may view such +accesses as false positives. For instance, it's not uncommon for programmers to +initialize the flexible array before setting the ``count`` field: + +.. code-block:: c + + struct s { + int dummy; + short count; + long array[] __attribute__((counted_by(count))); + }; + + struct s *ptr = malloc(sizeof(struct s) + sizeof(long) * COUNT); + + for (int i = 0; i < COUNT; ++i) + ptr->array[i] = i; + + ptr->count = COUNT; + +Enforcing the rule that ``ptr->count = COUNT;`` must occur after every +allocation of a struct with a flexible array member with the ``counted_by`` +attribute is prone to failure in large code bases. This builtin mitigates this +for allocators (like in Linux) that are implemented in a way where the counter +assignment can happen automatically. 
+ +**Note:** The value returned by ``__builtin_counted_by_ref`` cannot be assigned to a variable, have its address taken, or passed into or returned from a function, because doing so violates bounds safety conventions. + Multiprecision Arithmetic Builtins ---------------------------------- diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 46beb3fe39dec8..0b0f2053f634ee 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -313,6 +313,29 @@ Non-comprehensive list of changes in this release as well as declarations. - ``__builtin_abs`` function can now be used in constant expressions. +- The new builtin ``__builtin_counted_by_ref`` was added for contexts where the + programmer needs access to the ``counted_by`` attribute's field, but it's not + available --- e.g. in macros. For instance, it can be used to automatically + set the counter during allocation in the Linux kernel: + + .. code-block:: c + + /* A simplified version of Linux allocation macros */ + #define alloc(PTR, FAM, COUNT) ({ \ + size_t __ignored_assignment; \ + typeof(PTR) __p; \ + size_t __size = sizeof(*PTR) + sizeof(*PTR->FAM) * COUNT; \ + __p = malloc(__size); \ + *_Generic( \ + __builtin_counted_by_ref(__p->FAM), \ + void *: &__ignored_assignment, \ + default: __builtin_counted_by_ref(__p->FAM)) = COUNT; \ + __p; \ + }) + + The flexible array member (FAM) can now be accessed immediately without causing + issues with the sanitizer because the counter is automatically set. + New Compiler Flags ------------------ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index e484c3969fe228..4360e0bf9840f1 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4932,3 +4932,9 @@ def ArithmeticFence : LangBuiltin<"ALL_LANGUAGES"> { let Attributes = [CustomTypeChecking, Constexpr]; let Prototype = "void(...)"; } + +def CountedByRef : Builtin { + let Spellings = ["__builtin_counted_by_ref"]; + let Attributes = [NoThrow, CustomTypeChecking]; + let Prototype = "int(...)"; +} diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c96a3f6d6e157f..6a244c276facd6 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6652,6 +6652,18 @@ def warn_counted_by_attr_elt_type_unknown_size : Warning, InGroup; +// __builtin_counted_by_ref diagnostics: +def err_builtin_counted_by_ref_must_be_flex_array_member : Error< + "'__builtin_counted_by_ref' argument must reference a flexible array member">; +def err_builtin_counted_by_ref_cannot_leak_reference : Error< + "value returned by '__builtin_counted_by_ref' cannot be assigned to a " + "variable, have its address taken, or passed into or returned from a function">; +def err_builtin_counted_by_ref_invalid_lhs_use : Error< + "value returned by '__builtin_counted_by_ref' cannot be used in " + "%select{an array subscript|a binary}0 expression">; +def err_builtin_counted_by_ref_has_side_effects : Error< + "'__builtin_counted_by_ref' argument cannot have side-effects">; + let CategoryName = "ARC Semantic Issue" in { // ARC-mode diagnostics.
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index aa2f5ff3ef7207..fad446a05e782f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2510,6 +2510,8 @@ class Sema final : public SemaBase { bool BuiltinNonDeterministicValue(CallExpr *TheCall); + bool BuiltinCountedByRef(CallExpr *TheCall); + // Matrix builtin handling. ExprResult BuiltinMatrixTranspose(CallExpr *TheCall, ExprResult CallResult); ExprResult BuiltinMatrixColumnMajorLoad(CallExpr *TheCall, diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 8204e3509dd563..047f354b200745 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3657,6 +3657,10 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { (!hasAttr() && !hasAttr())) return 0; + if (getASTContext().getLangOpts().CPlusPlus && + BuiltinID == Builtin::BI__builtin_counted_by_ref) + return 0; + const ASTContext &Context = getASTContext(); if (!Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) return BuiltinID; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0ef9058640db6a..1b4891d94eee77 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3691,6 +3691,35 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, /*EmittedE=*/nullptr, IsDynamic)); } + case Builtin::BI__builtin_counted_by_ref: { + // Default to returning '(void *) 0'. + llvm::Value *Result = llvm::ConstantPointerNull::get( + llvm::PointerType::getUnqual(getLLVMContext())); + + const Expr *Arg = E->getArg(0)->IgnoreParenImpCasts(); + + if (auto *UO = dyn_cast(Arg); + UO && UO->getOpcode() == UO_AddrOf) { + Arg = UO->getSubExpr()->IgnoreParenImpCasts(); + + if (auto *ASE = dyn_cast(Arg)) + Arg = ASE->getBase()->IgnoreParenImpCasts(); + } + + if (const MemberExpr *ME = dyn_cast_if_present(Arg)) { + if (auto *CATy = + ME->getMemberDecl()->getType()->getAs(); + CATy && CATy->getKind() == CountAttributedType::CountedBy) { + const auto *FAMDecl = cast(ME->getMemberDecl()); + if (const FieldDecl *CountFD = FAMDecl->findCountedByField()) + Result = GetCountedByFieldExprGEP(Arg, FAMDecl, CountFD); + else + llvm::report_fatal_error("Cannot find the counted_by 'count' field"); + } + } + + return RValue::get(Result); + } case Builtin::BI__builtin_prefetch: { Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); // FIXME: Technically these constants should of type 'int', yes? diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 3388a6df466d45..096f4c4f550435 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1145,15 +1145,7 @@ static bool getGEPIndicesToField(CodeGenFunction &CGF, const RecordDecl *RD, return false; } -/// This method is typically called in contexts where we can't generate -/// side-effects, like in __builtin_dynamic_object_size. When finding -/// expressions, only choose those that have either already been emitted or can -/// be loaded without side-effects. -/// -/// - \p FAMDecl: the \p Decl for the flexible array member. It may not be -/// within the top-level struct. -/// - \p CountDecl: must be within the same non-anonymous struct as \p FAMDecl. 
-llvm::Value *CodeGenFunction::EmitLoadOfCountedByField( +llvm::Value *CodeGenFunction::GetCountedByFieldExprGEP( const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl) { const RecordDecl *RD = CountDecl->getParent()->getOuterLexicalRecordContext(); @@ -1182,12 +1174,25 @@ llvm::Value *CodeGenFunction::EmitLoadOfCountedByField( return nullptr; Indices.push_back(Builder.getInt32(0)); - Res = Builder.CreateInBoundsGEP( + return Builder.CreateInBoundsGEP( ConvertType(QualType(RD->getTypeForDecl(), 0)), Res, RecIndicesTy(llvm::reverse(Indices)), "..counted_by.gep"); +} - return Builder.CreateAlignedLoad(ConvertType(CountDecl->getType()), Res, - getIntAlign(), "..counted_by.load"); +/// This method is typically called in contexts where we can't generate +/// side-effects, like in __builtin_dynamic_object_size. When finding +/// expressions, only choose those that have either already been emitted or can +/// be loaded without side-effects. +/// +/// - \p FAMDecl: the \p Decl for the flexible array member. It may not be +/// within the top-level struct. +/// - \p CountDecl: must be within the same non-anonymous struct as \p FAMDecl. +llvm::Value *CodeGenFunction::EmitLoadOfCountedByField( + const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl) { + if (llvm::Value *GEP = GetCountedByFieldExprGEP(Base, FAMDecl, CountDecl)) + return Builder.CreateAlignedLoad(ConvertType(CountDecl->getType()), GEP, + getIntAlign(), "..counted_by.load"); + return nullptr; } void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 3ff4458fb32024..90dc399f1341f3 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3305,6 +3305,10 @@ class CodeGenFunction : public CodeGenTypeCache { const FieldDecl *FAMDecl, uint64_t &Offset); + llvm::Value *GetCountedByFieldExprGEP(const Expr *Base, + const FieldDecl *FAMDecl, + const FieldDecl *CountDecl); + /// Build an expression accessing the "counted_by" field. llvm::Value *EmitLoadOfCountedByField(const Expr *Base, const FieldDecl *FAMDecl, diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index d78968179b1fdc..96008b14225a4c 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2973,6 +2973,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, } break; } + case Builtin::BI__builtin_counted_by_ref: + if (BuiltinCountedByRef(TheCall)) + return ExprError(); + break; } if (getLangOpts().HLSL && HLSL().CheckBuiltinFunctionCall(BuiltinID, TheCall)) @@ -5575,6 +5579,55 @@ bool Sema::BuiltinSetjmp(CallExpr *TheCall) { return false; } +bool Sema::BuiltinCountedByRef(CallExpr *TheCall) { + if (checkArgCount(TheCall, 1)) + return true; + + ExprResult ArgRes = UsualUnaryConversions(TheCall->getArg(0)); + if (ArgRes.isInvalid()) + return true; + + // For simplicity, we support only limited expressions for the argument. + // Specifically a pointer to a flexible array member:'ptr->array'. This + // allows us to reject arguments with complex casting, which really shouldn't + // be a huge problem. 
+ const Expr *Arg = ArgRes.get()->IgnoreParenImpCasts(); + if (!isa(Arg->getType()) && !Arg->getType()->isArrayType()) + return Diag(Arg->getBeginLoc(), + diag::err_builtin_counted_by_ref_must_be_flex_array_member) + << Arg->getSourceRange(); + + if (Arg->HasSideEffects(Context)) + return Diag(Arg->getBeginLoc(), + diag::err_builtin_counted_by_ref_has_side_effects) + << Arg->getSourceRange(); + + if (const auto *ME = dyn_cast(Arg)) { + if (!ME->isFlexibleArrayMemberLike( + Context, getLangOpts().getStrictFlexArraysLevel())) + return Diag(Arg->getBeginLoc(), + diag::err_builtin_counted_by_ref_must_be_flex_array_member) + << Arg->getSourceRange(); + + if (auto *CATy = + ME->getMemberDecl()->getType()->getAs(); + CATy && CATy->getKind() == CountAttributedType::CountedBy) { + const auto *FAMDecl = cast(ME->getMemberDecl()); + if (const FieldDecl *CountFD = FAMDecl->findCountedByField()) { + TheCall->setType(Context.getPointerType(CountFD->getType())); + return false; + } + } + } else { + return Diag(Arg->getBeginLoc(), + diag::err_builtin_counted_by_ref_must_be_flex_array_member) + << Arg->getSourceRange(); + } + + TheCall->setType(Context.getPointerType(Context.VoidTy)); + return false; +} + namespace { class UncoveredArgHandler { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index df8f025030e2b1..68527d9da8c799 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -9209,6 +9209,38 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS, LHSType = Context.getCanonicalType(LHSType).getUnqualifiedType(); RHSType = Context.getCanonicalType(RHSType).getUnqualifiedType(); + // __builtin_counted_by_ref cannot be assigned to a variable, used in + // function call, or in a return. + auto FindBuiltinCountedByRefExpr = [&](Expr *E) -> CallExpr * { + struct BuiltinCountedByRefVisitor + : public RecursiveASTVisitor { + CallExpr *TheCall = nullptr; + bool VisitCallExpr(CallExpr *CE) { + if (CE->getBuiltinCallee() == Builtin::BI__builtin_counted_by_ref) { + TheCall = CE; + return false; + } + return true; + } + bool VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *UE) { + // A UnaryExprOrTypeTraitExpr---e.g. sizeof, __alignof, etc.---isn't + // the same as a CallExpr, so if we find a __builtin_counted_by_ref() + // call in one, ignore it. + return false; + } + } V; + V.TraverseStmt(E); + return V.TheCall; + }; + static llvm::SmallPtrSet Diagnosed; + if (auto *CE = FindBuiltinCountedByRefExpr(RHS.get()); + CE && !Diagnosed.count(CE)) { + Diagnosed.insert(CE); + Diag(CE->getExprLoc(), + diag::err_builtin_counted_by_ref_cannot_leak_reference) + << CE->getSourceRange(); + } + // Common case: no conversion required. if (LHSType == RHSType) { Kind = CK_NoOp; @@ -13757,6 +13789,43 @@ QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS, ConvTy = CheckAssignmentConstraints(Loc, LHSType, RHSType); } + // __builtin_counted_by_ref can't be used in a binary expression or array + // subscript on the LHS. + int DiagOption = -1; + auto FindInvalidUseOfBoundsSafetyCounter = [&](Expr *E) -> CallExpr * { + struct BuiltinCountedByRefVisitor + : public RecursiveASTVisitor { + CallExpr *CE = nullptr; + bool InvalidUse = false; + int Option = -1; + + bool VisitCallExpr(CallExpr *E) { + if (E->getBuiltinCallee() == Builtin::BI__builtin_counted_by_ref) { + CE = E; + return false; + } + return true; + } + + bool VisitArraySubscriptExpr(ArraySubscriptExpr *E) { + InvalidUse = true; + Option = 0; // report 'array expression' in diagnostic. 
+ return true; + } + bool VisitBinaryOperator(BinaryOperator *E) { + InvalidUse = true; + Option = 1; // report 'binary expression' in diagnostic. + return true; + } + } V; + V.TraverseStmt(E); + DiagOption = V.Option; + return V.InvalidUse ? V.CE : nullptr; + }; + if (auto *CE = FindInvalidUseOfBoundsSafetyCounter(LHSExpr)) + Diag(CE->getExprLoc(), diag::err_builtin_counted_by_ref_invalid_lhs_use) + << DiagOption << CE->getSourceRange(); + if (DiagnoseAssignmentResult(ConvTy, Loc, LHSType, RHSType, RHS.get(), AssignmentAction::Assigning)) return QualType(); diff --git a/clang/test/AST/ast-print-builtin-counted-by-ref.c b/clang/test/AST/ast-print-builtin-counted-by-ref.c new file mode 100644 index 00000000000000..c0ff7515fc8208 --- /dev/null +++ b/clang/test/AST/ast-print-builtin-counted-by-ref.c @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux -ast-print %s -o - | FileCheck %s + +typedef unsigned long int size_t; + +int global_array[42]; +int global_int; + +struct fam_struct { + int x; + char count; + int array[] __attribute__((counted_by(count))); +}; + +// CHECK-LABEL: void test1(struct fam_struct *ptr, int size) { +// CHECK-NEXT: size_t __ignored_assignment; +// CHECK-NEXT: *_Generic(__builtin_counted_by_ref(ptr->array), void *: &__ignored_assignment, default: __builtin_counted_by_ref(ptr->array)) = 42; +void test1(struct fam_struct *ptr, int size) { + size_t __ignored_assignment; + + *_Generic(__builtin_counted_by_ref(ptr->array), + void *: &__ignored_assignment, + default: __builtin_counted_by_ref(ptr->array)) = 42; // ok +} diff --git a/clang/test/CodeGen/builtin-counted-by-ref.c b/clang/test/CodeGen/builtin-counted-by-ref.c new file mode 100644 index 00000000000000..8ad715879aa767 --- /dev/null +++ b/clang/test/CodeGen/builtin-counted-by-ref.c @@ -0,0 +1,177 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s --check-prefix=X86_64 +// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck %s --check-prefix=I386 + +struct a { + char x; + short count; + int array[] __attribute__((counted_by(count))); +}; + +// X86_64-LABEL: define dso_local ptr @test1( +// X86_64-SAME: i32 noundef [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] { +// X86_64-NEXT: [[ENTRY:.*:]] +// X86_64-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 +// X86_64-NEXT: [[P:%.*]] = alloca ptr, align 8 +// X86_64-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 +// X86_64-NEXT: [[MUL:%.*]] = mul i64 4, [[CONV]] +// X86_64-NEXT: [[ADD:%.*]] = add i64 4, [[MUL]] +// X86_64-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 noundef [[ADD]]) #[[ATTR2:[0-9]+]] +// X86_64-NEXT: store ptr [[CALL]], ptr [[P]], align 8 +// X86_64-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP1]] to i16 +// X86_64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P]], align 8 +// X86_64-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[TMP2]], i32 0, i32 1 +// X86_64-NEXT: store i16 [[CONV1]], ptr [[DOT_COUNTED_BY_GEP]], align 2 +// X86_64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P]], align 8 +// X86_64-NEXT: ret ptr [[TMP3]] +// +// I386-LABEL: define dso_local ptr @test1( +// I386-SAME: i32 noundef [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] { +// I386-NEXT: [[ENTRY:.*:]] +// I386-NEXT: [[SIZE_ADDR:%.*]] = 
alloca i32, align 4 +// I386-NEXT: [[P:%.*]] = alloca ptr, align 4 +// I386-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[MUL:%.*]] = mul i32 4, [[TMP0]] +// I386-NEXT: [[ADD:%.*]] = add i32 4, [[MUL]] +// I386-NEXT: [[CALL:%.*]] = call ptr @malloc(i32 noundef [[ADD]]) #[[ATTR2:[0-9]+]] +// I386-NEXT: store ptr [[CALL]], ptr [[P]], align 4 +// I386-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[CONV:%.*]] = trunc i32 [[TMP1]] to i16 +// I386-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P]], align 4 +// I386-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[TMP2]], i32 0, i32 1 +// I386-NEXT: store i16 [[CONV]], ptr [[DOT_COUNTED_BY_GEP]], align 2 +// I386-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P]], align 4 +// I386-NEXT: ret ptr [[TMP3]] +// +struct a *test1(int size) { + struct a *p = __builtin_malloc(sizeof(struct a) + sizeof(int) * size); + + *__builtin_counted_by_ref(p->array) = size; + return p; +} + +struct b { + int _filler; + struct { + int __filler; + struct { + int ___filler; + struct { + char count; + }; + }; + }; + struct { + int filler_; + struct { + int filler__; + struct { + long array[] __attribute__((counted_by(count))); + }; + }; + }; +}; + +// X86_64-LABEL: define dso_local ptr @test2( +// X86_64-SAME: i32 noundef [[SIZE:%.*]]) #[[ATTR0]] { +// X86_64-NEXT: [[ENTRY:.*:]] +// X86_64-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 +// X86_64-NEXT: [[P:%.*]] = alloca ptr, align 8 +// X86_64-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 +// X86_64-NEXT: [[MUL:%.*]] = mul i64 4, [[CONV]] +// X86_64-NEXT: [[ADD:%.*]] = add i64 4, [[MUL]] +// X86_64-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 noundef [[ADD]]) #[[ATTR2]] +// X86_64-NEXT: store ptr [[CALL]], ptr [[P]], align 8 +// X86_64-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP1]] to i8 +// X86_64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P]], align 8 +// X86_64-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[TMP2]], i32 0, i32 1, i32 1, i32 1, i32 0 +// X86_64-NEXT: store i8 [[CONV1]], ptr [[DOT_COUNTED_BY_GEP]], align 1 +// X86_64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P]], align 8 +// X86_64-NEXT: ret ptr [[TMP3]] +// +// I386-LABEL: define dso_local ptr @test2( +// I386-SAME: i32 noundef [[SIZE:%.*]]) #[[ATTR0]] { +// I386-NEXT: [[ENTRY:.*:]] +// I386-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 +// I386-NEXT: [[P:%.*]] = alloca ptr, align 4 +// I386-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[MUL:%.*]] = mul i32 4, [[TMP0]] +// I386-NEXT: [[ADD:%.*]] = add i32 4, [[MUL]] +// I386-NEXT: [[CALL:%.*]] = call ptr @malloc(i32 noundef [[ADD]]) #[[ATTR2]] +// I386-NEXT: store ptr [[CALL]], ptr [[P]], align 4 +// I386-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[CONV:%.*]] = trunc i32 [[TMP1]] to i8 +// I386-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P]], align 4 +// I386-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[TMP2]], i32 0, i32 1, i32 1, i32 1, i32 0 +// I386-NEXT: store i8 [[CONV]], ptr [[DOT_COUNTED_BY_GEP]], align 1 +// I386-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P]], align 4 +// I386-NEXT: ret ptr [[TMP3]] +// 
+struct b *test2(int size) { + struct b *p = __builtin_malloc(sizeof(struct a) + sizeof(int) * size); + + *__builtin_counted_by_ref(p->array) = size; + return p; +} + +struct c { + char x; + short count; + int array[]; +}; + +// X86_64-LABEL: define dso_local ptr @test3( +// X86_64-SAME: i32 noundef [[SIZE:%.*]]) #[[ATTR0]] { +// X86_64-NEXT: [[ENTRY:.*:]] +// X86_64-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 +// X86_64-NEXT: [[P:%.*]] = alloca ptr, align 8 +// X86_64-NEXT: [[__IGNORED:%.*]] = alloca i64, align 8 +// X86_64-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 +// X86_64-NEXT: [[MUL:%.*]] = mul i64 4, [[CONV]] +// X86_64-NEXT: [[ADD:%.*]] = add i64 4, [[MUL]] +// X86_64-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 noundef [[ADD]]) #[[ATTR2]] +// X86_64-NEXT: store ptr [[CALL]], ptr [[P]], align 8 +// X86_64-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[CONV1:%.*]] = sext i32 [[TMP1]] to i64 +// X86_64-NEXT: store i64 [[CONV1]], ptr [[__IGNORED]], align 8 +// X86_64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P]], align 8 +// X86_64-NEXT: ret ptr [[TMP2]] +// +// I386-LABEL: define dso_local ptr @test3( +// I386-SAME: i32 noundef [[SIZE:%.*]]) #[[ATTR0]] { +// I386-NEXT: [[ENTRY:.*:]] +// I386-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 +// I386-NEXT: [[P:%.*]] = alloca ptr, align 4 +// I386-NEXT: [[__IGNORED:%.*]] = alloca i32, align 4 +// I386-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[MUL:%.*]] = mul i32 4, [[TMP0]] +// I386-NEXT: [[ADD:%.*]] = add i32 4, [[MUL]] +// I386-NEXT: [[CALL:%.*]] = call ptr @malloc(i32 noundef [[ADD]]) #[[ATTR2]] +// I386-NEXT: store ptr [[CALL]], ptr [[P]], align 4 +// I386-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: store i32 [[TMP1]], ptr [[__IGNORED]], align 4 +// I386-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P]], align 4 +// I386-NEXT: ret ptr [[TMP2]] +// +struct c *test3(int size) { + struct c *p = __builtin_malloc(sizeof(struct c) + sizeof(int) * size); + unsigned long int __ignored; + + *_Generic( + __builtin_counted_by_ref(p->array), + void *: &__ignored, + default: __builtin_counted_by_ref(p->array)) = size; + + return p; +} diff --git a/clang/test/Sema/builtin-counted-by-ref.c b/clang/test/Sema/builtin-counted-by-ref.c new file mode 100644 index 00000000000000..5a7ecefcb78976 --- /dev/null +++ b/clang/test/Sema/builtin-counted-by-ref.c @@ -0,0 +1,123 @@ +// RUN: %clang_cc1 -std=c99 -fsyntax-only -verify %s + +typedef unsigned long int size_t; + +int global_array[42]; +int global_int; + +struct fam_struct { + int x; + char count; + int array[] __attribute__((counted_by(count))); +}; + +void test1(struct fam_struct *ptr, int size, int idx) { + size_t size_of = sizeof(__builtin_counted_by_ref(ptr->array)); // ok + + *__builtin_counted_by_ref(ptr->array) = size; // ok + + { + size_t __ignored_assignment; + *_Generic(__builtin_counted_by_ref(ptr->array), + void *: &__ignored_assignment, + default: __builtin_counted_by_ref(ptr->array)) = 42; // ok + } +} + +void test2(struct fam_struct *ptr, int idx) { + __builtin_counted_by_ref(); // expected-error {{too few arguments to function call, expected 1, have 0}} + __builtin_counted_by_ref(ptr->array, ptr->x, ptr->count); // expected-error {{too many arguments to function call, expected 1, have 3}} +} + +void test3(struct 
fam_struct *ptr, int idx) { + __builtin_counted_by_ref(&ptr->array[0]); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(&ptr->array[idx]); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(&ptr->array); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(ptr->x); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(&ptr->x); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(global_array); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(global_int); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(&global_int); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} +} + +void test4(struct fam_struct *ptr, int idx) { + __builtin_counted_by_ref(ptr++->array); // expected-error {{'__builtin_counted_by_ref' argument cannot have side-effects}} + __builtin_counted_by_ref(&ptr->array[idx++]); // expected-error {{'__builtin_counted_by_ref' argument cannot have side-effects}} +} + +void foo(char *); + +void *test5(struct fam_struct *ptr, int size, int idx) { + char *ref = __builtin_counted_by_ref(ptr->array); // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + + ref = __builtin_counted_by_ref(ptr->array); // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + ref = (char *)(int *)(42 + &*__builtin_counted_by_ref(ptr->array)); // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + foo(__builtin_counted_by_ref(ptr->array)); // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + foo(ref = __builtin_counted_by_ref(ptr->array)); // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + + if ((ref = __builtin_counted_by_ref(ptr->array))) // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + ; + + for (char *p = __builtin_counted_by_ref(ptr->array); p && *p; ++p) // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + ; + + return __builtin_counted_by_ref(ptr->array); // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} +} + +void test6(struct fam_struct *ptr, int size, int idx) { + *(__builtin_counted_by_ref(ptr->array) + 4) = 37; // expected-error {{value returned by '__builtin_counted_by_ref' cannot be used in a binary expression}} + 
__builtin_counted_by_ref(ptr->array)[3] = 37; // expected-error {{value returned by '__builtin_counted_by_ref' cannot be used in an array subscript expression}} +} + +struct non_fam_struct { + char x; + long *pointer; + int array[42]; + short count; +}; + +void *test7(struct non_fam_struct *ptr, int size) { + *__builtin_counted_by_ref(ptr->array) = size // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + *__builtin_counted_by_ref(&ptr->array[0]) = size; // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + *__builtin_counted_by_ref(ptr->pointer) = size; // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + *__builtin_counted_by_ref(&ptr->pointer[0]) = size; // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} +} + +struct char_count { + char count; + int array[] __attribute__((counted_by(count))); +} *cp; + +struct short_count { + short count; + int array[] __attribute__((counted_by(count))); +} *sp; + +struct int_count { + int count; + int array[] __attribute__((counted_by(count))); +} *ip; + +struct unsigned_count { + unsigned count; + int array[] __attribute__((counted_by(count))); +} *up; + +struct long_count { + long count; + int array[] __attribute__((counted_by(count))); +} *lp; + +struct unsigned_long_count { + unsigned long count; + int array[] __attribute__((counted_by(count))); +} *ulp; + +void test8(void) { + _Static_assert(_Generic(__builtin_counted_by_ref(cp->array), char * : 1, default : 0) == 1, "wrong return type"); + _Static_assert(_Generic(__builtin_counted_by_ref(sp->array), short * : 1, default : 0) == 1, "wrong return type"); + _Static_assert(_Generic(__builtin_counted_by_ref(ip->array), int * : 1, default : 0) == 1, "wrong return type"); + _Static_assert(_Generic(__builtin_counted_by_ref(up->array), unsigned int * : 1, default : 0) == 1, "wrong return type"); + _Static_assert(_Generic(__builtin_counted_by_ref(lp->array), long * : 1, default : 0) == 1, "wrong return type"); + _Static_assert(_Generic(__builtin_counted_by_ref(ulp->array), unsigned long * : 1, default : 0) == 1, "wrong return type"); +} diff --git a/clang/test/Sema/builtin-counted-by-ref.cpp b/clang/test/Sema/builtin-counted-by-ref.cpp new file mode 100644 index 00000000000000..b9ec9c908dcaa6 --- /dev/null +++ b/clang/test/Sema/builtin-counted-by-ref.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -x c++ -fsyntax-only -verify %s + +struct fam_struct { + int x; + char count; + int array[] __attribute__((counted_by(count))); // expected-warning {{'counted_by' attribute ignored}} +}; + From bdf8e308b7ea430f619ca3aa1199a76eb6b4e2d4 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Thu, 7 Nov 2024 14:15:33 -0800 Subject: [PATCH 28/40] AMDGPU: Don't avoid clamp of bit shift in BFE pattern (#115372) Enable pattern matching from "x<<32-y>>32-y" to "bfe x, 0, y" when we know y is in [0,31]. 
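As a concrete illustration (hand-written IR mirroring the tests updated below, not taken verbatim from any one test), the mask is what proves the bound, letting instruction selection fold the shift pair into a single BFE:

    %y5  = and i32 %y, 31        ; known bits: %y5 is in [0,31]
    %amt = sub i32 32, %y5
    %shl = shl i32 %x, %amt      ; x << (32 - y)
    %bfe = lshr i32 %shl, %amt   ; x << (32 - y) >> (32 - y)  ->  v_bfe_u32 x, 0, y
    ; with ashr instead of lshr, the signed variant selects v_bfe_i32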
This is the follow-up for the PR: https://github.com/llvm/llvm-project/pull/114279 to fix the issue: https://github.com/llvm/llvm-project/issues/114282 --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 1 - llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 1 + llvm/lib/Target/AMDGPU/SIInstructions.td | 17 ++++++++++ llvm/test/CodeGen/AMDGPU/bfe-patterns.ll | 33 ++++++++++--------- llvm/test/CodeGen/AMDGPU/extract-lowbits.ll | 24 +++++--------- 5 files changed, 43 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 21fffba14287ef..e3a330d45aaa57 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -22,7 +22,6 @@ #include "SIISelLowering.h" #include "SIMachineFunctionInfo.h" #include "llvm/Analysis/UniformityAnalysis.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index 11c4cdd560c2f3..5ae0b179d7d0e6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -17,6 +17,7 @@ #include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "SIModeRegisterDefaults.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetMachine.h" diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 52df38c352cf53..0658e030ffa5d6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3553,6 +3553,23 @@ def : AMDGPUPat < (V_BFE_U32_e64 $src, (i32 0), $width) >; +def uint5Bits : PatLeaf<(i32 VGPR_32:$width), [{ + return CurDAG->computeKnownBits(SDValue(N, 0)).countMaxTrailingOnes() <= 5; +}]>; + +// x << (bitwidth - y) >> (bitwidth - y) +def : AMDGPUPat < + (DivergentBinFrag (shl_oneuse i32:$src, (sub 32, uint5Bits:$width)), + (sub 32, uint5Bits:$width)), + (V_BFE_U32_e64 $src, (i32 0), $width) +>; + +def : AMDGPUPat < + (DivergentBinFrag (shl_oneuse i32:$src, (sub 32, uint5Bits:$width)), + (sub 32, uint5Bits:$width)), + (V_BFE_I32_e64 $src, (i32 0), $width) +>; + // SHA-256 Ma patterns // ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll index c57a35aa1880db..bdba8c57dc745d 100644 --- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll +++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll @@ -17,9 +17,8 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] -; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3 -; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2 -; SI-NEXT: v_lshrrev_b32_e32 v2, v3, v2 +; SI-NEXT: v_and_b32_e32 v3, 31, v3 +; SI-NEXT: v_bfe_u32 v2, v2, 0, v3 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -38,9 +37,8 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4 -; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3 -; VI-NEXT: v_lshrrev_b32_e32 v2, v2, v3 +; VI-NEXT: v_and_b32_e32 v2, 
31, v4 +; VI-NEXT: v_bfe_u32 v2, v3, 0, v2 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -49,7 +47,8 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x %src = load volatile i32, ptr addrspace(1) %in0.gep %width = load volatile i32, ptr addrspace(1) %in0.gep - %sub = sub i32 32, %width + %width5 = and i32 %width, 31 + %sub = sub i32 32, %width5 %shl = shl i32 %src, %sub %bfe = lshr i32 %shl, %sub store i32 %bfe, ptr addrspace(1) %out.gep @@ -72,6 +71,7 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] ; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: v_and_b32_e32 v3, 31, v3 ; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3 ; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2 ; SI-NEXT: v_lshrrev_b32_e32 v3, v3, v2 @@ -95,7 +95,8 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4 +; VI-NEXT: v_and_b32_e32 v2, 31, v4 +; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v2 ; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3 ; VI-NEXT: v_lshrrev_b32_e32 v2, v2, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 @@ -108,7 +109,8 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x %src = load volatile i32, ptr addrspace(1) %in0.gep %width = load volatile i32, ptr addrspace(1) %in0.gep - %sub = sub i32 32, %width + %width5 = and i32 %width, 31 + %sub = sub i32 32, %width5 %shl = shl i32 %src, %sub %bfe = lshr i32 %shl, %sub store i32 %bfe, ptr addrspace(1) %out.gep @@ -219,9 +221,8 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] -; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3 -; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2 -; SI-NEXT: v_ashrrev_i32_e32 v2, v3, v2 +; SI-NEXT: v_and_b32_e32 v3, 31, v3 +; SI-NEXT: v_bfe_i32 v2, v2, 0, v3 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -240,9 +241,8 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4 -; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3 -; VI-NEXT: v_ashrrev_i32_e32 v2, v2, v3 +; VI-NEXT: v_and_b32_e32 v2, 31, v4 +; VI-NEXT: v_bfe_i32 v2, v3, 0, v2 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -251,7 +251,8 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x %src = load volatile i32, ptr addrspace(1) %in0.gep %width = load volatile i32, ptr addrspace(1) %in0.gep - %sub = sub i32 32, %width + %width5 = and i32 %width, 31 + %sub = sub i32 32, %width5 %shl = shl i32 %src, %sub %bfe = ashr i32 %shl, %sub store i32 %bfe, ptr addrspace(1) %out.gep diff --git a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll index 3d9616f02d52d1..3de8db2c6a448e 100644 --- a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll +++ 
b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll @@ -150,22 +150,14 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; ---------------------------------------------------------------------------- ; define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { -; SI-LABEL: bzhi32_d0: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1 -; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 -; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0 -; SI-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: bzhi32_d0: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1 -; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 -; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] - %numhighbits = sub i32 32, %numlowbits +; GCN-LABEL: bzhi32_d0: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_and_b32_e32 v1, 31, v1 +; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %numlow5bits = and i32 %numlowbits, 31 + %numhighbits = sub i32 32, %numlow5bits %highbitscleared = shl i32 %val, %numhighbits %masked = lshr i32 %highbitscleared, %numhighbits ret i32 %masked From 62db1c8a076c7167e404412182f4a8915f4ff6ee Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 7 Nov 2024 14:10:44 -0800 Subject: [PATCH 29/40] [SLP] Better decision making on whether to try store packs for vectorization Since the stores are sorted by distance, comparing the indices in the original array and exiting early when the index is less than the index of the last store is not always the best strategy. It is better to remove such stores explicitly so the remaining ones can still be checked for a vectorization opportunity. Fixes #115008 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 15 ++++++++++----- .../SLPVectorizer/RISCV/repeated-address-store.ll | 7 +------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index be7ddeb89e789f..b2f677fb84f983 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -18515,25 +18515,30 @@ bool SLPVectorizerPass::vectorizeStores( } // Try to vectorize the first found set to avoid duplicate analysis. TryToVectorize(Set.second); + unsigned ItIdx = It->first; + int ItDist = It->second; StoreIndexToDistSet PrevSet; - PrevSet.swap(Set.second); + copy_if(Set.second, std::inserter(PrevSet, PrevSet.end()), + [&](const std::pair<unsigned, int> &Pair) { + return Pair.first > ItIdx; + }); + Set.second.clear(); Set.first = Idx; Set.second.emplace(Idx, 0); // Insert stores that followed previous match to try to vectorize them // with this store. - unsigned StartIdx = It->first + 1; + unsigned StartIdx = ItIdx + 1; SmallBitVector UsedStores(Idx - StartIdx); // Distances to previously found dup store (or this store, since they // store to the same addresses). SmallVector<int> Dists(Idx - StartIdx, 0); for (const std::pair<unsigned, int> &Pair : reverse(PrevSet)) { // Do not try to vectorize sequences, we already tried.
- if (Pair.first <= It->first || - VectorizedStores.contains(Stores[Pair.first])) + if (VectorizedStores.contains(Stores[Pair.first])) break; unsigned BI = Pair.first - StartIdx; UsedStores.set(BI); - Dists[BI] = Pair.second - It->second; + Dists[BI] = Pair.second - ItDist; } for (unsigned I = StartIdx; I < Idx; ++I) { unsigned BI = I - StartIdx; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/repeated-address-store.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/repeated-address-store.ll index f126192271cd95..48928d2dfd4738 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/repeated-address-store.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/repeated-address-store.ll @@ -6,12 +6,7 @@ define void @test(ptr %dest) { ; CHECK-SAME: ptr [[DEST:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[INC3:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 3 -; CHECK-NEXT: store i32 1, ptr [[INC3]], align 2 -; CHECK-NEXT: store i32 1, ptr [[DEST]], align 4 -; CHECK-NEXT: [[INC1:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 1 -; CHECK-NEXT: store i32 1, ptr [[INC1]], align 2 -; CHECK-NEXT: [[INC2:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 2 -; CHECK-NEXT: store i32 1, ptr [[INC2]], align 2 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[DEST]], align 4 ; CHECK-NEXT: store i32 2, ptr [[DEST]], align 2 ; CHECK-NEXT: store i32 1, ptr [[INC3]], align 2 ; CHECK-NEXT: ret void From c02da382471fd0b338af76ce220e9567e3cb854a Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 7 Nov 2024 14:23:05 -0800 Subject: [PATCH 30/40] [RISCV] Add tests for deinterleave(2-8) shuffles --- .../rvv/fixed-vectors-shuffle-deinterleave.ll | 308 ++++++++++++++++++ 1 file changed, 308 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll new file mode 100644 index 00000000000000..a8f75f8d1c24d1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll @@ -0,0 +1,308 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -mattr=+v \ +; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32V +; RUN: llc < %s -mtriple=riscv64 -mattr=+v \ +; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64V + +define void @deinterleave3_0_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave3_0_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: vmul.vx v9, v9, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v9 +; CHECK-NEXT: vadd.vi v9, v9, -8 +; CHECK-NEXT: li a0, 56 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t +; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave3_8_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave3_8_i8: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 1 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: vmadd.vx v10, a0, v9 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: li a0, 24 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave4_0_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave4_0_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vwaddu.vv v10, v8, v9 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vwmaccu.vx v10, a0, v9 +; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vsll.vi v9, v9, 2 +; CHECK-NEXT: vadd.vi v9, v9, -8 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t +; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave4_8_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave4_8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -9 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmacc.vx v9, a0, v10 +; CHECK-NEXT: vsll.vi v10, v10, 2 +; CHECK-NEXT: vadd.vi v10, v10, 1 +; CHECK-NEXT: vrgather.vv v11, v8, v10 +; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v11, v8, v9, v0.t +; CHECK-NEXT: vse8.v v11, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave5_0_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave5_0_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmul.vx v9, v9, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v9 +; CHECK-NEXT: vadd.vi v9, v9, -8 +; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t +; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + 
%shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave5_8_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave5_8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 1 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmadd.vx v10, a0, v9 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v9, v8, 3, v0.t +; CHECK-NEXT: vse8.v v9, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave6_0_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave6_0_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: li a0, 6 +; CHECK-NEXT: vmul.vx v9, v9, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v9 +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 4, v0.t +; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave6_8_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave6_8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 1 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: li a0, 6 +; CHECK-NEXT: vmadd.vx v10, a0, v9 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v9, v8, 5, v0.t +; CHECK-NEXT: vse8.v v9, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave7_0_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave7_0_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: li a0, 7 +; CHECK-NEXT: vmul.vx v9, v9, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v9 +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 6, v0.t +; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave7_8_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave7_8_i8: +; CHECK: # %bb.0: # 
%entry
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v9, -6
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    li a0, 6
+; CHECK-NEXT:    vmadd.vx v10, a0, v9
+; CHECK-NEXT:    vmv.v.i v0, 6
+; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 8
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vrgather.vi v11, v8, 1
+; CHECK-NEXT:    vrgather.vv v11, v9, v10, v0.t
+; CHECK-NEXT:    vse8.v v11, (a1)
+; CHECK-NEXT:    ret
+entry:
+  %0 = load <16 x i8>, ptr %in, align 1
+  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32>
+  store <8 x i8> %shuffle.i5, ptr %out, align 1
+  ret void
+}
+
+define void @deinterleave8_0_i8(ptr %in, ptr %out) {
+; CHECK-LABEL: deinterleave8_0_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 8
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    ret
+entry:
+  %0 = load <16 x i8>, ptr %in, align 1
+  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32>
+  store <8 x i8> %shuffle.i5, ptr %out, align 1
+  ret void
+}
+
+define void @deinterleave8_8_i8(ptr %in, ptr %out) {
+; CHECK-LABEL: deinterleave8_8_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vmv.v.i v0, -3
+; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 8
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vrgather.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vse8.v v9, (a1)
+; CHECK-NEXT:    ret
+entry:
+  %0 = load <16 x i8>, ptr %in, align 1
+  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32>
+  store <8 x i8> %shuffle.i5, ptr %out, align 1
+  ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32V: {{.*}}
+; RV64V: {{.*}}

From 02668f60a9b5c0d5b8b6e60b4e897f763ad59a91 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Thu, 7 Nov 2024 14:34:05 -0800
Subject: [PATCH 31/40] [RISCV] Match single source deinterleave shuffles for vnsrl (#114878)

We had previously only been matching the two source case where both
sources came from a wider source type. We can also match the single
source case - provided the result is m4 or smaller, because we will
need a wider type to represent the source.

The main goal of this is to ensure that vnsrl matching is robust to a
possible change in canonicalization for length-changing shuffles that
I'm considering, but it has the nice effect of picking up a few cases
we missed along the way.
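
For reference, a minimal IR sketch of the single source form this patch
matches (function and value names are illustrative, not taken from the
new tests):

  define <8 x i8> @even_elements(ptr %p) {
    ; Even deinterleave of one source; the tail of the mask is undef.
    %v = load <8 x i8>, ptr %p
    %even = shufflevector <8 x i8> %v, <8 x i8> poison,
            <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
    ret <8 x i8> %even
  }

With this change such a shuffle can lower to a single vnsrl.wi (shift 0
for the even case, 8 for the odd case at e8) rather than a vrgather
sequence, provided the result is m4 or smaller.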
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 68 +++++++++++-------- .../rvv/fixed-vectors-deinterleave-load.ll | 31 ++++----- .../fixed-vectors-shuffle-changes-length.ll | 42 +++++------- .../rvv/fixed-vectors-shufflevector-vnsrl.ll | 33 ++++++--- .../RISCV/rvv/vector-deinterleave-fixed.ll | 30 ++++---- 5 files changed, 112 insertions(+), 92 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5600524b69a620..48a7c1f047ff46 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4426,48 +4426,58 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, } // Is this a shuffle extracts either the even or odd elements of a vector? -// That is, specifically, either (a) or (b) below. -// t34: v8i8 = extract_subvector t11, Constant:i64<0> -// t33: v8i8 = extract_subvector t11, Constant:i64<8> -// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33 -// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33 -// Returns {Src Vector, Even Elements} on success -static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, - SDValue V2, ArrayRef Mask, - const RISCVSubtarget &Subtarget) { +// That is, specifically, either (a) or (b) in the options below. +// Single operand shuffle is easy: +// a) t35: v8i8 = vector_shuffle<0,2,4,6,u,u,u,u> t34, undef +// b) t35: v8i8 = vector_shuffle<1,3,5,7,u,u,u,u> t34, undef +// Double operand shuffle: +// t34: v8i8 = extract_subvector t11, Constant:i64<0> +// t33: v8i8 = extract_subvector t11, Constant:i64<8> +// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33 +// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33 +static SDValue isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, + SDValue V2, ArrayRef Mask, + const RISCVSubtarget &Subtarget) { // Need to be able to widen the vector. if (VT.getScalarSizeInBits() >= Subtarget.getELen()) - return false; + return SDValue(); + + // First index must be the first even or odd element from V1. + if (Mask[0] != 0 && Mask[0] != 1) + return SDValue(); + + // The others must increase by 2 each time. + for (unsigned i = 1; i != Mask.size(); ++i) + if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2) + return SDValue(); + + if (1 == count_if(Mask, [](int Idx) { return Idx != -1; })) + return SDValue(); + + if (V2.isUndef() && + RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8) + return V1; // Both input must be extracts. if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR || V2.getOpcode() != ISD::EXTRACT_SUBVECTOR) - return false; + return SDValue(); // Extracting from the same source. SDValue Src = V1.getOperand(0); if (Src != V2.getOperand(0)) - return false; + return SDValue(); // Src needs to have twice the number of elements. if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2)) - return false; + return SDValue(); // The extracts must extract the two halves of the source. if (V1.getConstantOperandVal(1) != 0 || V2.getConstantOperandVal(1) != Mask.size()) - return false; - - // First index must be the first even or odd element from V1. - if (Mask[0] != 0 && Mask[0] != 1) - return false; - - // The others must increase by 2 each time (or be undef). 
- for (unsigned i = 1; i != Mask.size(); ++i) - if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2) - return false; + return SDValue(); - return true; + return Src; } /// Is this shuffle interleaving contiguous elements from one vector into the @@ -4597,7 +4607,8 @@ static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, assert(Src.getSimpleValueType().isFixedLengthVector()); ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); - // The source is a vector of type + // The source is a vector of type (For the single source + // case, the high half is undefined) MVT SrcContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(), ContainerVT.getVectorElementCount() * 2); @@ -5300,10 +5311,9 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, // If this is a deinterleave and we can widen the vector, then we can use // vnsrl to deinterleave. - if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) { - return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0, - Subtarget, DAG); - } + if (SDValue Src = + isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) + return getDeinterleaveViaVNSRL(DL, VT, Src, Mask[0] == 0, Subtarget, DAG); if (SDValue V = lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll index 76720c5641563c..060a5c4224fe15 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll @@ -11,29 +11,28 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: vlm.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v8, 2 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v12, v9, 1, v0 +; CHECK-NEXT: vnsrl.wi v8, v12, 0 ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vadd.vv v11, v9, v9 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vrgather.vv v9, v10, v11 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: li a0, -256 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vadd.vi v12, v11, -16 -; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t -; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vadd.vi v12, v11, 1 -; CHECK-NEXT: vrgather.vv v13, v10, v12 -; CHECK-NEXT: vadd.vi v10, v11, -15 -; CHECK-NEXT: vrgather.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmsne.vi v8, v13, 0 +; CHECK-NEXT: vadd.vi v9, v11, -16 +; CHECK-NEXT: vrgather.vv v8, v10, v9, v0.t +; CHECK-NEXT: vmsne.vi v9, v8, 0 +; CHECK-NEXT: vnsrl.wi v8, v12, 8 +; CHECK-NEXT: vadd.vi v11, v11, -15 +; CHECK-NEXT: vrgather.vv v8, v10, v11, v0.t +; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: vmv.v.v v0, v9 ; CHECK-NEXT: ret %vec = load <32 x i1>, ptr %p diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll index 
dcd35b4558e5ea..c9e6a8730eec7e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll @@ -99,45 +99,39 @@ define <4 x i32> @v4i32_v16i32(<16 x i32>) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vmv.v.i v12, 1 -; RV32-NEXT: vmv.v.i v14, 6 +; RV32-NEXT: vmv.v.i v13, 6 ; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v14, v12, 1 -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vid.v v12 -; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vadd.vi v15, v12, 1 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vrgatherei16.vv v12, v8, v15 +; RV32-NEXT: vslideup.vi v13, v12, 1 +; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vmv4r.v v20, v8 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vmv2r.v v22, v14 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.i v0, 10 -; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 8 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vrgatherei16.vv v12, v8, v14, v0.t -; RV32-NEXT: vmv1r.v v8, v12 +; RV32-NEXT: vnsrl.wx v8, v20, a0 +; RV32-NEXT: vrgatherei16.vv v8, v16, v13, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: v4i32_v16i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vid.v v12 -; RV64-NEXT: vadd.vv v12, v12, v12 -; RV64-NEXT: vadd.vi v14, v12, 1 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vrgatherei16.vv v12, v8, v14 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.v.i v0, 10 ; RV64-NEXT: vsetivli zero, 8, e32, m4, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 8 +; RV64-NEXT: vslidedown.vi v16, v8, 8 +; RV64-NEXT: vmv4r.v v20, v8 +; RV64-NEXT: li a0, 32 +; RV64-NEXT: vmv2r.v v22, v12 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vnsrl.wx v8, v20, a0 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vmv.v.i v0, 10 ; RV64-NEXT: li a0, 3 ; RV64-NEXT: slli a0, a0, 33 ; RV64-NEXT: addi a0, a0, 1 ; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vmv.v.x v10, a0 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV64-NEXT: vrgatherei16.vv v12, v8, v10, v0.t -; RV64-NEXT: vmv1r.v v8, v12 +; RV64-NEXT: vrgatherei16.vv v8, v16, v10, v0.t ; RV64-NEXT: ret %2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> ret <4 x i32> %2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll index 3af3540e1964b6..15c2c2298c0dd6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll @@ -444,10 +444,8 @@ define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vrgather.vv v10, v8, v9 -; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: ret entry: %0 = load <8 x i8>, ptr %in, align 1 @@ -461,10 +459,8 @@ define void @vnsrl_0_i8_single_src2(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vrgather.vv v10, v8, v9 
-; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: ret entry: %0 = load <8 x i8>, ptr %in, align 1 @@ -472,3 +468,24 @@ entry: store <8 x i8> %shuffle.i5, ptr %out, align 1 ret void } + +; Can't match the m8 result type as the source would have to be m16 which +; isn't a legal type. +define void @vnsrl_0_i32_single_src_m8(ptr %in, ptr %out) { +; CHECK-LABEL: vnsrl_0_i32_single_src_m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vadd.vv v16, v16, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 +; CHECK-NEXT: vse32.v v24, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <64 x i32>, ptr %in, align 4 + %shuffle.i5 = shufflevector <64 x i32> %0, <64 x i32> poison, <64 x i32> + store <64 x i32> %shuffle.i5, ptr %out, align 4 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index b2973826d65ded..075e463e41a6b0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -7,28 +7,28 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) { ; CHECK-LABEL: vector_deinterleave_v16i1_v32i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vadd.vv v11, v9, v9 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vrgather.vv v9, v10, v11 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v12, v9, 1, v0 +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vadd.vv v11, v9, v9 ; CHECK-NEXT: li a0, -256 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vadd.vi v12, v11, -16 -; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t -; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vadd.vi v12, v11, 1 -; CHECK-NEXT: vrgather.vv v13, v10, v12 -; CHECK-NEXT: vadd.vi v10, v11, -15 -; CHECK-NEXT: vrgather.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmsne.vi v8, v13, 0 +; CHECK-NEXT: vadd.vi v9, v11, -16 +; CHECK-NEXT: vrgather.vv v8, v10, v9, v0.t +; CHECK-NEXT: vmsne.vi v9, v8, 0 +; CHECK-NEXT: vnsrl.wi v8, v12, 8 +; CHECK-NEXT: vadd.vi v11, v11, -15 +; CHECK-NEXT: vrgather.vv v8, v10, v11, v0.t +; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: vmv.v.v v0, v9 ; CHECK-NEXT: ret %retval = call {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1> %vec) From e189d61924ba0165b3a344c3d945b3e2aa373485 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 7 Nov 2024 14:40:00 -0800 Subject: [PATCH 32/40] [memprof] Add extractCallsFromIR (#115218) This patch adds extractCallsFromIR, a function to extract calls from the IR, which will be used to undrift call site locations in the MemProf profile. In a nutshell, the MemProf undrifting works as follows: - Extract call site locations from the IR. - Extract call site locations from the MemProf profile. - Undrift the call site locations with longestCommonSequence. 
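
As a rough usage sketch of this first step (hypothetical driver code,
not part of this patch; dumpExtractedCalls and its output format are
made up for illustration):

  #include "llvm/IR/Module.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Transforms/Instrumentation/MemProfiler.h"

  // Dump the extracted per-caller call sites; each list comes back
  // sorted by (LineOffset, Column), ready for the undrift step.
  void dumpExtractedCalls(llvm::Module &M) {
    auto Calls = llvm::memprof::extractCallsFromIR(M);
    for (const auto &[CallerGUID, CallSites] : Calls)
      for (const auto &[Loc, CalleeGUID] : CallSites)
        llvm::errs() << CallerGUID << " +" << Loc.LineOffset << ":"
                     << Loc.Column << " -> " << CalleeGUID << "\n";
  }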
This patch implements the first bullet point above. Specifically, given
the IR, the new function returns a map from caller GUIDs to lists of
corresponding call sites. For example:

Given:

  foo() {
    f1();
    f2(); f3();
  }

extractCallsFromIR returns:

  Caller: foo -> {{(Line 1, Column 3), Callee: f1},
                  {(Line 2, Column 3), Callee: f2},
                  {(Line 2, Column 9), Callee: f3}}

where the line numbers, relative to the beginning of the caller, and
column numbers are sorted in ascending order. The value side of the map
-- the list of call sites -- can be directly passed to
longestCommonSequence.

To facilitate the review process, I've only implemented basic features
in extractCallsFromIR in this patch.

- The new function extracts calls from the LLVM "call" instructions
  only. It does not look into the inline stack.

- It does not recognize or treat heap allocation functions in any
  special way.

I will address these missing features in subsequent patches.
---
 .../Transforms/Instrumentation/MemProfiler.h  |  32 ++++++
 .../Instrumentation/MemProfiler.cpp           |  47 ++++++++
 .../Transforms/Instrumentation/CMakeLists.txt |   1 +
 .../Instrumentation/MemProfUseTest.cpp        | 104 ++++++++++++++++++
 4 files changed, 184 insertions(+)
 create mode 100644 llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp

diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
index f92c6b4775a2a2..f168ffc4fdb1ef 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -57,6 +57,38 @@ class MemProfUsePass : public PassInfoMixin<MemProfUsePass> {
   IntrusiveRefCntPtr<vfs::FileSystem> FS;
 };

+namespace memprof {
+
+struct LineLocation {
+  LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Column(D) {}
+
+  bool operator<(const LineLocation &O) const {
+    return LineOffset < O.LineOffset ||
+           (LineOffset == O.LineOffset && Column < O.Column);
+  }
+
+  bool operator==(const LineLocation &O) const {
+    return LineOffset == O.LineOffset && Column == O.Column;
+  }
+
+  bool operator!=(const LineLocation &O) const {
+    return LineOffset != O.LineOffset || Column != O.Column;
+  }
+
+  uint64_t getHashCode() const { return ((uint64_t)Column << 32) | LineOffset; }
+
+  uint32_t LineOffset;
+  uint32_t Column;
+};
+
+// A pair of a call site location and its corresponding callee GUID.
+using CallEdgeTy = std::pair<LineLocation, uint64_t>;
+
+// Extract all calls from the IR. Arrange them in a map from caller GUIDs to a
+// list of call sites, each of the form {LineLocation, CalleeGUID}.
+DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> extractCallsFromIR(Module &M);
+
+} // namespace memprof
 } // namespace llvm
 #endif
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 70bee30fd151f6..0b4d3ff201e622 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -795,6 +795,53 @@ struct AllocMatchInfo {
   bool Matched = false;
 };

+DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
+memprof::extractCallsFromIR(Module &M) {
+  DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;
+
+  auto GetOffset = [](const DILocation *DIL) {
+    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
+           0xffff;
+  };
+
+  for (Function &F : M) {
+    if (F.isDeclaration())
+      continue;
+
+    for (auto &BB : F) {
+      for (auto &I : BB) {
+        const DILocation *DIL = I.getDebugLoc();
+        if (!DIL)
+          continue;
+
+        if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
+          continue;
+
+        auto *CB = dyn_cast<CallBase>(&I);
+        auto *CalledFunction = CB->getCalledFunction();
+        // Disregard indirect calls and intrinsics.
+        if (!CalledFunction || CalledFunction->isIntrinsic())
+          continue;
+
+        StringRef CalleeName = CalledFunction->getName();
+        uint64_t CallerGUID =
+            IndexedMemProfRecord::getGUID(DIL->getSubprogramLinkageName());
+        uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
+        LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
+        Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
+      }
+    }
+  }
+
+  // Sort each call list by source location and drop duplicate entries.
+  for (auto &[CallerGUID, CallList] : Calls) {
+    llvm::sort(CallList);
+    CallList.erase(llvm::unique(CallList), CallList.end());
+  }
+
+  return Calls;
+}
+
 static void readMemprof(Module &M, Function &F,
                         IndexedInstrProfReader *MemProfReader,
                         const TargetLibraryInfo &TLI,
diff --git a/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt b/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt
index 1f249b0049d062..80fac2353be416 100644
--- a/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
 )

 add_llvm_unittest(InstrumentationTests
+  MemProfUseTest.cpp
   PGOInstrumentationTest.cpp
   )

diff --git a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
new file mode 100644
index 00000000000000..a510a57099aba4
--- /dev/null
+++ b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
@@ -0,0 +1,104 @@
+//===- MemProfUseTest.cpp - MemProf use tests -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/ProfileData/MemProf.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Transforms/Instrumentation/MemProfiler.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { +using namespace llvm; +using namespace llvm::memprof; +using testing::FieldsAre; +using testing::Pair; +using testing::SizeIs; + +TEST(MemProf, ExtractDirectCallsFromIR) { + // The following IR is generated from: + // + // void f1(); + // void f2(); + // void f3(); + // + // void foo() { + // f1(); + // f2(); f3(); + // } + StringRef IR = R"IR( +define dso_local void @_Z3foov() !dbg !10 { +entry: + call void @_Z2f1v(), !dbg !13 + call void @_Z2f2v(), !dbg !14 + call void @_Z2f3v(), !dbg !15 + ret void, !dbg !16 +} + +declare !dbg !17 void @_Z2f1v() + +declare !dbg !18 void @_Z2f2v() + +declare !dbg !19 void @_Z2f3v() + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "foobar.cc", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"MemProfProfileFilename", !"memprof.profraw"} +!6 = !{i32 8, !"PIC Level", i32 2} +!7 = !{i32 7, !"PIE Level", i32 2} +!8 = !{i32 7, !"uwtable", i32 2} +!9 = !{!"clang"} +!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 5, type: !11, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!11 = !DISubroutineType(types: !12) +!12 = !{} +!13 = !DILocation(line: 6, column: 3, scope: !10) +!14 = !DILocation(line: 7, column: 3, scope: !10) +!15 = !DILocation(line: 7, column: 9, scope: !10) +!16 = !DILocation(line: 8, column: 1, scope: !10) +!17 = !DISubprogram(name: "f1", linkageName: "_Z2f1v", scope: !1, file: !1, line: 1, type: !11, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) +!18 = !DISubprogram(name: "f2", linkageName: "_Z2f2v", scope: !1, file: !1, line: 2, type: !11, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) +!19 = !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !11, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) +)IR"; + + LLVMContext Ctx; + SMDiagnostic Err; + std::unique_ptr M = parseAssemblyString(IR, Err, Ctx); + ASSERT_TRUE(M); + + auto Calls = extractCallsFromIR(*M); + + // Expect exactly one caller. + ASSERT_THAT(Calls, SizeIs(1)); + + auto It = Calls.begin(); + ASSERT_NE(It, Calls.end()); + + const auto &[CallerGUID, CallSites] = *It; + EXPECT_EQ(CallerGUID, IndexedMemProfRecord::getGUID("_Z3foov")); + ASSERT_THAT(CallSites, SizeIs(3)); + + // Verify that call sites show up in the ascending order of their source + // locations. 
+ EXPECT_THAT(CallSites[0], + Pair(FieldsAre(1U, 3U), IndexedMemProfRecord::getGUID("_Z2f1v"))); + EXPECT_THAT(CallSites[1], + Pair(FieldsAre(2U, 3U), IndexedMemProfRecord::getGUID("_Z2f2v"))); + EXPECT_THAT(CallSites[2], + Pair(FieldsAre(2U, 9U), IndexedMemProfRecord::getGUID("_Z2f3v"))); +} +} // namespace From 53e49f15ab0b9b03e5671faea6f7870914b8f0ea Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Thu, 7 Nov 2024 14:40:21 -0800 Subject: [PATCH 33/40] [clang][serialization] Pass `ASTContext` explicitly (#115235) This patch removes `ASTWriter::Context` and starts passing `ASTContext &` explicitly to functions that actually need it. This is a non-functional change with the end-goal of being able to write lightweight PCM files with no `ASTContext` at all. --- .../clang/Serialization/ASTRecordWriter.h | 7 +- clang/include/clang/Serialization/ASTWriter.h | 28 ++--- clang/lib/Serialization/ASTWriter.cpp | 118 +++++++++--------- clang/lib/Serialization/ASTWriterDecl.cpp | 55 ++++---- clang/lib/Serialization/ASTWriterStmt.cpp | 15 +-- 5 files changed, 111 insertions(+), 112 deletions(-) diff --git a/clang/include/clang/Serialization/ASTRecordWriter.h b/clang/include/clang/Serialization/ASTRecordWriter.h index d6090ba1a6c690..67720a0aebc1ca 100644 --- a/clang/include/clang/Serialization/ASTRecordWriter.h +++ b/clang/include/clang/Serialization/ASTRecordWriter.h @@ -60,8 +60,9 @@ class ASTRecordWriter public: /// Construct a ASTRecordWriter that uses the default encoding scheme. - ASTRecordWriter(ASTWriter &W, ASTWriter::RecordDataImpl &Record) - : DataStreamBasicWriter(W.getASTContext()), Writer(&W), Record(&Record) {} + ASTRecordWriter(ASTContext &Context, ASTWriter &W, + ASTWriter::RecordDataImpl &Record) + : DataStreamBasicWriter(Context), Writer(&W), Record(&Record) {} /// Construct a ASTRecordWriter that uses the same encoding scheme as another /// ASTRecordWriter. @@ -208,7 +209,7 @@ class ASTRecordWriter /// Emit a reference to a type. void AddTypeRef(QualType T) { - return Writer->AddTypeRef(T, *Record); + return Writer->AddTypeRef(getASTContext(), T, *Record); } void writeQualType(QualType T) { AddTypeRef(T); diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index d0e841f367c1e0..dc9fcd3c33726e 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -119,9 +119,6 @@ class ASTWriter : public ASTDeserializationListener, /// The PCM manager which manages memory buffers for pcm files. InMemoryModuleCache &ModuleCache; - /// The ASTContext we're writing. - ASTContext *Context = nullptr; - /// The preprocessor we're writing. Preprocessor *PP = nullptr; @@ -545,7 +542,7 @@ class ASTWriter : public ASTDeserializationListener, unsigned getSubmoduleID(Module *Mod); /// Write the given subexpression to the bitstream. 
- void WriteSubStmt(Stmt *S); + void WriteSubStmt(ASTContext &Context, Stmt *S); void WriteBlockInfoBlock(); void WriteControlBlock(Preprocessor &PP, StringRef isysroot); @@ -564,25 +561,25 @@ class ASTWriter : public ASTDeserializationListener, void WriteHeaderSearch(const HeaderSearch &HS); void WritePreprocessorDetail(PreprocessingRecord &PPRec, uint64_t MacroOffsetsBase); - void WriteSubmodules(Module *WritingModule); + void WriteSubmodules(Module *WritingModule, ASTContext &Context); void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, bool isModule); unsigned TypeExtQualAbbrev = 0; void WriteTypeAbbrevs(); - void WriteType(QualType T); + void WriteType(ASTContext &Context, QualType T); bool isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC); - void GenerateNameLookupTable(const DeclContext *DC, + void GenerateNameLookupTable(ASTContext &Context, const DeclContext *DC, llvm::SmallVectorImpl &LookupTable); uint64_t WriteDeclContextLexicalBlock(ASTContext &Context, const DeclContext *DC); uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC); void WriteTypeDeclOffsets(); void WriteFileDeclIDsMap(); - void WriteComments(); + void WriteComments(ASTContext &Context); void WriteSelectors(Sema &SemaRef); void WriteReferencedSelectorsPool(Sema &SemaRef); void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver &IdResolver, @@ -590,8 +587,10 @@ class ASTWriter : public ASTDeserializationListener, void WriteDeclAndTypes(ASTContext &Context); void PrepareWritingSpecialDecls(Sema &SemaRef); void WriteSpecialDeclRecords(Sema &SemaRef); - void WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord); - void WriteDeclContextVisibleUpdate(const DeclContext *DC); + void WriteDeclUpdatesBlocks(ASTContext &Context, + RecordDataImpl &OffsetsRecord); + void WriteDeclContextVisibleUpdate(ASTContext &Context, + const DeclContext *DC); void WriteFPPragmaOptions(const FPOptionsOverride &Opts); void WriteOpenCLExtensions(Sema &SemaRef); void WriteCUDAPragmas(Sema &SemaRef); @@ -653,11 +652,6 @@ class ASTWriter : public ASTDeserializationListener, bool GeneratingReducedBMI = false); ~ASTWriter() override; - ASTContext &getASTContext() const { - assert(Context && "requested AST context when not writing AST"); - return *Context; - } - const LangOptions &getLangOpts() const; /// Get a timestamp for output into the AST file. The actual timestamp @@ -723,10 +717,10 @@ class ASTWriter : public ASTDeserializationListener, uint32_t getMacroDirectivesOffset(const IdentifierInfo *Name); /// Emit a reference to a type. - void AddTypeRef(QualType T, RecordDataImpl &Record); + void AddTypeRef(ASTContext &Context, QualType T, RecordDataImpl &Record); /// Force a type to be emitted and get its ID. - serialization::TypeID GetOrCreateTypeID(QualType T); + serialization::TypeID GetOrCreateTypeID(ASTContext &Context, QualType T); /// Find the first local declaration of a given local redeclarable /// decl. 
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index b95e29cbc02515..016d1d4acad137 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -277,8 +277,8 @@ class ASTTypeWriter { ASTRecordWriter BasicWriter; public: - ASTTypeWriter(ASTWriter &Writer) - : Writer(Writer), BasicWriter(Writer, Record) {} + ASTTypeWriter(ASTContext &Context, ASTWriter &Writer) + : Writer(Writer), BasicWriter(Context, Writer, Record) {} uint64_t write(QualType T) { if (T.hasLocalNonFastQualifiers()) { @@ -2872,7 +2872,7 @@ static unsigned getNumberOfModules(Module *Mod) { return ChildModules + 1; } -void ASTWriter::WriteSubmodules(Module *WritingModule) { +void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext &Context) { // Enter the submodule description block. Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5); @@ -3124,7 +3124,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { // Emit the reachable initializers. // The initializer may only be unreachable in reduced BMI. RecordData Inits; - for (Decl *D : Context->getModuleInitializers(Mod)) + for (Decl *D : Context.getModuleInitializers(Mod)) if (wasDeclEmitted(D)) AddDeclRef(D, Inits); if (!Inits.empty()) @@ -3259,7 +3259,7 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, //===----------------------------------------------------------------------===// /// Write the representation of a type to the AST stream. -void ASTWriter::WriteType(QualType T) { +void ASTWriter::WriteType(ASTContext &Context, QualType T) { TypeIdx &IdxRef = TypeIdxs[T]; if (IdxRef.getValue() == 0) // we haven't seen this type before. IdxRef = TypeIdx(0, NextTypeID++); @@ -3269,7 +3269,8 @@ void ASTWriter::WriteType(QualType T) { assert(Idx.getValue() >= FirstTypeID && "Writing predefined type"); // Emit the type's representation. - uint64_t Offset = ASTTypeWriter(*this).write(T) - DeclTypesBlockStartOffset; + uint64_t Offset = + ASTTypeWriter(Context, *this).write(T) - DeclTypesBlockStartOffset; // Record the offset for this type. uint64_t Index = Idx.getValue() - FirstTypeID; @@ -3393,7 +3394,7 @@ void ASTWriter::WriteFileDeclIDsMap() { Stream.EmitRecordWithBlob(AbbrevCode, Record, bytes(FileGroupedDeclIDs)); } -void ASTWriter::WriteComments() { +void ASTWriter::WriteComments(ASTContext &Context) { Stream.EnterSubblock(COMMENTS_BLOCK_ID, 3); auto _ = llvm::make_scope_exit([this] { Stream.ExitBlock(); }); if (!PP->getPreprocessorOpts().WriteCommentListToPCH) @@ -3406,7 +3407,7 @@ void ASTWriter::WriteComments() { return; RecordData Record; - for (const auto &FO : Context->Comments.OrderedComments) { + for (const auto &FO : Context.Comments.OrderedComments) { for (const auto &OC : FO.second) { const RawComment *I = OC.second; Record.clear(); @@ -3656,7 +3657,7 @@ void ASTWriter::WriteReferencedSelectorsPool(Sema &SemaRef) { return; RecordData Record; - ASTRecordWriter Writer(*this, Record); + ASTRecordWriter Writer(SemaRef.Context, *this, Record); // Note: this writes out all references even for a dependent AST. 
But it is // very tricky to fix, and given that @selector shouldn't really appear in @@ -4137,9 +4138,9 @@ static bool isLookupResultNotInteresting(ASTWriter &Writer, return true; } -void -ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC, - llvm::SmallVectorImpl &LookupTable) { +void ASTWriter::GenerateNameLookupTable( + ASTContext &Context, const DeclContext *ConstDC, + llvm::SmallVectorImpl &LookupTable) { assert(!ConstDC->hasLazyLocalLexicalLookups() && !ConstDC->hasLazyExternalLexicalLookups() && "must call buildLookups first"); @@ -4234,8 +4235,8 @@ ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC, // another declaration in the redecl chain. Any non-implicit constructor or // conversion function which doesn't occur in all the lexical contexts // would be an ODR violation. - auto ImplicitCtorName = Context->DeclarationNames.getCXXConstructorName( - Context->getCanonicalType(Context->getRecordType(D))); + auto ImplicitCtorName = Context.DeclarationNames.getCXXConstructorName( + Context.getCanonicalType(Context.getRecordType(D))); if (ConstructorNameSet.erase(ImplicitCtorName)) Names.push_back(ImplicitCtorName); @@ -4415,7 +4416,7 @@ uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context, // Create the on-disk hash table in a buffer. SmallString<4096> LookupTable; - GenerateNameLookupTable(DC, LookupTable); + GenerateNameLookupTable(Context, DC, LookupTable); // Write the lookup table RecordData::value_type Record[] = {DECL_CONTEXT_VISIBLE}; @@ -4431,14 +4432,15 @@ uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context, /// DeclContext in a dependent AST file. As such, they only exist for the TU /// (in C++), for namespaces, and for classes with forward-declared unscoped /// enumeration members (in C++11). -void ASTWriter::WriteDeclContextVisibleUpdate(const DeclContext *DC) { +void ASTWriter::WriteDeclContextVisibleUpdate(ASTContext &Context, + const DeclContext *DC) { StoredDeclsMap *Map = DC->getLookupPtr(); if (!Map || Map->empty()) return; // Create the on-disk hash table in a buffer. SmallString<4096> LookupTable; - GenerateNameLookupTable(DC, LookupTable); + GenerateNameLookupTable(Context, DC, LookupTable); // If we're updating a namespace, select a key declaration as the key for the // update record; those are the only ones that will be checked on reload. @@ -4753,15 +4755,14 @@ void ASTWriter::AddString(StringRef Str, RecordDataImpl &Record) { } bool ASTWriter::PreparePathForOutput(SmallVectorImpl &Path) { - assert(Context && "should have context when outputting path"); + assert(WritingAST && "can't prepare path for output when not writing AST"); // Leave special file names as they are. StringRef PathStr(Path.data(), Path.size()); if (PathStr == "" || PathStr == "") return false; - bool Changed = - cleanPathForOutput(Context->getSourceManager().getFileManager(), Path); + bool Changed = cleanPathForOutput(PP->getFileManager(), Path); // Remove a prefix to make the path relative, if relevant. 
const char *PathBegin = Path.data(); @@ -4850,7 +4851,7 @@ ASTWriter::~ASTWriter() = default; const LangOptions &ASTWriter::getLangOpts() const { assert(WritingAST && "can't determine lang opts when not writing AST"); - return Context->getLangOpts(); + return PP->getLangOpts(); } time_t ASTWriter::getTimestampForOutput(const FileEntry *E) const { @@ -4874,11 +4875,9 @@ ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef, StringRef OutputFile, WriteBlockInfoBlock(); - Context = &SemaRef.Context; PP = &SemaRef.PP; this->WritingModule = WritingModule; ASTFileSignature Signature = WriteASTCore(SemaRef, isysroot, WritingModule); - Context = nullptr; PP = nullptr; this->WritingModule = nullptr; this->BaseDirectory.clear(); @@ -5417,14 +5416,14 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, // Form the record of special types. RecordData SpecialTypes; - AddTypeRef(Context.getRawCFConstantStringType(), SpecialTypes); - AddTypeRef(Context.getFILEType(), SpecialTypes); - AddTypeRef(Context.getjmp_bufType(), SpecialTypes); - AddTypeRef(Context.getsigjmp_bufType(), SpecialTypes); - AddTypeRef(Context.ObjCIdRedefinitionType, SpecialTypes); - AddTypeRef(Context.ObjCClassRedefinitionType, SpecialTypes); - AddTypeRef(Context.ObjCSelRedefinitionType, SpecialTypes); - AddTypeRef(Context.getucontext_tType(), SpecialTypes); + AddTypeRef(Context, Context.getRawCFConstantStringType(), SpecialTypes); + AddTypeRef(Context, Context.getFILEType(), SpecialTypes); + AddTypeRef(Context, Context.getjmp_bufType(), SpecialTypes); + AddTypeRef(Context, Context.getsigjmp_bufType(), SpecialTypes); + AddTypeRef(Context, Context.ObjCIdRedefinitionType, SpecialTypes); + AddTypeRef(Context, Context.ObjCClassRedefinitionType, SpecialTypes); + AddTypeRef(Context, Context.ObjCSelRedefinitionType, SpecialTypes); + AddTypeRef(Context, Context.getucontext_tType(), SpecialTypes); PrepareWritingSpecialDecls(SemaRef); @@ -5523,7 +5522,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, WriteFileDeclIDsMap(); WriteSourceManagerBlock(PP.getSourceManager()); - WriteComments(); + WriteComments(Context); WritePreprocessor(PP, isModule); WriteHeaderSearch(PP.getHeaderSearchInfo()); WriteSelectors(SemaRef); @@ -5536,7 +5535,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, // If we're emitting a module, write out the submodule information. if (WritingModule) - WriteSubmodules(WritingModule); + WriteSubmodules(WritingModule, SemaRef.Context); Stream.EmitRecord(SPECIAL_TYPES, SpecialTypes); @@ -5656,12 +5655,12 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { WriteTypeAbbrevs(); WriteDeclAbbrevs(); do { - WriteDeclUpdatesBlocks(DeclUpdatesOffsetsRecord); + WriteDeclUpdatesBlocks(Context, DeclUpdatesOffsetsRecord); while (!DeclTypesToEmit.empty()) { DeclOrType DOT = DeclTypesToEmit.front(); DeclTypesToEmit.pop(); if (DOT.isType()) - WriteType(DOT.getType()); + WriteType(Context, DOT.getType()); else WriteDecl(Context, DOT.getDecl()); } @@ -5757,18 +5756,19 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { UpdateVisibleAbbrev = Stream.EmitAbbrev(std::move(Abv)); // And a visible updates block for the translation unit. - WriteDeclContextVisibleUpdate(TU); + WriteDeclContextVisibleUpdate(Context, TU); // If we have any extern "C" names, write out a visible update for them. 
if (Context.ExternCContext) - WriteDeclContextVisibleUpdate(Context.ExternCContext); + WriteDeclContextVisibleUpdate(Context, Context.ExternCContext); // Write the visible updates to DeclContexts. for (auto *DC : UpdatedDeclContexts) - WriteDeclContextVisibleUpdate(DC); + WriteDeclContextVisibleUpdate(Context, DC); } -void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { +void ASTWriter::WriteDeclUpdatesBlocks(ASTContext &Context, + RecordDataImpl &OffsetsRecord) { if (DeclUpdates.empty()) return; @@ -5781,7 +5781,7 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { bool HasUpdatedBody = false; bool HasAddedVarDefinition = false; RecordData RecordData; - ASTRecordWriter Record(*this, RecordData); + ASTRecordWriter Record(Context, *this, RecordData); for (auto &Update : DeclUpdate.second) { DeclUpdateKind Kind = (DeclUpdateKind)Update.getKind(); @@ -5827,7 +5827,7 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { Record.push_back(RD->isParamDestroyedInCallee()); Record.push_back(llvm::to_underlying(RD->getArgPassingRestrictions())); Record.AddCXXDefinitionData(RD); - Record.AddOffset(WriteDeclContextLexicalBlock(*Context, RD)); + Record.AddOffset(WriteDeclContextLexicalBlock(Context, RD)); // This state is sometimes updated by template instantiation, when we // switch from the specialization referring to the template declaration @@ -5880,7 +5880,7 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { } case UPD_CXX_DEDUCED_RETURN_TYPE: - Record.push_back(GetOrCreateTypeID(Update.getType())); + Record.push_back(GetOrCreateTypeID(Context, Update.getType())); break; case UPD_DECL_MARKED_USED: @@ -6022,8 +6022,7 @@ ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq) { unsigned ModuleFileIndex = 0; // See SourceLocationEncoding.h for the encoding details. 
- if (Context->getSourceManager().isLoadedSourceLocation(Loc) && - Loc.isValid()) { + if (PP->getSourceManager().isLoadedSourceLocation(Loc) && Loc.isValid()) { assert(getChain()); auto SLocMapI = getChain()->GlobalSLocOffsetMap.find( SourceManager::MaxLoadedOffset - Loc.getOffset() - 1); @@ -6184,8 +6183,9 @@ void ASTRecordWriter::AddTypeLoc(TypeLoc TL, LocSeq *OuterSeq) { TLW.Visit(TL); } -void ASTWriter::AddTypeRef(QualType T, RecordDataImpl &Record) { - Record.push_back(GetOrCreateTypeID(T)); +void ASTWriter::AddTypeRef(ASTContext &Context, QualType T, + RecordDataImpl &Record) { + Record.push_back(GetOrCreateTypeID(Context, T)); } template @@ -6213,9 +6213,8 @@ static TypeID MakeTypeID(ASTContext &Context, QualType T, return IdxForType(T).asTypeID(FastQuals); } -TypeID ASTWriter::GetOrCreateTypeID(QualType T) { - assert(Context); - return MakeTypeID(*Context, T, [&](QualType T) -> TypeIdx { +TypeID ASTWriter::GetOrCreateTypeID(ASTContext &Context, QualType T) { + return MakeTypeID(Context, T, [&](QualType T) -> TypeIdx { if (T.isNull()) return TypeIdx(); assert(!T.getLocalFastQualifiers()); @@ -6335,7 +6334,7 @@ void ASTWriter::associateDeclWithFile(const Decl *D, LocalDeclID ID) { if (isa(D)) return; - SourceManager &SM = Context->getSourceManager(); + SourceManager &SM = PP->getSourceManager(); SourceLocation FileLoc = SM.getFileLoc(Loc); assert(SM.isLocalSourceLocation(FileLoc)); FileID FID; @@ -6530,10 +6529,10 @@ void ASTRecordWriter::AddCXXBaseSpecifier(const CXXBaseSpecifier &Base) { : SourceLocation()); } -static uint64_t EmitCXXBaseSpecifiers(ASTWriter &W, +static uint64_t EmitCXXBaseSpecifiers(ASTContext &Context, ASTWriter &W, ArrayRef Bases) { ASTWriter::RecordData Record; - ASTRecordWriter Writer(W, Record); + ASTRecordWriter Writer(Context, W, Record); Writer.push_back(Bases.size()); for (auto &Base : Bases) @@ -6544,14 +6543,14 @@ static uint64_t EmitCXXBaseSpecifiers(ASTWriter &W, // FIXME: Move this out of the main ASTRecordWriter interface. void ASTRecordWriter::AddCXXBaseSpecifiers(ArrayRef Bases) { - AddOffset(EmitCXXBaseSpecifiers(*Writer, Bases)); + AddOffset(EmitCXXBaseSpecifiers(getASTContext(), *Writer, Bases)); } static uint64_t -EmitCXXCtorInitializers(ASTWriter &W, +EmitCXXCtorInitializers(ASTContext &Context, ASTWriter &W, ArrayRef CtorInits) { ASTWriter::RecordData Record; - ASTRecordWriter Writer(W, Record); + ASTRecordWriter Writer(Context, W, Record); Writer.push_back(CtorInits.size()); for (auto *Init : CtorInits) { @@ -6585,7 +6584,7 @@ EmitCXXCtorInitializers(ASTWriter &W, // FIXME: Move this out of the main ASTRecordWriter interface. void ASTRecordWriter::AddCXXCtorInitializers( ArrayRef CtorInits) { - AddOffset(EmitCXXCtorInitializers(*Writer, CtorInits)); + AddOffset(EmitCXXCtorInitializers(getASTContext(), *Writer, CtorInits)); } void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { @@ -6613,18 +6612,17 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { bool ModulesCodegen = !D->isDependentType() && - (Writer->Context->getLangOpts().ModulesDebugInfo || - D->isInNamedModule()); + (Writer->getLangOpts().ModulesDebugInfo || D->isInNamedModule()); Record->push_back(ModulesCodegen); if (ModulesCodegen) Writer->AddDeclRef(D, Writer->ModularCodegenDecls); // IsLambda bit is already saved. 
- AddUnresolvedSet(Data.Conversions.get(*Writer->Context)); + AddUnresolvedSet(Data.Conversions.get(getASTContext())); Record->push_back(Data.ComputedVisibleConversions); if (Data.ComputedVisibleConversions) - AddUnresolvedSet(Data.VisibleConversions.get(*Writer->Context)); + AddUnresolvedSet(Data.VisibleConversions.get(getASTContext())); // Data.Definition is the owning decl, no need to write it. if (!Data.IsLambda) { diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index b9ce3db41ef916..ad357e30d57529 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -34,7 +34,6 @@ using namespace serialization; namespace clang { class ASTDeclWriter : public DeclVisitor { ASTWriter &Writer; - ASTContext &Context; ASTRecordWriter Record; serialization::DeclCode Code; @@ -45,7 +44,7 @@ namespace clang { public: ASTDeclWriter(ASTWriter &Writer, ASTContext &Context, ASTWriter::RecordDataImpl &Record, bool GeneratingReducedBMI) - : Writer(Writer), Context(Context), Record(Writer, Record), + : Writer(Writer), Record(Context, Writer, Record), Code((serialization::DeclCode)0), AbbrevToUse(0), GeneratingReducedBMI(GeneratingReducedBMI) {} @@ -217,7 +216,7 @@ namespace clang { // If we have any lazy specializations, and the external AST source is // our chained AST reader, we can just write out the DeclIDs. Otherwise, // we need to resolve them to actual declarations. - if (Writer.Chain != Writer.Context->getExternalSource() && + if (Writer.Chain != Record.getASTContext().getExternalSource() && Common->LazySpecializations) { D->LoadLazySpecializations(); assert(!Common->LazySpecializations); @@ -811,8 +810,8 @@ void ASTDeclWriter::VisitObjCMethodDecl(ObjCMethodDecl *D) { Record.push_back(D->isRedeclaration()); Record.push_back(D->hasRedeclaration()); if (D->hasRedeclaration()) { - assert(Context.getObjCMethodRedeclaration(D)); - Record.AddDeclRef(Context.getObjCMethodRedeclaration(D)); + assert(Record.getASTContext().getObjCMethodRedeclaration(D)); + Record.AddDeclRef(Record.getASTContext().getObjCMethodRedeclaration(D)); } // FIXME: stable encoding for @required/@optional @@ -1039,7 +1038,8 @@ void ASTDeclWriter::VisitFieldDecl(FieldDecl *D) { Record.AddStmt(D->getBitWidth()); if (!D->getDeclName() || D->isPlaceholderVar(Writer.getLangOpts())) - Record.AddDeclRef(Context.getInstantiatedFromUnnamedFieldDecl(D)); + Record.AddDeclRef( + Record.getASTContext().getInstantiatedFromUnnamedFieldDecl(D)); if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && @@ -1118,11 +1118,11 @@ void ASTDeclWriter::VisitVarDecl(VarDecl *D) { // strong definition in the module interface is provided by the // compilation of that unit, not by its users. (Inline variables are still // emitted in module users.) 
- ModulesCodegen = - (Writer.WritingModule->isInterfaceOrPartition() || - (D->hasAttr() && - Writer.Context->getLangOpts().BuildingPCHWithObjectFile)) && - Writer.Context->GetGVALinkageForVariable(D) >= GVA_StrongExternal; + ModulesCodegen = (Writer.WritingModule->isInterfaceOrPartition() || + (D->hasAttr() && + Writer.getLangOpts().BuildingPCHWithObjectFile)) && + Record.getASTContext().GetGVALinkageForVariable(D) >= + GVA_StrongExternal; } VarDeclBits.addBit(ModulesCodegen); @@ -1163,7 +1163,7 @@ void ASTDeclWriter::VisitVarDecl(VarDecl *D) { Writer.AddDeclRef(D, Writer.ModularCodegenDecls); if (D->hasAttr()) { - BlockVarCopyInit Init = Writer.Context->getBlockVarCopyInit(D); + BlockVarCopyInit Init = Record.getASTContext().getBlockVarCopyInit(D); Record.AddStmt(Init.getCopyExpr()); if (Init.getCopyExpr()) Record.push_back(Init.canThrow()); @@ -1411,7 +1411,7 @@ void ASTDeclWriter::VisitUsingDecl(UsingDecl *D) { Record.AddDeclarationNameLoc(D->DNLoc, D->getDeclName()); Record.AddDeclRef(D->FirstUsingShadow.getPointer()); Record.push_back(D->hasTypename()); - Record.AddDeclRef(Context.getInstantiatedFromUsingDecl(D)); + Record.AddDeclRef(Record.getASTContext().getInstantiatedFromUsingDecl(D)); Code = serialization::DECL_USING; } @@ -1421,7 +1421,7 @@ void ASTDeclWriter::VisitUsingEnumDecl(UsingEnumDecl *D) { Record.AddSourceLocation(D->getEnumLoc()); Record.AddTypeSourceInfo(D->getEnumType()); Record.AddDeclRef(D->FirstUsingShadow.getPointer()); - Record.AddDeclRef(Context.getInstantiatedFromUsingEnumDecl(D)); + Record.AddDeclRef(Record.getASTContext().getInstantiatedFromUsingEnumDecl(D)); Code = serialization::DECL_USING_ENUM; } @@ -1440,7 +1440,8 @@ void ASTDeclWriter::VisitUsingShadowDecl(UsingShadowDecl *D) { Record.AddDeclRef(D->getTargetDecl()); Record.push_back(D->getIdentifierNamespace()); Record.AddDeclRef(D->UsingOrNextShadow); - Record.AddDeclRef(Context.getInstantiatedFromUsingShadowDecl(D)); + Record.AddDeclRef( + Record.getASTContext().getInstantiatedFromUsingShadowDecl(D)); if (D->getDeclContext() == D->getLexicalDeclContext() && D->getFirstDecl() == D->getMostRecentDecl() && !D->hasAttrs() && @@ -1544,7 +1545,7 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) { // FIXME: Avoid adding the key function if the class is defined in // module purview since in that case the key function is meaningless. if (D->isCompleteDefinition()) - Record.AddDeclRef(Context.getCurrentKeyFunction(D)); + Record.AddDeclRef(Record.getASTContext().getCurrentKeyFunction(D)); Code = serialization::DECL_CXX_RECORD; } @@ -1735,7 +1736,8 @@ void ASTDeclWriter::VisitClassTemplateDecl(ClassTemplateDecl *D) { // Force emitting the corresponding deduction guide in reduced BMI mode. // Otherwise, the deduction guide may be optimized out incorrectly. if (Writer.isGeneratingReducedBMI()) { - auto Name = Context.DeclarationNames.getCXXDeductionGuideName(D); + auto Name = + Record.getASTContext().DeclarationNames.getCXXDeductionGuideName(D); for (auto *DG : D->getDeclContext()->noload_lookup(Name)) Writer.GetDeclRef(DG->getCanonicalDecl()); } @@ -1791,8 +1793,9 @@ void ASTDeclWriter::VisitClassTemplateSpecializationDecl( // FIXME: Would it be more efficient to add a callback register function // in sema to register the deduction guide? 
if (Writer.isWritingStdCXXNamedModules()) { - auto Name = Context.DeclarationNames.getCXXDeductionGuideName( - D->getSpecializedTemplate()); + auto Name = + Record.getASTContext().DeclarationNames.getCXXDeductionGuideName( + D->getSpecializedTemplate()); for (auto *DG : D->getDeclContext()->noload_lookup(Name)) Writer.GetDeclRef(DG->getCanonicalDecl()); } @@ -2022,8 +2025,10 @@ void ASTDeclWriter::VisitDeclContext(DeclContext *DC) { // details. Writer.DelayedNamespace.push_back(cast(DC)); } else { - LexicalOffset = Writer.WriteDeclContextLexicalBlock(Context, DC); - VisibleOffset = Writer.WriteDeclContextVisibleBlock(Context, DC); + LexicalOffset = + Writer.WriteDeclContextLexicalBlock(Record.getASTContext(), DC); + VisibleOffset = + Writer.WriteDeclContextVisibleBlock(Record.getASTContext(), DC); } Record.AddOffset(LexicalOffset); @@ -2880,18 +2885,18 @@ void ASTRecordWriter::AddFunctionDefinition(const FunctionDecl *FD) { // strong definition in the module interface is provided by the // compilation of that unit, not by its users. (Inline functions are still // emitted in module users.) - Linkage = Writer->Context->GetGVALinkageForFunction(FD); + Linkage = getASTContext().GetGVALinkageForFunction(FD); ModulesCodegen = *Linkage >= GVA_StrongExternal; } - if (Writer->Context->getLangOpts().ModulesCodegen || + if (Writer->getLangOpts().ModulesCodegen || (FD->hasAttr() && - Writer->Context->getLangOpts().BuildingPCHWithObjectFile)) { + Writer->getLangOpts().BuildingPCHWithObjectFile)) { // Under -fmodules-codegen, codegen is performed for all non-internal, // non-always_inline functions, unless they are available elsewhere. if (!FD->hasAttr()) { if (!Linkage) - Linkage = Writer->Context->GetGVALinkageForFunction(FD); + Linkage = getASTContext().GetGVALinkageForFunction(FD); ModulesCodegen = *Linkage != GVA_Internal && *Linkage != GVA_AvailableExternally; } diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 321e0031661ee2..7f700c2977e09c 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -91,8 +91,9 @@ namespace clang { PakedBitsWriter CurrentPackingBits; public: - ASTStmtWriter(ASTWriter &Writer, ASTWriter::RecordData &Record) - : Writer(Writer), Record(Writer, Record), + ASTStmtWriter(ASTContext &Context, ASTWriter &Writer, + ASTWriter::RecordData &Record) + : Writer(Writer), Record(Context, Writer, Record), Code(serialization::STMT_NULL_PTR), AbbrevToUse(0), CurrentPackingBits(this->Record) {} @@ -2112,7 +2113,7 @@ void ASTStmtWriter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *E) { // propagted. DeclarationName Name = E->getName(); for (auto *Found : - Writer.getASTContext().getTranslationUnitDecl()->lookup(Name)) + Record.getASTContext().getTranslationUnitDecl()->lookup(Name)) if (Found->isFromASTFile()) Writer.GetDeclRef(Found); @@ -2952,9 +2953,9 @@ void ASTWriter::ClearSwitchCaseIDs() { /// Write the given substatement or subexpression to the /// bitstream. 
-void ASTWriter::WriteSubStmt(Stmt *S) { +void ASTWriter::WriteSubStmt(ASTContext &Context, Stmt *S) { RecordData Record; - ASTStmtWriter Writer(*this, Record); + ASTStmtWriter Writer(Context, *this, Record); ++NumStatements; if (!S) { @@ -3003,7 +3004,7 @@ void ASTRecordWriter::FlushStmts() { assert(Writer->ParentStmts.empty() && "unexpected entries in parent stmt map"); for (unsigned I = 0, N = StmtsToEmit.size(); I != N; ++I) { - Writer->WriteSubStmt(StmtsToEmit[I]); + Writer->WriteSubStmt(getASTContext(), StmtsToEmit[I]); assert(N == StmtsToEmit.size() && "record modified while being written!"); @@ -3024,7 +3025,7 @@ void ASTRecordWriter::FlushSubStmts() { // that a simple stack machine can be used when loading), and don't emit a // STMT_STOP after each one. for (unsigned I = 0, N = StmtsToEmit.size(); I != N; ++I) { - Writer->WriteSubStmt(StmtsToEmit[N - I - 1]); + Writer->WriteSubStmt(getASTContext(), StmtsToEmit[N - I - 1]); assert(N == StmtsToEmit.size() && "record modified while being written!"); } From fd799add2186356dc19e81106a1428a2edf7c20b Mon Sep 17 00:00:00 2001 From: Alexander Richardson Date: Thu, 7 Nov 2024 14:50:10 -0800 Subject: [PATCH 34/40] Reapply "[libc++abi] Stop copying headers to the build directory" This was needed before https://github.com/llvm/llvm-project/pull/115077 since the compiler-rt test build made assumptions about the build layout of libc++ and libc++abi, but now they link against a local installation of these libraries so we no longer need this workaround. The last attempt at landing this was reverted due to buildbot failures which should be fixed by https://github.com/llvm/llvm-zorg/pull/299. Pull Request: https://github.com/llvm/llvm-project/pull/115379 --- libcxxabi/CMakeLists.txt | 6 ------ libcxxabi/include/CMakeLists.txt | 13 ------------- 2 files changed, 19 deletions(-) diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index da0e8b286cddc1..50e9a296a4a13b 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -86,12 +86,6 @@ set(LIBCXXABI_STATIC_OUTPUT_NAME "c++abi" CACHE STRING "Output name for the stat set(LIBCXXABI_INSTALL_INCLUDE_DIR "${CMAKE_INSTALL_INCLUDEDIR}/c++/v1" CACHE STRING "Path to install the libc++abi headers at.") -if(LLVM_LIBRARY_OUTPUT_INTDIR) - set(LIBCXXABI_GENERATED_INCLUDE_DIR "${LLVM_BINARY_DIR}/include/c++/v1") -else() - set(LIBCXXABI_GENERATED_INCLUDE_DIR "${CMAKE_BINARY_DIR}/include/c++/v1") -endif() - set(LIBCXXABI_LIBCXX_LIBRARY_PATH "" CACHE PATH "The path to libc++ library.") set(LIBCXXABI_LIBRARY_VERSION "1.0" CACHE STRING "Version of libc++abi. 
This will be reflected in the name of the shared \ diff --git a/libcxxabi/include/CMakeLists.txt b/libcxxabi/include/CMakeLists.txt index 5b1cc2545016ec..0deb7b1eb9e715 100644 --- a/libcxxabi/include/CMakeLists.txt +++ b/libcxxabi/include/CMakeLists.txt @@ -3,20 +3,7 @@ set(files cxxabi.h ) -foreach(f ${files}) - set(src "${CMAKE_CURRENT_SOURCE_DIR}/${f}") - set(dst "${LIBCXXABI_GENERATED_INCLUDE_DIR}/${f}") - add_custom_command(OUTPUT ${dst} - DEPENDS ${src} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst} - COMMENT "Copying CXXABI header ${f}") - list(APPEND _all_includes "${dst}") -endforeach() - -add_custom_target(generate-cxxabi-headers ALL DEPENDS ${_all_includes}) - add_library(cxxabi-headers INTERFACE) -add_dependencies(cxxabi-headers generate-cxxabi-headers) target_include_directories(cxxabi-headers INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}") if (LIBCXXABI_INSTALL_HEADERS) From 3b1b1271fb552c996d9fdfa9a997f33013dd275f Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Fri, 8 Nov 2024 01:50:42 +0300 Subject: [PATCH 35/40] [Xtensa] Implement support for the BranchRelaxation. (#113450) --- llvm/include/llvm/CodeGen/MachineFunction.h | 4 + llvm/lib/CodeGen/MachineFunction.cpp | 31 +++ llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp | 3 + .../lib/Target/Xtensa/XtensaFrameLowering.cpp | 21 +- llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp | 207 +++++++++++++++ llvm/lib/Target/Xtensa/XtensaInstrInfo.h | 17 ++ .../Target/Xtensa/XtensaMachineFunctionInfo.h | 42 +++ llvm/lib/Target/Xtensa/XtensaRegisterInfo.h | 4 + .../lib/Target/Xtensa/XtensaTargetMachine.cpp | 11 + llvm/lib/Target/Xtensa/XtensaTargetMachine.h | 4 + llvm/test/CodeGen/Xtensa/branch-relaxation.ll | 246 ++++++++++++++++++ llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll | 12 +- 12 files changed, 592 insertions(+), 10 deletions(-) create mode 100644 llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h create mode 100644 llvm/test/CodeGen/Xtensa/branch-relaxation.ll diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 053e7062fb4995..21b192a27cad9d 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -868,6 +868,10 @@ class LLVM_ABI MachineFunction { /// it are renumbered. void RenumberBlocks(MachineBasicBlock *MBBFrom = nullptr); + /// Return an estimate of the function's code size, + /// taking into account block and function alignment + int64_t estimateFunctionSizeInBytes(); + /// print - Print out the MachineFunction in a format suitable for debugging /// to the specified stream. void print(raw_ostream &OS, const SlotIndexes* = nullptr) const; diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index b56888a0f71fe6..7eb1c5efb5cf71 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -378,6 +378,37 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { MBBNumberingEpoch++; } +int64_t MachineFunction::estimateFunctionSizeInBytes() { + const TargetInstrInfo &TII = *getSubtarget().getInstrInfo(); + const Align FunctionAlignment = getAlignment(); + MachineFunction::iterator MBBI = begin(), E = end(); + /// Offset - Distance from the beginning of the function to the end + /// of the basic block. 
+  int64_t Offset = 0;
+
+  for (; MBBI != E; ++MBBI) {
+    const Align Alignment = MBBI->getAlignment();
+    int64_t BlockSize = 0;
+
+    for (auto &MI : *MBBI) {
+      BlockSize += TII.getInstSizeInBytes(MI);
+    }
+
+    int64_t OffsetBB;
+    if (Alignment <= FunctionAlignment) {
+      OffsetBB = alignTo(Offset, Alignment);
+    } else {
+      // The alignment of this MBB is larger than the function's alignment, so
+      // we can't tell whether or not it will insert nops. Assume that it will.
+      OffsetBB = alignTo(Offset, Alignment) + Alignment.value() -
+                 FunctionAlignment.value();
+    }
+    Offset = OffsetBB + BlockSize;
+  }
+
+  return Offset;
+}
+
 /// This method iterates over the basic blocks and assigns their IsBeginSection
 /// and IsEndSection fields. This must be called after MBB layout is finalized
 /// and the SectionID's are assigned to MBBs.
diff --git a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp
index db86637ecf83f3..95dfafc13f3908 100644
--- a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp
@@ -69,6 +69,9 @@ void XtensaAsmPrinter::emitMachineConstantPoolValue(
     const BlockAddress *BA =
         cast<XtensaConstantPoolConstant>(ACPV)->getBlockAddress();
     MCSym = GetBlockAddressSymbol(BA);
+  } else if (ACPV->isMachineBasicBlock()) {
+    const MachineBasicBlock *MBB = cast<XtensaConstantPoolMBB>(ACPV)->getMBB();
+    MCSym = MBB->getSymbol();
   } else if (ACPV->isJumpTable()) {
     unsigned Idx = cast<XtensaConstantPoolJumpTable>(ACPV)->getIndex();
     MCSym = this->GetJTISymbol(Idx, false);
diff --git a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
index f46d386c9186aa..005ba10b813133 100644
--- a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
@@ -12,6 +12,7 @@
 
 #include "XtensaFrameLowering.h"
 #include "XtensaInstrInfo.h"
+#include "XtensaMachineFunctionInfo.h"
 #include "XtensaSubtarget.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -260,14 +261,26 @@ void XtensaFrameLowering::processFunctionBeforeFrameFinalized(
   // Set scavenging frame index if necessary.
   MachineFrameInfo &MFI = MF.getFrameInfo();
   uint64_t MaxSPOffset = MFI.estimateStackSize(MF);
+  auto *XtensaFI = MF.getInfo<XtensaMachineFunctionInfo>();
+  unsigned ScavSlotsNum = 0;
 
-  if (isInt<12>(MaxSPOffset))
-    return;
+  if (!isInt<12>(MaxSPOffset))
+    ScavSlotsNum = 1;
+
+  // Far branches beyond the 18-bit offset range require a spill slot for a
+  // scratch register.
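+  // (The unconditional jump J encodes an 18-bit signed PC-relative offset,
+  // so a function whose estimated size does not fit that range may need an
+  // indirect jump through a scratch register; see
+  // XtensaInstrInfo::isBranchOffsetInRange.)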
+ bool IsLargeFunction = !isInt<18>(MF.estimateFunctionSizeInBytes()); + if (IsLargeFunction) + ScavSlotsNum = std::max(ScavSlotsNum, 1u); const TargetRegisterClass &RC = Xtensa::ARRegClass; unsigned Size = TRI->getSpillSize(RC); Align Alignment = TRI->getSpillAlign(RC); - int FI = MF.getFrameInfo().CreateStackObject(Size, Alignment, false); + for (unsigned I = 0; I < ScavSlotsNum; I++) { + int FI = MFI.CreateStackObject(Size, Alignment, false); + RS->addScavengingFrameIndex(FI); - RS->addScavengingFrameIndex(FI); + if (IsLargeFunction && + XtensaFI->getBranchRelaxationScratchFrameIndex() == -1) + XtensaFI->setBranchRelaxationScratchFrameIndex(FI); + } } diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp index b2b4376ca040b6..4c440da715fefe 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp @@ -13,11 +13,14 @@ //===----------------------------------------------------------------------===// #include "XtensaInstrInfo.h" +#include "XtensaConstantPoolValue.h" +#include "XtensaMachineFunctionInfo.h" #include "XtensaTargetMachine.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #define GET_INSTRINFO_CTOR_DTOR #include "XtensaGenInstrInfo.inc" @@ -186,6 +189,18 @@ void XtensaInstrInfo::loadImmediate(MachineBasicBlock &MBB, } } +unsigned XtensaInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case TargetOpcode::INLINEASM: { // Inline Asm: Variable size. + const MachineFunction *MF = MI.getParent()->getParent(); + const char *AsmStr = MI.getOperand(0).getSymbolName(); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } + default: + return MI.getDesc().getSize(); + } +} + bool XtensaInstrInfo::reverseBranchCondition( SmallVectorImpl &Cond) const { assert(Cond.size() <= 4 && "Invalid branch condition!"); @@ -244,6 +259,74 @@ bool XtensaInstrInfo::reverseBranchCondition( } } +MachineBasicBlock * +XtensaInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { + unsigned OpCode = MI.getOpcode(); + switch (OpCode) { + case Xtensa::BR_JT: + case Xtensa::JX: + return nullptr; + case Xtensa::J: + return MI.getOperand(0).getMBB(); + case Xtensa::BEQ: + case Xtensa::BNE: + case Xtensa::BLT: + case Xtensa::BLTU: + case Xtensa::BGE: + case Xtensa::BGEU: + return MI.getOperand(2).getMBB(); + case Xtensa::BEQI: + case Xtensa::BNEI: + case Xtensa::BLTI: + case Xtensa::BLTUI: + case Xtensa::BGEI: + case Xtensa::BGEUI: + return MI.getOperand(2).getMBB(); + case Xtensa::BEQZ: + case Xtensa::BNEZ: + case Xtensa::BLTZ: + case Xtensa::BGEZ: + return MI.getOperand(1).getMBB(); + default: + llvm_unreachable("Unknown branch opcode"); + } +} + +bool XtensaInstrInfo::isBranchOffsetInRange(unsigned BranchOp, + int64_t BrOffset) const { + switch (BranchOp) { + case Xtensa::J: + BrOffset -= 4; + return isIntN(18, BrOffset); + case Xtensa::JX: + return true; + case Xtensa::BR_JT: + return true; + case Xtensa::BEQ: + case Xtensa::BNE: + case Xtensa::BLT: + case Xtensa::BLTU: + case Xtensa::BGE: + case Xtensa::BGEU: + case Xtensa::BEQI: + case Xtensa::BNEI: + case Xtensa::BLTI: + case Xtensa::BLTUI: + case Xtensa::BGEI: + case Xtensa::BGEUI: + BrOffset -= 4; + return isIntN(8, BrOffset); + case Xtensa::BEQZ: + case Xtensa::BNEZ: + case Xtensa::BLTZ: + case Xtensa::BGEZ: + BrOffset -= 
4;
+    return isIntN(12, BrOffset);
+  default:
+    llvm_unreachable("Unknown branch opcode");
+  }
+}
+
 bool XtensaInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
@@ -376,6 +459,130 @@ unsigned XtensaInstrInfo::insertBranch(
   return Count;
 }
 
+void XtensaInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                           MachineBasicBlock &DestBB,
+                                           MachineBasicBlock &RestoreBB,
+                                           const DebugLoc &DL, int64_t BrOffset,
+                                           RegScavenger *RS) const {
+  assert(RS && "RegScavenger required for long branching");
+  assert(MBB.empty() &&
+         "new block should be inserted for expanding unconditional branch");
+  assert(MBB.pred_size() == 1);
+
+  MachineFunction *MF = MBB.getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineConstantPool *ConstantPool = MF->getConstantPool();
+  auto *XtensaFI = MF->getInfo<XtensaMachineFunctionInfo>();
+  MachineBasicBlock *JumpToMBB = &DestBB;
+
+  if (!isInt<32>(BrOffset))
+    report_fatal_error(
+        "Branch offsets outside of the signed 32-bit range not supported");
+
+  Register ScratchReg = MRI.createVirtualRegister(&Xtensa::ARRegClass);
+  auto II = MBB.end();
+
+  // Create L32R without the last operand. We will add this operand later,
+  // once JumpToMBB is calculated and placed in the ConstantPool.
+  MachineInstr &L32R = *BuildMI(MBB, II, DL, get(Xtensa::L32R), ScratchReg);
+  BuildMI(MBB, II, DL, get(Xtensa::JX)).addReg(ScratchReg, RegState::Kill);
+
+  RS->enterBasicBlockEnd(MBB);
+  Register ScavRegister =
+      RS->scavengeRegisterBackwards(Xtensa::ARRegClass, L32R.getIterator(),
+                                    /*RestoreAfter=*/false, /*SpAdj=*/0,
+                                    /*AllowSpill=*/false);
+  if (ScavRegister != Xtensa::NoRegister)
+    RS->setRegUsed(ScavRegister);
+  else {
+    // The case when there is no scavenged register needs special handling.
+    // Pick A12 because it doesn't make a difference.
+    ScavRegister = Xtensa::A12;
+
+    int FrameIndex = XtensaFI->getBranchRelaxationScratchFrameIndex();
+    if (FrameIndex == -1)
+      report_fatal_error(
+          "Unable to properly handle scavenged register for indirect jump, "
+          "function code size is significantly larger than estimated");
+
+    storeRegToStackSlot(MBB, L32R, ScavRegister, /*IsKill=*/true, FrameIndex,
+                        &Xtensa::ARRegClass, &RI, Register());
+    RI.eliminateFrameIndex(std::prev(L32R.getIterator()),
+                           /*SpAdj=*/0, /*FIOperandNum=*/1);
+
+    loadRegFromStackSlot(RestoreBB, RestoreBB.end(), ScavRegister, FrameIndex,
+                         &Xtensa::ARRegClass, &RI, Register());
+    RI.eliminateFrameIndex(RestoreBB.back(),
+                           /*SpAdj=*/0, /*FIOperandNum=*/1);
+    JumpToMBB = &RestoreBB;
+  }
+
+  XtensaConstantPoolValue *C = XtensaConstantPoolMBB::Create(
+      MF->getFunction().getContext(), JumpToMBB, 0);
+  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align(4));
+  L32R.addOperand(MachineOperand::CreateCPI(Idx, 0));
+
+  MRI.replaceRegWith(ScratchReg, ScavRegister);
+  MRI.clearVirtRegs();
+}
+
+unsigned XtensaInstrInfo::insertConstBranchAtInst(
+    MachineBasicBlock &MBB, MachineInstr *I, int64_t offset,
+    ArrayRef<MachineOperand> Cond, DebugLoc DL, int *BytesAdded) const {
+  // Shouldn't be a fall through.
+ assert(&MBB && "InsertBranch must not be told to insert a fallthrough"); + assert(Cond.size() <= 4 && + "Xtensa branch conditions have less than four components!"); + + if (Cond.empty() || (Cond[0].getImm() == Xtensa::J)) { + // Unconditional branch + MachineInstr *MI = BuildMI(MBB, I, DL, get(Xtensa::J)).addImm(offset); + if (BytesAdded && MI) + *BytesAdded += getInstSizeInBytes(*MI); + return 1; + } + + unsigned Count = 0; + unsigned BR_C = Cond[0].getImm(); + MachineInstr *MI = nullptr; + switch (BR_C) { + case Xtensa::BEQ: + case Xtensa::BNE: + case Xtensa::BLT: + case Xtensa::BLTU: + case Xtensa::BGE: + case Xtensa::BGEU: + MI = BuildMI(MBB, I, DL, get(BR_C)) + .addImm(offset) + .addReg(Cond[1].getReg()) + .addReg(Cond[2].getReg()); + break; + case Xtensa::BEQI: + case Xtensa::BNEI: + case Xtensa::BLTI: + case Xtensa::BLTUI: + case Xtensa::BGEI: + case Xtensa::BGEUI: + MI = BuildMI(MBB, I, DL, get(BR_C)) + .addImm(offset) + .addReg(Cond[1].getReg()) + .addImm(Cond[2].getImm()); + break; + case Xtensa::BEQZ: + case Xtensa::BNEZ: + case Xtensa::BLTZ: + case Xtensa::BGEZ: + MI = BuildMI(MBB, I, DL, get(BR_C)).addImm(offset).addReg(Cond[1].getReg()); + break; + default: + llvm_unreachable("Invalid branch type!"); + } + if (BytesAdded && MI) + *BytesAdded += getInstSizeInBytes(*MI); + ++Count; + return Count; +} + unsigned XtensaInstrInfo::insertBranchAtInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *TBB, diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.h b/llvm/lib/Target/Xtensa/XtensaInstrInfo.h index 9f45cf7c29ada7..31da4d481d3097 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.h +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.h @@ -38,6 +38,8 @@ class XtensaInstrInfo : public XtensaGenInstrInfo { void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + unsigned getInstSizeInBytes(const MachineInstr &MI) const override; + // Return the XtensaRegisterInfo, which this class owns. const XtensaRegisterInfo &getRegisterInfo() const { return RI; } @@ -77,6 +79,11 @@ class XtensaInstrInfo : public XtensaGenInstrInfo { bool reverseBranchCondition(SmallVectorImpl &Cond) const override; + MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; + + bool isBranchOffsetInRange(unsigned BranchOpc, + int64_t BrOffset) const override; + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, @@ -90,12 +97,22 @@ class XtensaInstrInfo : public XtensaGenInstrInfo { const DebugLoc &DL, int *BytesAdded = nullptr) const override; + void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &DestBB, + MachineBasicBlock &RestoreBB, const DebugLoc &DL, + int64_t BrOffset = 0, + RegScavenger *RS = nullptr) const override; + unsigned insertBranchAtInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *TBB, ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const; + unsigned insertConstBranchAtInst(MachineBasicBlock &MBB, MachineInstr *I, + int64_t offset, + ArrayRef Cond, DebugLoc DL, + int *BytesAdded) const; + // Return true if MI is a conditional or unconditional branch. 
// When returning true, set Cond to the mask of condition-code // values on which the instruction will branch, and set Target diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h new file mode 100644 index 00000000000000..c38c060b9387ff --- /dev/null +++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h @@ -0,0 +1,42 @@ +//==- XtensaMachineFunctionInfo.h - Xtensa machine function info --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares Xtensa-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class XtensaMachineFunctionInfo : public MachineFunctionInfo { + /// FrameIndex of the spill slot for the scratch register in BranchRelaxation. + int BranchRelaxationScratchFrameIndex = -1; + +public: + explicit XtensaMachineFunctionInfo(const Function &F, + const TargetSubtargetInfo *STI) {} + + int getBranchRelaxationScratchFrameIndex() const { + return BranchRelaxationScratchFrameIndex; + } + void setBranchRelaxationScratchFrameIndex(int Index) { + BranchRelaxationScratchFrameIndex = Index; + } +}; + +} // namespace llvm + +#endif /* LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H */ diff --git a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.h b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.h index 8643ebb1c0f157..ede0eeb90b42de 100644 --- a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.h +++ b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.h @@ -38,6 +38,10 @@ class XtensaRegisterInfo : public XtensaGenRegisterInfo { return true; } + bool trackLivenessAfterRegAlloc(const MachineFunction &) const override { + return true; + } + const uint16_t * getCalleeSavedRegs(const MachineFunction *MF = 0) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp index 49c7faf84df1d3..8bbb2156e26904 100644 --- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp +++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp @@ -14,6 +14,7 @@ #include "XtensaTargetMachine.h" #include "TargetInfo/XtensaTargetInfo.h" +#include "XtensaMachineFunctionInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -83,6 +84,13 @@ XtensaTargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } +MachineFunctionInfo *XtensaTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return XtensaMachineFunctionInfo::create(Allocator, + F, STI); +} + namespace { /// Xtensa Code Generator Pass Configuration Options. 
class XtensaPassConfig : public TargetPassConfig { @@ -95,6 +103,7 @@ class XtensaPassConfig : public TargetPassConfig { } bool addInstSelector() override; + void addPreEmitPass() override; }; } // end anonymous namespace @@ -103,6 +112,8 @@ bool XtensaPassConfig::addInstSelector() { return false; } +void XtensaPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); } + TargetPassConfig *XtensaTargetMachine::createPassConfig(PassManagerBase &PM) { return new XtensaPassConfig(*this, PM); } diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.h b/llvm/lib/Target/Xtensa/XtensaTargetMachine.h index f371f22ed3d0e7..6975076b5d6997 100644 --- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.h +++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.h @@ -45,6 +45,10 @@ class XtensaTargetMachine : public LLVMTargetMachine { return TLOF.get(); } + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + protected: mutable StringMap> SubtargetMap; }; diff --git a/llvm/test/CodeGen/Xtensa/branch-relaxation.ll b/llvm/test/CodeGen/Xtensa/branch-relaxation.ll new file mode 100644 index 00000000000000..15dbd855f71acb --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/branch-relaxation.ll @@ -0,0 +1,246 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=XTENSA %s + +define i32 @jump(i1 %a) { +; XTENSA-LABEL: jump: +; XTENSA: movi a8, 1 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: beqz a8, .LBB0_2 +; XTENSA-NEXT: # %bb.1: # %iftrue +; XTENSA-NEXT: #APP +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: .space 1024 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: movi a2, 1 +; XTENSA-NEXT: ret +; XTENSA-NEXT: .LBB0_2: # %jmp +; XTENSA-NEXT: #APP +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: movi a2, 1 +; XTENSA-NEXT: ret + br i1 %a, label %iftrue, label %jmp + +jmp: + call void asm sideeffect "", ""() + br label %tail + +iftrue: + call void asm sideeffect "", ""() + br label %space + +space: + call void asm sideeffect ".space 1024", ""() + br label %tail + +tail: + ret i32 1 +} + +define i32 @jx(i1 %a) { +; XTENSA-LABEL: jx: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: .cfi_def_cfa_offset 16 +; XTENSA-NEXT: movi a8, 1 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: bnez a8, .LBB1_1 +; XTENSA-NEXT: # %bb.4: +; XTENSA-NEXT: l32r a8, .LCPI1_0 +; XTENSA-NEXT: jx a8 +; XTENSA-NEXT: .LBB1_1: # %iftrue +; XTENSA-NEXT: #APP +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: .space 1048576 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: j .LBB1_3 +; XTENSA-NEXT: .LBB1_2: # %jmp +; XTENSA-NEXT: #APP +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: .LBB1_3: # %tail +; XTENSA-NEXT: movi a2, 1 +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + br i1 %a, label %iftrue, label %jmp + +jmp: + call void asm sideeffect "", ""() + br label %tail + +iftrue: + call void asm sideeffect "", ""() + br label %space + +space: + call void asm sideeffect ".space 1048576", ""() + br label %tail + +tail: + ret i32 1 +} + +define void @relax_spill() { +; XTENSA-LABEL: relax_spill: +; XTENSA: addi a8, a1, -32 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: .cfi_def_cfa_offset 32 +; XTENSA-NEXT: s32i a12, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a13, a1, 12 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a14, a1, 8 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, 
a1, 4 # 4-byte Folded Spill +; XTENSA-NEXT: .cfi_offset a12, -4 +; XTENSA-NEXT: .cfi_offset a13, -8 +; XTENSA-NEXT: .cfi_offset a14, -12 +; XTENSA-NEXT: .cfi_offset a15, -16 +; XTENSA-NEXT: #APP +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a2, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a3, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a4, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a5, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a6, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a7, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a8, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a9, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a10, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a11, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a12, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a13, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a14, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a15, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: beq a5, a6, .LBB2_1 +; XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: s32i a12, a1, 0 +; XTENSA-NEXT: l32r a12, .LCPI2_0 +; XTENSA-NEXT: jx a12 +; XTENSA-NEXT: .LBB2_1: # %iftrue +; XTENSA-NEXT: #APP +; XTENSA-NEXT: .space 536870912 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: j .LBB2_2 +; XTENSA-NEXT: .LBB2_4: # %iffalse +; XTENSA-NEXT: l32i a12, a1, 0 +; XTENSA-NEXT: .LBB2_2: # %iffalse +; XTENSA-NEXT: #APP +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a2 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a3 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a4 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a5 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a6 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a7 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a8 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a9 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a10 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a11 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a12 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a13 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a14 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a15 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: l32i a15, a1, 4 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a14, a1, 8 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a13, a1, 12 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a12, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 32 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + call void asm sideeffect "", ""() + %a2 = call i32 asm sideeffect "addi a2, a3, 1", "={a2}"() + %a3 = call i32 asm sideeffect "addi a3, a3, 1", "={a3}"() + %a4 = call i32 asm sideeffect "addi a4, a3, 1", "={a4}"() + %a5 = call i32 asm sideeffect "addi a5, a3, 1", "={a5}"() + %a6 = call i32 asm sideeffect "addi a6, a3, 1", "={a6}"() + %a7 = call i32 asm sideeffect "addi a7, a3, 1", "={a7}"() + %a8 = call i32 asm sideeffect "addi a8, a3, 1", "={a8}"() + %a9 = call i32 asm 
sideeffect "addi a9, a3, 1", "={a9}"() + %a10 = call i32 asm sideeffect "addi a10, a3, 1", "={a10}"() + %a11 = call i32 asm sideeffect "addi a11, a3, 1", "={a11}"() + %a12 = call i32 asm sideeffect "addi a12, a3, 1", "={a12}"() + %a13 = call i32 asm sideeffect "addi a13, a3, 1", "={a13}"() + %a14 = call i32 asm sideeffect "addi a14, a3, 1", "={a14}"() + %a15 = call i32 asm sideeffect "addi a15, a3, 1", "={a15}"() + + %cmp = icmp eq i32 %a5, %a6 + br i1 %cmp, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 536870912", ""() + br label %iffalse + +iffalse: + call void asm sideeffect "", ""() + call void asm sideeffect "# reg use $0", "{a2}"(i32 %a2) + call void asm sideeffect "# reg use $0", "{a3}"(i32 %a3) + call void asm sideeffect "# reg use $0", "{a4}"(i32 %a4) + call void asm sideeffect "# reg use $0", "{a5}"(i32 %a5) + call void asm sideeffect "# reg use $0", "{a6}"(i32 %a6) + call void asm sideeffect "# reg use $0", "{a7}"(i32 %a7) + call void asm sideeffect "# reg use $0", "{a8}"(i32 %a8) + call void asm sideeffect "# reg use $0", "{a9}"(i32 %a9) + call void asm sideeffect "# reg use $0", "{a10}"(i32 %a10) + call void asm sideeffect "# reg use $0", "{a11}"(i32 %a11) + call void asm sideeffect "# reg use $0", "{a12}"(i32 %a12) + call void asm sideeffect "# reg use $0", "{a13}"(i32 %a13) + call void asm sideeffect "# reg use $0", "{a14}"(i32 %a14) + call void asm sideeffect "# reg use $0", "{a15}"(i32 %a15) + ret void +} diff --git a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll index bad57d58b28a67..60303235386256 100644 --- a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll @@ -8,8 +8,8 @@ declare i32 @llvm.ctpop.i32(i32) define i32 @test_cttz_i32(i32 %a) nounwind { ; XTENSA-LABEL: test_cttz_i32: -; XTENSA: beqz a2, .LBB0_1 -; XTENSA-NEXT: # %bb.2: # %cond.false +; XTENSA: beqz a2, .LBB0_2 +; XTENSA-NEXT: # %bb.1: # %cond.false ; XTENSA-NEXT: movi a8, -1 ; XTENSA-NEXT: xor a8, a2, a8 ; XTENSA-NEXT: addi a9, a2, -1 @@ -33,7 +33,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind { ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: extui a2, a8, 24, 8 ; XTENSA-NEXT: ret -; XTENSA-NEXT: .LBB0_1: +; XTENSA-NEXT: .LBB0_2: ; XTENSA-NEXT: movi a2, 32 ; XTENSA-NEXT: ret %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false) @@ -71,8 +71,8 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind { define i32 @test_ctlz_i32(i32 %a) nounwind { ; XTENSA-LABEL: test_ctlz_i32: -; XTENSA: beqz a2, .LBB2_1 -; XTENSA-NEXT: # %bb.2: # %cond.false +; XTENSA: beqz a2, .LBB2_2 +; XTENSA-NEXT: # %bb.1: # %cond.false ; XTENSA-NEXT: srli a8, a2, 1 ; XTENSA-NEXT: or a8, a2, a8 ; XTENSA-NEXT: srli a9, a8, 2 @@ -104,7 +104,7 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: extui a2, a8, 24, 8 ; XTENSA-NEXT: ret -; XTENSA-NEXT: .LBB2_1: +; XTENSA-NEXT: .LBB2_2: ; XTENSA-NEXT: movi a2, 32 ; XTENSA-NEXT: ret %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false) From 49ee6069db372ce326bc36678e745459868c3771 Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Thu, 7 Nov 2024 14:51:12 -0800 Subject: [PATCH 36/40] [BOLT][AArch64] Add support for compact code model (#112110) Add `--compact-code-model` option that executes alternative branch relaxation with an assumption that the resulting binary has less than 128MB of code. The relaxation is done in `relaxLocalBranches()`, which operates on a function level and executes on multiple functions in parallel. 
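For example, a typical invocation might look like this (the input binary
and profile names are illustrative; the flags mirror those exercised by
the new compact-code-model.s test):

  llvm-bolt clang -o clang.bolt --data perf.fdata --compact-code-model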
Running the new option on AArch64 Clang binary produces slightly smaller code and the relaxation finishes in about 1/10th of the time. Note that the new `.text` has to be smaller than 128MB, *and* `.plt` has to be closer than 128MB to `.text`. --- bolt/include/bolt/Core/BinaryBasicBlock.h | 3 + bolt/include/bolt/Core/FunctionLayout.h | 3 +- bolt/include/bolt/Passes/LongJmp.h | 13 + bolt/lib/Core/FunctionLayout.cpp | 4 +- bolt/lib/Passes/LongJmp.cpp | 289 +++++++++++++++++++++- bolt/test/AArch64/compact-code-model.s | 92 +++++++ 6 files changed, 399 insertions(+), 5 deletions(-) create mode 100644 bolt/test/AArch64/compact-code-model.s diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h index b4f31cf2bae6f6..25cccc4edecf68 100644 --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -819,6 +819,9 @@ class BinaryBasicBlock { return OutputAddressRange; } + uint64_t getOutputStartAddress() const { return OutputAddressRange.first; } + uint64_t getOutputEndAddress() const { return OutputAddressRange.second; } + bool hasLocSyms() const { return LocSyms != nullptr; } /// Return mapping of input offsets to symbols in the output. diff --git a/bolt/include/bolt/Core/FunctionLayout.h b/bolt/include/bolt/Core/FunctionLayout.h index 6a13cbec69fee7..ee4dd689b8dd64 100644 --- a/bolt/include/bolt/Core/FunctionLayout.h +++ b/bolt/include/bolt/Core/FunctionLayout.h @@ -123,7 +123,8 @@ class FunctionFragment { const_iterator begin() const; iterator end(); const_iterator end() const; - const BinaryBasicBlock *front() const; + BinaryBasicBlock *front() const; + BinaryBasicBlock *back() const; friend class FunctionLayout; }; diff --git a/bolt/include/bolt/Passes/LongJmp.h b/bolt/include/bolt/Passes/LongJmp.h index 3d02d75ac4a277..df3ea9620918af 100644 --- a/bolt/include/bolt/Passes/LongJmp.h +++ b/bolt/include/bolt/Passes/LongJmp.h @@ -63,6 +63,19 @@ class LongJmpPass : public BinaryFunctionPass { uint32_t NumColdStubs{0}; uint32_t NumSharedStubs{0}; + /// The shortest distance for any branch instruction on AArch64. + static constexpr size_t ShortestJumpBits = 16; + static constexpr size_t ShortestJumpSpan = 1ULL << (ShortestJumpBits - 1); + + /// The longest single-instruction branch. + static constexpr size_t LongestJumpBits = 28; + static constexpr size_t LongestJumpSpan = 1ULL << (LongestJumpBits - 1); + + /// Relax all internal function branches including those between fragments. + /// Assume that fragments are placed in different sections but are within + /// 128MB of each other. + void relaxLocalBranches(BinaryFunction &BF); + /// -- Layout estimation methods -- /// Try to do layout before running the emitter, by looking at BinaryFunctions /// and MCInsts -- this is an estimation. 
To be correct for longjmp inserter
diff --git a/bolt/lib/Core/FunctionLayout.cpp b/bolt/lib/Core/FunctionLayout.cpp
index 15e6127ad2e9e8..4498fc44da9548 100644
--- a/bolt/lib/Core/FunctionLayout.cpp
+++ b/bolt/lib/Core/FunctionLayout.cpp
@@ -33,7 +33,9 @@ FunctionFragment::const_iterator FunctionFragment::end() const {
   return const_iterator(Layout->block_begin() + StartIndex + Size);
 }
 
-const BinaryBasicBlock *FunctionFragment::front() const { return *begin(); }
+BinaryBasicBlock *FunctionFragment::front() const { return *begin(); }
+
+BinaryBasicBlock *FunctionFragment::back() const { return *std::prev(end()); }
 
 FunctionLayout::FunctionLayout() { addFragment(); }
 
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index 0b2d00300f46b9..6a7cf4c2976009 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -11,18 +11,26 @@
 //===----------------------------------------------------------------------===//
 
 #include "bolt/Passes/LongJmp.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include "llvm/Support/MathExtras.h"
 
 #define DEBUG_TYPE "longjmp"
 
 using namespace llvm;
 
 namespace opts {
+extern cl::OptionCategory BoltCategory;
 extern cl::OptionCategory BoltOptCategory;
 
 extern llvm::cl::opt<unsigned> AlignText;
 extern cl::opt<unsigned> AlignFunctions;
 extern cl::opt<bool> UseOldText;
 extern cl::opt<bool> HotFunctionsAtEnd;
 
+static cl::opt<bool>
+    CompactCodeModel("compact-code-model",
+                     cl::desc("generate code for binaries <128MB on AArch64"),
+                     cl::init(false), cl::cat(BoltCategory));
+
 static cl::opt<bool> GroupStubs("group-stubs",
                                 cl::desc("share stubs across functions"),
                                 cl::init(true), cl::cat(BoltOptCategory));
@@ -61,10 +69,10 @@ static BinaryBasicBlock *getBBAtHotColdSplitPoint(BinaryFunction &Func) {
     if (Next != E && (*Next)->isCold())
       return *I;
   }
-  llvm_unreachable("No hot-colt split point found");
+  llvm_unreachable("No hot-cold split point found");
 }
 
-static bool shouldInsertStub(const BinaryContext &BC, const MCInst &Inst) {
+static bool mayNeedStub(const BinaryContext &BC, const MCInst &Inst) {
   return (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) &&
          !BC.MIB->isIndirectBranch(Inst) && !BC.MIB->isIndirectCall(Inst);
 }
@@ -570,7 +578,7 @@ Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) {
       if (BC.MIB->isPseudo(Inst))
         continue;
 
-      if (!shouldInsertStub(BC, Inst)) {
+      if (!mayNeedStub(BC, Inst)) {
        DotAddress += InsnSize;
        continue;
      }
@@ -634,7 +642,282 @@ Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) {
   return Error::success();
 }
 
+void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
+  BinaryContext &BC = BF.getBinaryContext();
+  auto &MIB = BC.MIB;
+
+  // Quick path.
+  if (!BF.isSplit() && BF.estimateSize() < ShortestJumpSpan)
+    return;
+
+  auto isBranchOffsetInRange = [&](const MCInst &Inst, int64_t Offset) {
+    const unsigned Bits = MIB->getPCRelEncodingSize(Inst);
+    return isIntN(Bits, Offset);
+  };
+
+  auto isBlockInRange = [&](const MCInst &Inst, uint64_t InstAddress,
+                            const BinaryBasicBlock &BB) {
+    const int64_t Offset = BB.getOutputStartAddress() - InstAddress;
+    return isBranchOffsetInRange(Inst, Offset);
+  };
+
+  // Keep track of *all* function trampolines that are going to be added to the
+  // function layout at the end of relaxation.
+  std::vector<std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>>>
+      FunctionTrampolines;
+
+  // Function fragments are relaxed independently.
+  for (FunctionFragment &FF : BF.getLayout().fragments()) {
+    // Fill out code size estimation for the fragment. Use output BB address
+    // ranges to store offsets from the start of the function fragment.
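+    // (OutputAddressRange is temporarily repurposed here: during relaxation
+    // it holds fragment-relative offsets, not final output addresses.)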
+    uint64_t CodeSize = 0;
+    for (BinaryBasicBlock *BB : FF) {
+      BB->setOutputStartAddress(CodeSize);
+      CodeSize += BB->estimateSize();
+      BB->setOutputEndAddress(CodeSize);
+    }
+
+    // Dynamically-updated size of the fragment.
+    uint64_t FragmentSize = CodeSize;
+
+    // Size of the trampoline in bytes.
+    constexpr uint64_t TrampolineSize = 4;
+
+    // Trampolines created for the fragment. DestinationBB -> TrampolineBB.
+    // NB: here we store only the first trampoline created for DestinationBB.
+    DenseMap<BinaryBasicBlock *, BinaryBasicBlock *> FragmentTrampolines;
+
+    // Create trampoline code after \p BB, or at the end of the fragment if BB
+    // is nullptr. If \p UpdateOffsets is true, update FragmentSize and offsets
+    // for basic blocks affected by the insertion of the trampoline.
+    auto addTrampolineAfter = [&](BinaryBasicBlock *BB,
+                                  BinaryBasicBlock *TargetBB, uint64_t Count,
+                                  bool UpdateOffsets = true) {
+      FunctionTrampolines.emplace_back(BB ? BB : FF.back(),
+                                       BF.createBasicBlock());
+      BinaryBasicBlock *TrampolineBB = FunctionTrampolines.back().second.get();
+
+      MCInst Inst;
+      {
+        auto L = BC.scopeLock();
+        MIB->createUncondBranch(Inst, TargetBB->getLabel(), BC.Ctx.get());
+      }
+      TrampolineBB->addInstruction(Inst);
+      TrampolineBB->addSuccessor(TargetBB, Count);
+      TrampolineBB->setExecutionCount(Count);
+      const uint64_t TrampolineAddress =
+          BB ? BB->getOutputEndAddress() : FragmentSize;
+      TrampolineBB->setOutputStartAddress(TrampolineAddress);
+      TrampolineBB->setOutputEndAddress(TrampolineAddress + TrampolineSize);
+      TrampolineBB->setFragmentNum(FF.getFragmentNum());
+
+      if (!FragmentTrampolines.lookup(TargetBB))
+        FragmentTrampolines[TargetBB] = TrampolineBB;
+
+      if (!UpdateOffsets)
+        return TrampolineBB;
+
+      FragmentSize += TrampolineSize;
+
+      // If the trampoline was added at the end of the fragment, offsets of
+      // other fragments should stay intact.
+      if (!BB)
+        return TrampolineBB;
+
+      // Update offsets for blocks after BB.
+      for (BinaryBasicBlock *IBB : FF) {
+        if (IBB->getOutputStartAddress() >= TrampolineAddress) {
+          IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
+                                     TrampolineSize);
+          IBB->setOutputEndAddress(IBB->getOutputEndAddress() + TrampolineSize);
+        }
+      }
+
+      // Update offsets for trampolines in this fragment that are placed after
+      // the new trampoline. Note that trampoline blocks are not part of the
+      // function/fragment layout until we add them right before the return
+      // from relaxLocalBranches().
+      for (auto &Pair : FunctionTrampolines) {
+        BinaryBasicBlock *IBB = Pair.second.get();
+        if (IBB->getFragmentNum() != TrampolineBB->getFragmentNum())
+          continue;
+        if (IBB == TrampolineBB)
+          continue;
+        if (IBB->getOutputStartAddress() >= TrampolineAddress) {
+          IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
+                                     TrampolineSize);
+          IBB->setOutputEndAddress(IBB->getOutputEndAddress() + TrampolineSize);
+        }
+      }
+
+      return TrampolineBB;
+    };
+
+    // Pre-populate trampolines by splitting unconditional branches from the
+    // containing basic block.
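+    // Splitting them upfront seeds FragmentTrampolines, so the conditional
+    // branches relaxed below can reuse these trampolines instead of creating
+    // new ones.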
+ for (BinaryBasicBlock *BB : FF) { + MCInst *Inst = BB->getLastNonPseudoInstr(); + if (!Inst || !MIB->isUnconditionalBranch(*Inst)) + continue; + + const MCSymbol *TargetSymbol = MIB->getTargetSymbol(*Inst); + BB->eraseInstruction(BB->findInstruction(Inst)); + BB->setOutputEndAddress(BB->getOutputEndAddress() - TrampolineSize); + + BinaryBasicBlock::BinaryBranchInfo BI; + BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol, BI); + + BinaryBasicBlock *TrampolineBB = + addTrampolineAfter(BB, TargetBB, BI.Count, /*UpdateOffsets*/ false); + BB->replaceSuccessor(TargetBB, TrampolineBB, BI.Count); + } + + /// Relax the branch \p Inst in basic block \p BB that targets \p TargetBB. + /// \p InstAddress contains offset of the branch from the start of the + /// containing function fragment. + auto relaxBranch = [&](BinaryBasicBlock *BB, MCInst &Inst, + uint64_t InstAddress, BinaryBasicBlock *TargetBB) { + BinaryFunction *BF = BB->getParent(); + + // Use branch taken count for optimal relaxation. + const uint64_t Count = BB->getBranchInfo(*TargetBB).Count; + assert(Count != BinaryBasicBlock::COUNT_NO_PROFILE && + "Expected valid branch execution count"); + + // Try to reuse an existing trampoline without introducing any new code. + BinaryBasicBlock *TrampolineBB = FragmentTrampolines.lookup(TargetBB); + if (TrampolineBB && isBlockInRange(Inst, InstAddress, *TrampolineBB)) { + BB->replaceSuccessor(TargetBB, TrampolineBB, Count); + TrampolineBB->setExecutionCount(TrampolineBB->getExecutionCount() + + Count); + auto L = BC.scopeLock(); + MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get()); + return; + } + + // For cold branches, check if we can introduce a trampoline at the end + // of the fragment that is within the branch reach. Note that such + // trampoline may change address later and become unreachable in which + // case we will need further relaxation. + const int64_t OffsetToEnd = FragmentSize - InstAddress; + if (Count == 0 && isBranchOffsetInRange(Inst, OffsetToEnd)) { + TrampolineBB = addTrampolineAfter(nullptr, TargetBB, Count); + BB->replaceSuccessor(TargetBB, TrampolineBB, Count); + auto L = BC.scopeLock(); + MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get()); + + return; + } + + // Insert a new block after the current one and use it as a trampoline. + TrampolineBB = addTrampolineAfter(BB, TargetBB, Count); + + // If the other successor is a fall-through, invert the condition code. + const BinaryBasicBlock *const NextBB = + BF->getLayout().getBasicBlockAfter(BB, /*IgnoreSplits*/ false); + if (BB->getConditionalSuccessor(false) == NextBB) { + BB->swapConditionalSuccessors(); + auto L = BC.scopeLock(); + MIB->reverseBranchCondition(Inst, NextBB->getLabel(), BC.Ctx.get()); + } else { + auto L = BC.scopeLock(); + MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get()); + } + BB->replaceSuccessor(TargetBB, TrampolineBB, Count); + }; + + bool MayNeedRelaxation; + uint64_t NumIterations = 0; + do { + MayNeedRelaxation = false; + ++NumIterations; + for (auto BBI = FF.begin(); BBI != FF.end(); ++BBI) { + BinaryBasicBlock *BB = *BBI; + uint64_t NextInstOffset = BB->getOutputStartAddress(); + for (MCInst &Inst : *BB) { + const size_t InstAddress = NextInstOffset; + if (!MIB->isPseudo(Inst)) + NextInstOffset += 4; + + if (!mayNeedStub(BF.getBinaryContext(), Inst)) + continue; + + const size_t BitsAvailable = MIB->getPCRelEncodingSize(Inst); + + // Span of +/-128MB. 
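+          // (On AArch64, an unconditional B/BL encodes a 26-bit word offset,
+          // i.e. 28 bits of byte offset, matching LongestJumpBits; such
+          // branches reach anywhere under the compact code model and need no
+          // trampoline.)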
+          if (BitsAvailable == LongestJumpBits)
+            continue;
+
+          const MCSymbol *TargetSymbol = MIB->getTargetSymbol(Inst);
+          BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol);
+          assert(TargetBB &&
+                 "Basic block target expected for conditional branch.");
+
+          // Check if the relaxation is needed.
+          if (TargetBB->getFragmentNum() == FF.getFragmentNum() &&
+              isBlockInRange(Inst, InstAddress, *TargetBB))
+            continue;
+
+          relaxBranch(BB, Inst, InstAddress, TargetBB);
+
+          MayNeedRelaxation = true;
+        }
+      }
+
+      // We may have added new instructions, but the whole fragment is less
+      // than the minimum branch span.
+      if (FragmentSize < ShortestJumpSpan)
+        MayNeedRelaxation = false;
+
+    } while (MayNeedRelaxation);
+
+    LLVM_DEBUG({
+      if (NumIterations > 2) {
+        dbgs() << "BOLT-DEBUG: relaxed fragment " << FF.getFragmentNum().get()
+               << " of " << BF << " in " << NumIterations << " iterations\n";
+      }
+    });
+  }
+
+  // Add trampoline blocks from all fragments to the layout.
+  DenseMap<BinaryBasicBlock *, std::vector<std::unique_ptr<BinaryBasicBlock>>>
+      Insertions;
+  for (std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>> &Pair :
+       FunctionTrampolines) {
+    if (!Pair.second)
+      continue;
+    Insertions[Pair.first].emplace_back(std::move(Pair.second));
+  }
+
+  for (auto &Pair : Insertions) {
+    BF.insertBasicBlocks(Pair.first, std::move(Pair.second),
+                         /*UpdateLayout*/ true, /*UpdateCFI*/ true,
+                         /*RecomputeLPs*/ false);
+  }
+}
+
 Error LongJmpPass::runOnFunctions(BinaryContext &BC) {
+
+  if (opts::CompactCodeModel) {
+    BC.outs()
+        << "BOLT-INFO: relaxing branches for compact code model (<128MB)\n";
+
+    ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
+      relaxLocalBranches(BF);
+    };
+
+    ParallelUtilities::PredicateTy SkipPredicate =
+        [&](const BinaryFunction &BF) {
+          return !BC.shouldEmit(BF) || !BF.isSimple();
+        };
+
+    ParallelUtilities::runOnEachFunction(
+        BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
+        SkipPredicate, "RelaxLocalBranches");
+
+    return Error::success();
+  }
+
   BC.outs() << "BOLT-INFO: Starting stub-insertion pass\n";
   std::vector<BinaryFunction *> Sorted = BC.getSortedFunctions();
   bool Modified;
diff --git a/bolt/test/AArch64/compact-code-model.s b/bolt/test/AArch64/compact-code-model.s
new file mode 100644
index 00000000000000..0805302a885981
--- /dev/null
+++ b/bolt/test/AArch64/compact-code-model.s
@@ -0,0 +1,92 @@
+## Check that llvm-bolt successfully relaxes branches for compact (<128MB) code
+## model.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: link_fdata %s %t.o %t.fdata
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -static
+# RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --split-functions \
+# RUN:   --keep-nops --compact-code-model
+# RUN: llvm-objdump -d \
+# RUN:   --disassemble-symbols=_start,_start.cold.0,foo,foo.cold.0 %t.bolt \
+# RUN:   | FileCheck %s
+# RUN: llvm-nm -nS %t.bolt | FileCheck %s --check-prefix=CHECK-NM
+
+## Fragments of _start and foo will be separated by large_function, which is
+## over 1MB in size - larger than any conditional branch can cover, requiring
+## branch relaxation.
+
+# CHECK-NM: _start
+# CHECK-NM: foo
+# CHECK-NM: 0000000000124f84 T large_function
+# CHECK-NM: _start.cold.0
+# CHECK-NM: foo.cold.0
+
+  .text
+  .globl _start
+  .type _start, %function
+_start:
+# CHECK: <_start>:
+# FDATA: 0 [unknown] 0 1 _start 0 0 100
+  .cfi_startproc
+  cmp x0, 1
+  b.eq .L0
+# CHECK: b.eq
+# CHECK-NEXT: b
+# CHECK-NEXT: b
+
+  bl large_function
+.L0:
+  ret x30
+  .cfi_endproc
+.size _start, .-_start
+
+## Check that long branch in foo() is reused during relaxation. I.e. 
we should +## see just one branch to the cold fragment. + + .globl foo + .type foo, %function +foo: +# CHECK: : +# FDATA: 0 [unknown] 0 1 foo 0 0 100 + .cfi_startproc + cmp x0, 0 +.T0: + b.eq .ERROR +# CHECK: b {{.*}} +# CHECK-NOT: b {{.*}} +# FDATA: 1 foo #.T0# 1 foo #.T1# 0 100 +.T1: + bl large_function + cmp x0, 1 +.T2: + b.eq .ERROR +# FDATA: 1 foo #.T2# 1 foo #.T3# 0 100 +.T3: + mov x1, x0 + mov x0, 0 + ret x30 + +# CHECK: : +# CHECK-NEXT: mov x0, #0x1 +# CHECK-NEXT: ret +.ERROR: + mov x0, 1 + ret x30 + .cfi_endproc +.size foo, .-foo + + .globl large_function + .type large_function, %function +large_function: +# FDATA: 0 [unknown] 0 1 large_function 0 0 100 + .cfi_startproc + .rept 300000 + nop + .endr + ret x30 + .cfi_endproc +.size large_function, .-large_function + +## Force relocation mode. + .reloc 0, R_AARCH64_NONE From faefedf7f8d520035a7c699baa12d5bb9bb93f49 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Thu, 7 Nov 2024 23:56:00 +0100 Subject: [PATCH 37/40] [libc][math][c23] Add exp10m1f C23 math function (#87992) Fixes #86503. --- libc/config/linux/x86_64/entrypoints.txt | 1 + libc/docs/math/index.rst | 2 +- libc/newhdrgen/yaml/math.yaml | 6 + libc/spec/stdc.td | 2 +- libc/src/math/CMakeLists.txt | 1 + libc/src/math/exp10m1f.h | 20 ++ libc/src/math/generic/CMakeLists.txt | 24 +- libc/src/math/generic/exp10m1f.cpp | 216 ++++++++++++++++++ libc/src/math/generic/explogxf.h | 8 +- libc/test/UnitTest/FPMatcher.h | 3 +- libc/test/src/math/CMakeLists.txt | 15 ++ libc/test/src/math/exhaustive/CMakeLists.txt | 15 ++ .../src/math/exhaustive/exp10m1f_test.cpp | 33 +++ libc/test/src/math/exp10m1f_test.cpp | 97 ++++++++ libc/test/src/math/smoke/CMakeLists.txt | 11 + libc/test/src/math/smoke/exp10m1f_test.cpp | 59 +++++ 16 files changed, 505 insertions(+), 8 deletions(-) create mode 100644 libc/src/math/exp10m1f.h create mode 100644 libc/src/math/generic/exp10m1f.cpp create mode 100644 libc/test/src/math/exhaustive/exp10m1f_test.cpp create mode 100644 libc/test/src/math/exp10m1f_test.cpp create mode 100644 libc/test/src/math/smoke/exp10m1f_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 9a4a0ff9e75a40..41be79e2f6c80c 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -417,6 +417,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.exp libc.src.math.exp10 libc.src.math.exp10f + libc.src.math.exp10m1f libc.src.math.exp2 libc.src.math.exp2f libc.src.math.exp2m1f diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index a50e054622e1a4..92580cb1592757 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -292,7 +292,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | exp10 | |check| | |check| | | |check| | | 7.12.6.2 | F.10.3.2 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| exp10m1 | | | | |check| | | 7.12.6.3 | F.10.3.3 | +| exp10m1 | |check| | | | |check| | | 7.12.6.3 | F.10.3.3 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | exp2 | |check| | |check| | | |check| | | 7.12.6.4 | F.10.3.4 | 
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/newhdrgen/yaml/math.yaml b/libc/newhdrgen/yaml/math.yaml
index 3cc4b599c777bf..d6669f1e8ffcc4 100644
--- a/libc/newhdrgen/yaml/math.yaml
+++ b/libc/newhdrgen/yaml/math.yaml
@@ -280,6 +280,12 @@ functions:
     return_type: float
     arguments:
       - type: float
+  - name: exp10m1f
+    standards:
+      - stdc
+    return_type: float
+    arguments:
+      - type: float
   - name: exp10m1f16
     standards:
       - stdc
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index d1ebc6ffb5821e..4fa057da1cf133 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -695,6 +695,7 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"exp10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
           GuardedFunctionSpec<"exp10f16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
 
+          FunctionSpec<"exp10m1f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
           GuardedFunctionSpec<"exp10m1f16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
 
           FunctionSpec<"remainder", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
@@ -1737,7 +1738,6 @@ def StdC : StandardSpec<"stdc"> {
     ]
   >;
 
-
   NamedType StructLconv = NamedType<"struct lconv">;
   PtrType StructLconvPtr = PtrType<StructLconv>;
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 80c1867d2116f6..88cef320cee76d 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -131,6 +131,7 @@ add_math_entrypoint_object(exp10)
 add_math_entrypoint_object(exp10f)
 add_math_entrypoint_object(exp10f16)
 
+add_math_entrypoint_object(exp10m1f)
 add_math_entrypoint_object(exp10m1f16)
 
 add_math_entrypoint_object(expm1)
diff --git a/libc/src/math/exp10m1f.h b/libc/src/math/exp10m1f.h
new file mode 100644
index 00000000000000..fcb9f77795da37
--- /dev/null
+++ b/libc/src/math/exp10m1f.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for exp10m1f ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_EXP10M1F_H +#define LLVM_LIBC_SRC_MATH_EXP10M1F_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +float exp10m1f(float x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_EXP10M1F_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index ca27759d3212f2..93780a79a8e2f4 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -359,7 +359,7 @@ add_header_library( libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.nearest_integer - libc.src.__support.common + libc.src.__support.common ) add_header_library( @@ -1569,6 +1569,7 @@ add_entrypoint_object( .explogxf libc.src.errno.errno libc.src.__support.common + libc.src.__support.FPUtil.except_value_utils libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add @@ -1686,6 +1687,27 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( + exp10m1f + SRCS + exp10m1f.cpp + HDRS + ../exp10m1f.h + DEPENDS + .explogxf + libc.src.errno.errno + libc.src.__support.common + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( exp10m1f16 SRCS diff --git a/libc/src/math/generic/exp10m1f.cpp b/libc/src/math/generic/exp10m1f.cpp new file mode 100644 index 00000000000000..c0e302eea7b08a --- /dev/null +++ b/libc/src/math/generic/exp10m1f.cpp @@ -0,0 +1,216 @@ +//===-- Implementation of exp10m1f function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/exp10m1f.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" +#include "src/errno/libc_errno.h" + +#include "explogxf.h" + +namespace LIBC_NAMESPACE_DECL { + +static constexpr size_t N_EXCEPTS_LO = 11; + +static constexpr fputil::ExceptValues EXP10M1F_EXCEPTS_LO = + {{ + // x = 0x1.0fe54ep-11, exp10m1f(x) = 0x1.3937eep-10 (RZ) + {0x3a07'f2a7U, 0x3a9c'9bf7U, 1U, 0U, 1U}, + // x = 0x1.80e6eap-11, exp10m1f(x) = 0x1.bb8272p-10 (RZ) + {0x3a40'7375U, 0x3add'c139U, 1U, 0U, 1U}, + // x = -0x1.2a33bcp-51, exp10m1f(x) = -0x1.57515ep-50 (RZ) + {0xa615'19deU, 0xa6ab'a8afU, 0U, 1U, 0U}, + // x = -0x0p+0, exp10m1f(x) = -0x0p+0 (RZ) + {0x8000'0000U, 0x8000'0000U, 0U, 0U, 0U}, + // x = -0x1.b59e08p-31, exp10m1f(x) = -0x1.f7d356p-30 (RZ) + {0xb05a'cf04U, 0xb0fb'e9abU, 0U, 1U, 1U}, + // x = -0x1.bf342p-12, exp10m1f(x) = -0x1.014e02p-10 (RZ) + {0xb9df'9a10U, 0xba80'a701U, 0U, 1U, 0U}, + // x = -0x1.6207fp-11, exp10m1f(x) = -0x1.9746cap-10 (RZ) + {0xba31'03f8U, 0xbacb'a365U, 0U, 1U, 1U}, + // x = -0x1.bd0c66p-11, exp10m1f(x) = -0x1.ffe168p-10 (RZ) + {0xba5e'8633U, 0xbaff'f0b4U, 0U, 1U, 1U}, + // x = -0x1.ffd84cp-10, exp10m1f(x) = -0x1.25faf2p-8 (RZ) + {0xbaff'ec26U, 0xbb92'fd79U, 0U, 1U, 0U}, + // x = -0x1.a74172p-9, exp10m1f(x) = -0x1.e57be2p-8 (RZ) + {0xbb53'a0b9U, 0xbbf2'bdf1U, 0U, 1U, 1U}, + // x = -0x1.cb694cp-9, exp10m1f(x) = -0x1.0764e4p-7 (RZ) + {0xbb65'b4a6U, 0xbc03'b272U, 0U, 1U, 0U}, + }}; + +static constexpr size_t N_EXCEPTS_HI = 19; + +static constexpr fputil::ExceptValues EXP10M1F_EXCEPTS_HI = + {{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.8d31eep-8, exp10m1f(x) = 0x1.cc7e4cp-7 (RZ) + {0x3bc6'98f7U, 0x3c66'3f26U, 1U, 0U, 1U}, + // x = 0x1.915fcep-8, exp10m1f(x) = 0x1.d15f72p-7 (RZ) + {0x3bc8'afe7U, 0x3c68'afb9U, 1U, 0U, 0U}, + // x = 0x1.bcf982p-8, exp10m1f(x) = 0x1.022928p-6 (RZ) + {0x3bde'7cc1U, 0x3c81'1494U, 1U, 0U, 1U}, + // x = 0x1.99ff0ap-7, exp10m1f(x) = 0x1.dee416p-6 (RZ) + {0x3c4c'ff85U, 0x3cef'720bU, 1U, 0U, 0U}, + // x = 0x1.75ea14p-6, exp10m1f(x) = 0x1.b9ff16p-5 (RZ) + {0x3cba'f50aU, 0x3d5c'ff8bU, 1U, 0U, 0U}, + // x = 0x1.f81b64p-6, exp10m1f(x) = 0x1.2cb6bcp-4 (RZ) + {0x3cfc'0db2U, 0x3d96'5b5eU, 1U, 0U, 0U}, + // x = 0x1.fafecp+3, exp10m1f(x) = 0x1.8c880ap+52 (RZ) + {0x417d'7f60U, 0x59c6'4405U, 1U, 0U, 0U}, + // x = -0x1.3bf094p-8, exp10m1f(x) = -0x1.69ba4ap-7 (RZ) + {0xbb9d'f84aU, 0xbc34'dd25U, 0U, 1U, 0U}, + // x = -0x1.4558bcp-8, exp10m1f(x) = -0x1.746fb8p-7 (RZ) + {0xbba2'ac5eU, 0xbc3a'37dcU, 0U, 1U, 1U}, + // x = -0x1.4bb43p-8, exp10m1f(x) = -0x1.7babe4p-7 (RZ) + {0xbba5'da18U, 0xbc3d'd5f2U, 0U, 1U, 1U}, + // x = -0x1.776cc8p-8, exp10m1f(x) = -0x1.ad62c4p-7 (RZ) + {0xbbbb'b664U, 0xbc56'b162U, 0U, 1U, 0U}, + // x = -0x1.f024cp-8, exp10m1f(x) = -0x1.1b20d6p-6 (RZ) + {0xbbf8'1260U, 0xbc8d'906bU, 0U, 1U, 1U}, + // x = -0x1.f510eep-8, exp10m1f(x) = -0x1.1de9aap-6 (RZ) + {0xbbfa'8877U, 0xbc8e'f4d5U, 0U, 1U, 0U}, + // x = -0x1.0b43c4p-7, exp10m1f(x) = -0x1.30d418p-6 (RZ) + {0xbc05'a1e2U, 0xbc98'6a0cU, 0U, 1U, 0U}, + // x = -0x1.245ee4p-7, 
exp10m1f(x) = -0x1.4d2b86p-6 (RZ) + {0xbc12'2f72U, 0xbca6'95c3U, 0U, 1U, 0U}, + // x = -0x1.f9f2dap-7, exp10m1f(x) = -0x1.1e2186p-5 (RZ) + {0xbc7c'f96dU, 0xbd0f'10c3U, 0U, 1U, 0U}, + // x = -0x1.08e42p-6, exp10m1f(x) = -0x1.2b5c4p-5 (RZ) + {0xbc84'7210U, 0xbd15'ae20U, 0U, 1U, 1U}, + // x = -0x1.0cdc44p-5, exp10m1f(x) = -0x1.2a2152p-4 (RZ) + {0xbd06'6e22U, 0xbd95'10a9U, 0U, 1U, 1U}, + // x = -0x1.ca4322p-5, exp10m1f(x) = -0x1.ef073p-4 (RZ) + {0xbd65'2191U, 0xbdf7'8398U, 0U, 1U, 1U}, + }}; + +LLVM_LIBC_FUNCTION(float, exp10m1f, (float x)) { + using FPBits = fputil::FPBits; + FPBits xbits(x); + + uint32_t x_u = xbits.uintval(); + uint32_t x_abs = x_u & 0x7fff'ffffU; + + // When x >= log10(2^128), or x is nan + if (LIBC_UNLIKELY(xbits.is_pos() && x_u >= 0x421a'209bU)) { + if (xbits.is_finite()) { + int rounding = fputil::quick_get_round(); + if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) + return FPBits::max_normal().get_val(); + + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_OVERFLOW); + } + + // x >= log10(2^128) and 10^x - 1 rounds to +inf, or x is +inf or nan + return x + FPBits::inf().get_val(); + } + + // When |x| <= log10(2) * 2^(-6) + if (LIBC_UNLIKELY(x_abs <= 0x3b9a'209bU)) { + if (auto r = EXP10M1F_EXCEPTS_LO.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); + + double dx = x; + double dx_sq = dx * dx; + double c0 = dx * Exp10Base::COEFFS[0]; + double c1 = + fputil::multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]); + double c2 = + fputil::multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]); + // 10^dx - 1 ~ (1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5) - 1 + // = COEFFS[0] * dx + ... + COEFFS[4] * dx^5 + return static_cast(fputil::polyeval(dx_sq, c0, c1, c2)); + } + + // When x <= log10(2^-25), or x is nan + if (LIBC_UNLIKELY(x_u >= 0xc0f0d2f1)) { + // exp10m1(-inf) = -1 + if (xbits.is_inf()) + return -1.0f; + // exp10m1(nan) = nan + if (xbits.is_nan()) + return x; + + int rounding = fputil::quick_get_round(); + if (rounding == FE_UPWARD || rounding == FE_TOWARDZERO || + (rounding == FE_TONEAREST && x_u == 0xc0f0d2f1)) + return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f + + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_UNDERFLOW); + return -1.0f; + } + + // Exact outputs when x = 1, 2, ..., 10. + // Quick check mask: 0x800f'ffffU = ~(bits of 1.0f | ... 
| bits of 10.0f) + if (LIBC_UNLIKELY((x_u & 0x800f'ffffU) == 0)) { + switch (x_u) { + case 0x3f800000U: // x = 1.0f + return 9.0f; + case 0x40000000U: // x = 2.0f + return 99.0f; + case 0x40400000U: // x = 3.0f + return 999.0f; + case 0x40800000U: // x = 4.0f + return 9'999.0f; + case 0x40a00000U: // x = 5.0f + return 99'999.0f; + case 0x40c00000U: // x = 6.0f + return 999'999.0f; + case 0x40e00000U: // x = 7.0f + return 9'999'999.0f; + case 0x41000000U: { // x = 8.0f + int rounding = fputil::quick_get_round(); + if (rounding == FE_UPWARD || rounding == FE_TONEAREST) + return 100'000'000.0f; + return 99'999'992.0f; + } + case 0x41100000U: { // x = 9.0f + int rounding = fputil::quick_get_round(); + if (rounding == FE_UPWARD || rounding == FE_TONEAREST) + return 1'000'000'000.0f; + return 999'999'936.0f; + } + case 0x41200000U: { // x = 10.0f + int rounding = fputil::quick_get_round(); + if (rounding == FE_UPWARD || rounding == FE_TONEAREST) + return 10'000'000'000.0f; + return 9'999'998'976.0f; + } + } + } + + if (auto r = EXP10M1F_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); + + // Range reduction: 10^x = 2^(mid + hi) * 10^lo + // rr = (2^(mid + hi), lo) + auto rr = exp_b_range_reduc(x); + + // The low part is approximated by a degree-5 minimax polynomial. + // 10^lo ~ 1 + COEFFS[0] * lo + ... + COEFFS[4] * lo^5 + double lo_sq = rr.lo * rr.lo; + double c0 = fputil::multiply_add(rr.lo, Exp10Base::COEFFS[0], 1.0); + double c1 = + fputil::multiply_add(rr.lo, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]); + double c2 = + fputil::multiply_add(rr.lo, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]); + double exp10_lo = fputil::polyeval(lo_sq, c0, c1, c2); + // 10^x - 1 = 2^(mid + hi) * 10^lo - 1 + // ~ mh * exp10_lo - 1 + return static_cast(fputil::multiply_add(exp10_lo, rr.mh, -1.0)); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h index f3f50c21aacce7..651524a165f032 100644 --- a/libc/src/math/generic/explogxf.h +++ b/libc/src/math/generic/explogxf.h @@ -159,12 +159,12 @@ template LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) { int k = static_cast(kd); // hi = floor(kd * 2^(-MID_BITS)) // exp_hi = shift hi to the exponent field of double precision. 
-  int64_t exp_hi = static_cast<int64_t>((k >> Base::MID_BITS))
-                   << fputil::FPBits<double>::FRACTION_LEN;
+  uint64_t exp_hi = static_cast<uint64_t>(k >> Base::MID_BITS)
+                    << fputil::FPBits<double>::FRACTION_LEN;
   // mh = 2^hi * 2^mid
   // mh_bits = bit field of mh
-  int64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi;
-  double mh = fputil::FPBits<double>(uint64_t(mh_bits)).get_val();
+  uint64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi;
+  double mh = fputil::FPBits<double>(mh_bits).get_val();
   // dx = lo = x - (hi + mid) * log(2)
   double dx = fputil::multiply_add(
       kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd));
diff --git a/libc/test/UnitTest/FPMatcher.h b/libc/test/UnitTest/FPMatcher.h
index 55fe73cd2f1ac9..9f2bae3279208b 100644
--- a/libc/test/UnitTest/FPMatcher.h
+++ b/libc/test/UnitTest/FPMatcher.h
@@ -174,7 +174,8 @@ template <typename T> struct FPTest : public Test {
       LIBC_NAMESPACE::cpp::numeric_limits<StorageType>::max();
   static constexpr T zero = FPBits::zero(Sign::POS).get_val();
   static constexpr T neg_zero = FPBits::zero(Sign::NEG).get_val();
-  static constexpr T aNaN = FPBits::quiet_nan().get_val();
+  static constexpr T aNaN = FPBits::quiet_nan(Sign::POS).get_val();
+  static constexpr T neg_aNaN = FPBits::quiet_nan(Sign::NEG).get_val();
   static constexpr T sNaN = FPBits::signaling_nan().get_val();
   static constexpr T inf = FPBits::inf(Sign::POS).get_val();
   static constexpr T neg_inf = FPBits::inf(Sign::NEG).get_val();
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index b46ef4028915ba..d120f8e2fab219 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1084,6 +1084,21 @@ add_fp_unittest(
   libc.src.math.exp10m1f16
 )
 
+add_fp_unittest(
+  exp10m1f_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    exp10m1f_test.cpp
+  DEPENDS
+    libc.hdr.math_macros
+    libc.src.errno.errno
+    libc.src.math.exp10m1f
+    libc.src.__support.CPP.array
+    libc.src.__support.FPUtil.fp_bits
+)
+
 add_fp_unittest(
   copysign_test
   SUITE
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index 6c10ea422109e7..423c3b7a8bfd11 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -201,6 +201,21 @@ add_fp_unittest(
   -lpthread
 )
 
+add_fp_unittest(
+  exp10m1f_test
+  NO_RUN_POSTBUILD
+  NEED_MPFR
+  SUITE
+    libc_math_exhaustive_tests
+  SRCS
+    exp10m1f_test.cpp
+  DEPENDS
+    .exhaustive_test
+    libc.src.math.exp10m1f
+  LINK_LIBRARIES
+    -lpthread
+)
+
 add_fp_unittest(
   expm1f_test
   NO_RUN_POSTBUILD
diff --git a/libc/test/src/math/exhaustive/exp10m1f_test.cpp b/libc/test/src/math/exhaustive/exp10m1f_test.cpp
new file mode 100644
index 00000000000000..b9b2290f8b570d
--- /dev/null
+++ b/libc/test/src/math/exhaustive/exp10m1f_test.cpp
@@ -0,0 +1,33 @@
+//===-- Exhaustive test for exp10m1f -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/math/exp10m1f.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+using LlvmLibcExp10m1fExhaustiveTest =
+    LlvmLibcUnaryOpExhaustiveMathTest<float, mpfr::Operation::Exp10m1>;
+
+// Range: [0, Inf];
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcExp10m1fExhaustiveTest, PositiveRange) {
+  test_full_range_all_roundings(POS_START, POS_STOP);
+}
+
+// Range: [-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcExp10m1fExhaustiveTest, NegativeRange) {
+  test_full_range_all_roundings(NEG_START, NEG_STOP);
+}
diff --git a/libc/test/src/math/exp10m1f_test.cpp b/libc/test/src/math/exp10m1f_test.cpp
new file mode 100644
index 00000000000000..cc960321175cbf
--- /dev/null
+++ b/libc/test/src/math/exp10m1f_test.cpp
@@ -0,0 +1,97 @@
+//===-- Unittests for exp10m1f -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/math_macros.h"
+#include "src/__support/CPP/array.h"
+#include "src/errno/libc_errno.h"
+#include "src/math/exp10m1f.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+#include <stdint.h>
+
+using LlvmLibcExp10m1fTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+TEST_F(LlvmLibcExp10m1fTest, TrickyInputs) {
+  constexpr LIBC_NAMESPACE::cpp::array<float, 39> INPUTS = {
+      // EXP10M1F_EXCEPTS_LO
+      0x1.0fe54ep-11f,
+      0x1.80e6eap-11f,
+      -0x1.2a33bcp-51f,
+      -0x0p+0f,
+      -0x1.b59e08p-31f,
+      -0x1.bf342p-12f,
+      -0x1.6207fp-11f,
+      -0x1.bd0c66p-11f,
+      -0x1.ffd84cp-10f,
+      -0x1.a74172p-9f,
+      -0x1.cb694cp-9f,
+      // EXP10M1F_EXCEPTS_HI
+      0x1.8d31eep-8f,
+      0x1.915fcep-8f,
+      0x1.bcf982p-8f,
+      0x1.99ff0ap-7f,
+      0x1.75ea14p-6f,
+      0x1.f81b64p-6f,
+      0x1.fafecp+3f,
+      -0x1.3bf094p-8f,
+      -0x1.4558bcp-8f,
+      -0x1.4bb43p-8f,
+      -0x1.776cc8p-8f,
+      -0x1.f024cp-8f,
+      -0x1.f510eep-8f,
+      -0x1.0b43c4p-7f,
+      -0x1.245ee4p-7f,
+      -0x1.f9f2dap-7f,
+      -0x1.08e42p-6f,
+      -0x1.0cdc44p-5f,
+      -0x1.ca4322p-5f,
+      // Exceptional integers.
+      8.0f,
+      9.0f,
+      10.0f,
+      // Overflow boundaries.
+      0x1.344134p+5f,
+      0x1.344136p+5f,
+      0x1.344138p+5f,
+      // Underflow boundaries.
+      -0x1.e1a5e0p+2f,
+      -0x1.e1a5e2p+2f,
+      -0x1.e1a5e4p+2f,
+  };
+
+  for (float x : INPUTS) {
+    LIBC_NAMESPACE::libc_errno = 0;
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp10m1, x,
+                                   LIBC_NAMESPACE::exp10m1f(x), 0.5);
+  }
+}
+
+TEST_F(LlvmLibcExp10m1fTest, InFloatRange) {
+  constexpr uint32_t COUNT = 100'000;
+  constexpr uint32_t STEP = UINT32_MAX / COUNT;
+  for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
+    float x = FPBits(v).get_val();
+    if (isnan(x) || isinf(x))
+      continue;
+    LIBC_NAMESPACE::libc_errno = 0;
+    float result = LIBC_NAMESPACE::exp10m1f(x);
+
+    // If the computation resulted in an error or did not produce a valid
+    // result in the single-precision floating point range, then ignore
+    // comparing with MPFR result as MPFR can still produce valid results
+    // because of its wider precision.
+    if (isnan(result) || isinf(result) || LIBC_NAMESPACE::libc_errno != 0)
+      continue;
+
+    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp10m1, x,
+                                   LIBC_NAMESPACE::exp10m1f(x), 0.5);
+  }
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 269e92c5900628..1da6f377a1debb 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -1259,6 +1259,17 @@ add_fp_unittest(
   libc.src.__support.FPUtil.cast
 )
 
+add_fp_unittest(
+  exp10m1f_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    exp10m1f_test.cpp
+  DEPENDS
+    libc.src.errno.errno
+    libc.src.math.exp10m1f
+)
+
 add_fp_unittest(
   copysign_test
   SUITE
diff --git a/libc/test/src/math/smoke/exp10m1f_test.cpp b/libc/test/src/math/smoke/exp10m1f_test.cpp
new file mode 100644
index 00000000000000..9c65a38425d778
--- /dev/null
+++ b/libc/test/src/math/smoke/exp10m1f_test.cpp
@@ -0,0 +1,59 @@
+//===-- Unittests for exp10m1f -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/errno/libc_errno.h"
+#include "src/math/exp10m1f.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcExp10m1fTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
+TEST_F(LlvmLibcExp10m1fTest, SpecialNumbers) {
+  LIBC_NAMESPACE::libc_errno = 0;
+
+  EXPECT_EQ(FPBits(aNaN).uintval(),
+            FPBits(LIBC_NAMESPACE::exp10m1f(aNaN)).uintval());
+  EXPECT_EQ(FPBits(neg_aNaN).uintval(),
+            FPBits(LIBC_NAMESPACE::exp10m1f(neg_aNaN)).uintval());
+  EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::exp10m1f(inf));
+  EXPECT_FP_EQ_ALL_ROUNDING(-1.0f, LIBC_NAMESPACE::exp10m1f(neg_inf));
+  EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::exp10m1f(zero));
+  EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, LIBC_NAMESPACE::exp10m1f(neg_zero));
+
+  EXPECT_FP_EQ_ALL_ROUNDING(9.0f, LIBC_NAMESPACE::exp10m1f(1.0f));
+  EXPECT_FP_EQ_ALL_ROUNDING(99.0f, LIBC_NAMESPACE::exp10m1f(2.0f));
+  EXPECT_FP_EQ_ALL_ROUNDING(999.0f, LIBC_NAMESPACE::exp10m1f(3.0f));
+}
+
+TEST_F(LlvmLibcExp10m1fTest, Overflow) {
+  LIBC_NAMESPACE::libc_errno = 0;
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::exp10m1f(0x1.fffffep+127f),
+                              FE_OVERFLOW);
+  EXPECT_MATH_ERRNO(ERANGE);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::exp10m1f(0x1.344136p+5f),
+                              FE_OVERFLOW);
+  EXPECT_MATH_ERRNO(ERANGE);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::exp10m1f(0x1.344138p+5f),
+                              FE_OVERFLOW);
+  EXPECT_MATH_ERRNO(ERANGE);
+}
+
+TEST_F(LlvmLibcExp10m1fTest, Underflow) {
+  LIBC_NAMESPACE::libc_errno = 0;
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(-1.0f, LIBC_NAMESPACE::exp10m1f(-max_normal),
+                              FE_UNDERFLOW);
+  EXPECT_MATH_ERRNO(ERANGE);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(-1.0f, LIBC_NAMESPACE::exp10m1f(-0x1.e1a5e4p+2f),
+                              FE_UNDERFLOW);
+  EXPECT_MATH_ERRNO(ERANGE);
+}

From accd8f98be29fb086d83cd318eeba8e491fcb799 Mon Sep 17 00:00:00 2001
From: Kazu Hirata
Date: Thu, 7 Nov 2024 15:09:52 -0800
Subject: [PATCH 38/40] [BOLT] Fix a warning

This patch fixes:

  bolt/lib/Passes/LongJmp.cpp:830:14: error: variable 'NumIterations'
  set but not used [-Werror,-Wunused-but-set-variable]
---
 bolt/lib/Passes/LongJmp.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index 6a7cf4c2976009..c1b8c03324e0e2 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -877,6 +877,7 @@ void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
                << " of " << BF << " in " << NumIterations << " iterations\n";
       }
     });
+    (void)NumIterations;
   }
 
   // Add trampoline blocks from all fragments to the layout.

From 1cb119b168a6d24f32b05de7f22c7a06a0f0c407 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Thu, 7 Nov 2024 23:10:25 +0000
Subject: [PATCH 39/40] [gn build] Port e189d61924ba

---
 .../llvm/unittests/Transforms/Instrumentation/BUILD.gn | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn
index c9c59acda22aca..bab8616b214a95 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn
@@ -10,5 +10,8 @@ unittest("InstrumentationTests") {
     "//llvm/lib/Testing/Support",
     "//llvm/lib/Transforms/Instrumentation",
   ]
-  sources = [ "PGOInstrumentationTest.cpp" ]
+  sources = [
+    "MemProfUseTest.cpp",
+    "PGOInstrumentationTest.cpp",
+  ]
 }

From d4525b016f5a1ab2852acb2108742b2f9d0bd3bd Mon Sep 17 00:00:00 2001
From: Egor Zhdan
Date: Thu, 7 Nov 2024 23:25:33 +0000
Subject: [PATCH 40/40] [Clang] Dispatch default overloads of
 `TemplateArgumentVisitor` to the implementation

This fixes an issue where overriding
`clang::ConstTemplateArgumentVisitor::VisitTemplateArgument` in an
implementation visitor class did not have the desired effect: the
overload was not invoked when one of the visitor methods (e.g.
`VisitDeclarationArgument`) was not implemented; instead, the call
dispatched to `clang::ConstTemplateArgumentVisitor::VisitTemplateArgument`
itself and always returned a default-initialized result.

This makes `TemplateArgumentVisitor` and `ConstTemplateArgumentVisitor`
follow the implicit convention that is followed elsewhere in Clang AST,
in `RecursiveASTVisitor` and `TypeVisitor`.
---
 clang/include/clang/AST/TemplateArgumentVisitor.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/AST/TemplateArgumentVisitor.h b/clang/include/clang/AST/TemplateArgumentVisitor.h
index cf0d3220158063..923f045a995703 100644
--- a/clang/include/clang/AST/TemplateArgumentVisitor.h
+++ b/clang/include/clang/AST/TemplateArgumentVisitor.h
@@ -52,7 +52,8 @@ class Base {
 #define VISIT_METHOD(CATEGORY)                                                 \
   RetTy Visit##CATEGORY##TemplateArgument(REF(TemplateArgument) TA,            \
                                           ParamTys... P) {                     \
-    return VisitTemplateArgument(TA, std::forward<ParamTys>(P)...);            \
+    return static_cast<ImplClass *>(this)->VisitTemplateArgument(              \
+        TA, std::forward<ParamTys>(P)...);                                     \
   }
 
   VISIT_METHOD(Null);
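
To make the dispatch convention restored by the last patch concrete, here is a
minimal, self-contained sketch of the CRTP visitor pattern. All names below
(VisitorBase, MyVisitor, visitInt, visitDefault) are hypothetical
illustrations, not Clang APIs: the point is only that the base class must cast
`this` to the implementation class before re-dispatching, or a derived
override of the fallback is silently ignored.

#include <iostream>

// Minimal CRTP sketch (hypothetical names, not Clang code). Without the
// static_cast, the call below would always resolve to
// VisitorBase::visitDefault, even when the deriving class overrides it,
// which is exactly the symptom described in the commit message above.
template <typename ImplClass> struct VisitorBase {
  void visitInt(int Value) {
    // Dispatch through the implementation class, not through VisitorBase.
    static_cast<ImplClass *>(this)->visitDefault(Value);
  }
  void visitDefault(int) { std::cout << "base fallback\n"; }
};

struct MyVisitor : VisitorBase<MyVisitor> {
  // Reached via visitInt() only because of the static_cast above.
  void visitDefault(int Value) {
    std::cout << "custom fallback: " << Value << "\n";
  }
};

int main() {
  MyVisitor V;
  V.visitInt(42); // prints "custom fallback: 42"
  return 0;
}

Name lookup inside VisitorBase by itself finds only its own visitDefault, so
the cast is what lets the derived override participate; RecursiveASTVisitor
and TypeVisitor rely on the same idiom.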