Allow the machine function splitter (MFS) to be requested together with
-basic-block-sections.

* MachineFunctionSplitter::runOnMachineFunction returns false without
  splitting when the target's BB sections type is BasicBlockSection::All, and
  when an available BasicBlockSectionsProfileReaderWrapperPass answers true to
  getBBSPR().isFunctionHot() for the function name.
* MachineFunctionSplitter::getAnalysisUsage registers that wrapper pass with
  addUsedIfAvailable, so it is consulted only when it was scheduled.
* TargetPassConfig::addMachinePasses no longer guards the MFS setup with
  NeedsBBSections; the BasicBlockSections condition is spelled out inline.
* machine-function-splitter.ll adds foo21/foo22 and a RUN line that passes both
  -basic-block-sections=%t and -split-machine-functions.

NOTE(review): the template arguments of getAnalysisIfAvailable, addRequired
and addUsedIfAvailable, as well as all indentation and blank context lines,
were missing from the flattened text this patch was recovered from. They are
reconstructed here; MachineModuleInfoWrapperPass in particular is not
derivable from the visible hunks - confirm against the upstream file before
applying.

diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index ba0015d3ddacb6..c31454a8affda5 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Analysis/EHUtils.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -128,6 +129,9 @@ static bool isColdBlock(const MachineBasicBlock &MBB,
 }
 
 bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
+  // Do not split functions when -basic-block-sections=all is specified.
+  if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All)
+    return false;
   // We target functions with profile data. Static information in the form
   // of exception handling code may be split to cold if user passes the
   // mfs-split-ehcode flag.
@@ -139,6 +143,14 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
   if (!TII.isFunctionSafeToSplit(MF))
     return false;
 
+  // Do not split functions with BasicBlockSections profiles as they will
+  // be split by the BasicBlockSections pass.
+  auto BBSectionsProfile =
+      getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
+  if (BBSectionsProfile != nullptr &&
+      BBSectionsProfile->getBBSPR().isFunctionHot(MF.getName()))
+    return false;
+
   // Renumbering blocks here preserves the order of the blocks as
   // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
   // blocks. Preserving the order of blocks is essential to retaining decisions
@@ -201,6 +213,7 @@ void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineModuleInfoWrapperPass>();
   AU.addRequired<MachineBlockFrequencyInfo>();
   AU.addRequired<ProfileSummaryInfoWrapperPass>();
+  AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
 }
 
 char MachineFunctionSplitter::ID = 0;
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index a6159a38753cf5..d407e9f0871d4c 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1235,13 +1235,13 @@ void TargetPassConfig::addMachinePasses() {
     addPass(createMIRAddFSDiscriminatorsPass(
         sampleprof::FSDiscriminatorPass::PassLast));
 
-  bool NeedsBBSections =
-      TM->getBBSectionsType() != llvm::BasicBlockSection::None;
-  // Machine function splitter uses the basic block sections feature. Both
-  // cannot be enabled at the same time. We do not apply machine function
-  // splitter if -basic-block-sections is requested.
-  if (!NeedsBBSections && (TM->Options.EnableMachineFunctionSplitter ||
-                           EnableMachineFunctionSplitter)) {
+  // Machine function splitter uses the basic block sections feature.
+  // When used along with `-basic-block-sections=`, the basic-block-sections
+  // feature takes precedence. This means functions eligible for
+  // basic-block-sections optimizations (`=all`, or `=list=` with function
+  // included in the list profile) will get that optimization instead.
+  if (TM->Options.EnableMachineFunctionSplitter ||
+      EnableMachineFunctionSplitter) {
     const std::string ProfileFile = getFSProfileFile(TM);
     if (!ProfileFile.empty()) {
       if (EnableFSDiscriminator) {
@@ -1260,7 +1260,8 @@ void TargetPassConfig::addMachinePasses() {
   }
   // We run the BasicBlockSections pass if either we need BB sections or BB
   // address map (or both).
-  if (NeedsBBSections || TM->Options.BBAddrMap) {
+  if (TM->getBBSectionsType() != llvm::BasicBlockSection::None ||
+      TM->Options.BBAddrMap) {
     if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {
       addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(
           TM->getBBSectionsFuncListBuf()));
diff --git a/llvm/test/CodeGen/Generic/machine-function-splitter.ll b/llvm/test/CodeGen/Generic/machine-function-splitter.ll
index 2097523a61c5f9..1a8c9ede8f8b73 100644
--- a/llvm/test/CodeGen/Generic/machine-function-splitter.ll
+++ b/llvm/test/CodeGen/Generic/machine-function-splitter.ll
@@ -2,12 +2,21 @@
 ; REQUIRES: x86-registered-target
 
 ; COM: Machine function splitting with FDO profiles
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-X86,MFS-NOBBSECTIONS
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-X86
 ; RUN: llc < %s -mtriple=x86_64 -split-machine-functions -O0 -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s -check-prefixes=MFS-O0,MFS-O0-X86
 
+; COM: Machine function splitting along with -basic-block-sections profile
+; RUN: echo 'v1' > %t
+; RUN: echo 'ffoo21' >> %t
+; RUN: echo 'c0' >> %t
+; RUN: echo 'ffoo22' >> %t
+; RUN: echo 'c0 1' >> %t
+; RUN: echo 'c2' >> %t
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -basic-block-sections=%t -split-machine-functions | FileCheck %s --check-prefixes=MFS-BBSECTIONS
+
 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64
 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64
 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64
@@ -610,6 +619,61 @@ cold_asm_target:
   ret void
 }
 
+define void @foo21(i1 zeroext %0) {
+;; Check that a function with basic-block-sections profile (but no pgo profile)
+;; is properly split when the profile is used along with mfs.
+; MFS-BBSECTIONS: .section .text.hot.foo21
+; MFS-NOBBSECTIONS-NOT: .section .text.hot.foo21
+; MFS-BBSECTIONS-LABEL: foo21:
+; MFS-NOBBSECTIONS-NOT: foo21.cold:
+; MFS-BBSECTIONS: .section .text.split.foo21
+; MFS-BBSECTIONS: foo21.cold
+  %2 = alloca i8, align 1
+  %3 = zext i1 %0 to i8
+  store i8 %3, ptr %2, align 1
+  %4 = load i8, ptr %2, align 1
+  %5 = trunc i8 %4 to i1
+  br i1 %5, label %6, label %8
+
+6:                                                ; preds = %1
+  %7 = call i32 @bar()
+  br label %10
+
+8:                                                ; preds = %1
+  %9 = call i32 @baz()
+  br label %10
+
+10:                                               ; preds = %8, %6
+  ret void
+}
+
+define void @foo22(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 {
+;; Check that when a function has both basic-block-section and pgo profiles
+;; only the basic-block-section profile is used for splitting.
+
+;; Check that we create two hot sections with -basic-block-sections.
+; MFS-BBSECTIONS: .section .text.hot.foo22
+; MFS-BBSECTIONS-LABEL: foo22:
+; MFS-BBSECTIONS: callq bar
+; MFS-BBSECTIONS: .section .text.hot.foo22
+; MFS-BBSECTIONS-NEXT: foo22.__part.1:
+; MFS-BBSECTIONS: callq baz
+; MFS-BBSECTIONS-NOT: .section .text.split.foo22
+  br i1 %0, label %2, label %4, !prof !17
+
+2:                                                ; preds = %1
+  %3 = call i32 @bar()
+  br label %6
+
+4:                                                ; preds = %1
+  %5 = call i32 @baz()
+  br label %6
+
+6:                                                ; preds = %4, %2
+  %7 = tail call i32 @qux()
+  ret void
+}
+
 declare i32 @bar()
 declare i32 @baz()
 declare i32 @bam()