diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h index b4f31cf2bae6f6..25cccc4edecf68 100644 --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -819,6 +819,9 @@ class BinaryBasicBlock { return OutputAddressRange; } + uint64_t getOutputStartAddress() const { return OutputAddressRange.first; } + uint64_t getOutputEndAddress() const { return OutputAddressRange.second; } + bool hasLocSyms() const { return LocSyms != nullptr; } /// Return mapping of input offsets to symbols in the output. diff --git a/bolt/include/bolt/Core/FunctionLayout.h b/bolt/include/bolt/Core/FunctionLayout.h index 6a13cbec69fee7..ee4dd689b8dd64 100644 --- a/bolt/include/bolt/Core/FunctionLayout.h +++ b/bolt/include/bolt/Core/FunctionLayout.h @@ -123,7 +123,8 @@ class FunctionFragment { const_iterator begin() const; iterator end(); const_iterator end() const; - const BinaryBasicBlock *front() const; + BinaryBasicBlock *front() const; + BinaryBasicBlock *back() const; friend class FunctionLayout; }; diff --git a/bolt/include/bolt/Passes/LongJmp.h b/bolt/include/bolt/Passes/LongJmp.h index 3d02d75ac4a277..df3ea9620918af 100644 --- a/bolt/include/bolt/Passes/LongJmp.h +++ b/bolt/include/bolt/Passes/LongJmp.h @@ -63,6 +63,19 @@ class LongJmpPass : public BinaryFunctionPass { uint32_t NumColdStubs{0}; uint32_t NumSharedStubs{0}; + /// The shortest distance for any branch instruction on AArch64. + static constexpr size_t ShortestJumpBits = 16; + static constexpr size_t ShortestJumpSpan = 1ULL << (ShortestJumpBits - 1); + + /// The longest single-instruction branch. + static constexpr size_t LongestJumpBits = 28; + static constexpr size_t LongestJumpSpan = 1ULL << (LongestJumpBits - 1); + + /// Relax all internal function branches including those between fragments. + /// Assume that fragments are placed in different sections but are within + /// 128MB of each other. + void relaxLocalBranches(BinaryFunction &BF); + /// -- Layout estimation methods -- /// Try to do layout before running the emitter, by looking at BinaryFunctions /// and MCInsts -- this is an estimation. 
To be correct for longjmp inserter diff --git a/bolt/lib/Core/FunctionLayout.cpp b/bolt/lib/Core/FunctionLayout.cpp index 15e6127ad2e9e8..4498fc44da9548 100644 --- a/bolt/lib/Core/FunctionLayout.cpp +++ b/bolt/lib/Core/FunctionLayout.cpp @@ -33,7 +33,9 @@ FunctionFragment::const_iterator FunctionFragment::end() const { return const_iterator(Layout->block_begin() + StartIndex + Size); } -const BinaryBasicBlock *FunctionFragment::front() const { return *begin(); } +BinaryBasicBlock *FunctionFragment::front() const { return *begin(); } + +BinaryBasicBlock *FunctionFragment::back() const { return *std::prev(end()); } FunctionLayout::FunctionLayout() { addFragment(); } diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp index 0b2d00300f46b9..c1b8c03324e0e2 100644 --- a/bolt/lib/Passes/LongJmp.cpp +++ b/bolt/lib/Passes/LongJmp.cpp @@ -11,18 +11,26 @@ //===----------------------------------------------------------------------===// #include "bolt/Passes/LongJmp.h" +#include "bolt/Core/ParallelUtilities.h" +#include "llvm/Support/MathExtras.h" #define DEBUG_TYPE "longjmp" using namespace llvm; namespace opts { +extern cl::OptionCategory BoltCategory; extern cl::OptionCategory BoltOptCategory; extern llvm::cl::opt<unsigned> AlignText; extern cl::opt<unsigned> AlignFunctions; extern cl::opt<bool> UseOldText; extern cl::opt<bool> HotFunctionsAtEnd; +static cl::opt<bool> + CompactCodeModel("compact-code-model", + cl::desc("generate code for binaries <128MB on AArch64"), + cl::init(false), cl::cat(BoltCategory)); + static cl::opt<bool> GroupStubs("group-stubs", cl::desc("share stubs across functions"), cl::init(true), cl::cat(BoltOptCategory)); @@ -61,10 +69,10 @@ static BinaryBasicBlock *getBBAtHotColdSplitPoint(BinaryFunction &Func) { if (Next != E && (*Next)->isCold()) return *I; } - llvm_unreachable("No hot-colt split point found"); + llvm_unreachable("No hot-cold split point found"); } -static bool shouldInsertStub(const BinaryContext &BC, const MCInst &Inst) { +static bool mayNeedStub(const BinaryContext &BC, const MCInst &Inst) { return (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) && !BC.MIB->isIndirectBranch(Inst) && !BC.MIB->isIndirectCall(Inst); } @@ -570,7 +578,7 @@ Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) { if (BC.MIB->isPseudo(Inst)) continue; - if (!shouldInsertStub(BC, Inst)) { + if (!mayNeedStub(BC, Inst)) { DotAddress += InsnSize; continue; } @@ -634,7 +642,283 @@ Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) { return Error::success(); } +void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) { + BinaryContext &BC = BF.getBinaryContext(); + auto &MIB = BC.MIB; + + // Quick path. + if (!BF.isSplit() && BF.estimateSize() < ShortestJumpSpan) + return; + + auto isBranchOffsetInRange = [&](const MCInst &Inst, int64_t Offset) { + const unsigned Bits = MIB->getPCRelEncodingSize(Inst); + return isIntN(Bits, Offset); + }; + + auto isBlockInRange = [&](const MCInst &Inst, uint64_t InstAddress, + const BinaryBasicBlock &BB) { + const int64_t Offset = BB.getOutputStartAddress() - InstAddress; + return isBranchOffsetInRange(Inst, Offset); + }; + + // Keep track of *all* function trampolines that are going to be added to the + // function layout at the end of relaxation. + std::vector<std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>>> + FunctionTrampolines; + + // Function fragments are relaxed independently. + for (FunctionFragment &FF : BF.getLayout().fragments()) { + // Fill out code size estimation for the fragment. Use output BB address + // ranges to store offsets from the start of the function fragment.
+ uint64_t CodeSize = 0; + for (BinaryBasicBlock *BB : FF) { + BB->setOutputStartAddress(CodeSize); + CodeSize += BB->estimateSize(); + BB->setOutputEndAddress(CodeSize); + } + + // Dynamically-updated size of the fragment. + uint64_t FragmentSize = CodeSize; + + // Size of the trampoline in bytes. + constexpr uint64_t TrampolineSize = 4; + + // Trampolines created for the fragment. DestinationBB -> TrampolineBB. + // NB: here we store only the first trampoline created for DestinationBB. + DenseMap<const BinaryBasicBlock *, BinaryBasicBlock *> FragmentTrampolines; + + // Create a trampoline code after \p BB or at the end of the fragment if BB + // is nullptr. If \p UpdateOffsets is true, update FragmentSize and offsets + // for basic blocks affected by the insertion of the trampoline. + auto addTrampolineAfter = [&](BinaryBasicBlock *BB, + BinaryBasicBlock *TargetBB, uint64_t Count, + bool UpdateOffsets = true) { + FunctionTrampolines.emplace_back(BB ? BB : FF.back(), + BF.createBasicBlock()); + BinaryBasicBlock *TrampolineBB = FunctionTrampolines.back().second.get(); + + MCInst Inst; + { + auto L = BC.scopeLock(); + MIB->createUncondBranch(Inst, TargetBB->getLabel(), BC.Ctx.get()); + } + TrampolineBB->addInstruction(Inst); + TrampolineBB->addSuccessor(TargetBB, Count); + TrampolineBB->setExecutionCount(Count); + const uint64_t TrampolineAddress = + BB ? BB->getOutputEndAddress() : FragmentSize; + TrampolineBB->setOutputStartAddress(TrampolineAddress); + TrampolineBB->setOutputEndAddress(TrampolineAddress + TrampolineSize); + TrampolineBB->setFragmentNum(FF.getFragmentNum()); + + if (!FragmentTrampolines.lookup(TargetBB)) + FragmentTrampolines[TargetBB] = TrampolineBB; + + if (!UpdateOffsets) + return TrampolineBB; + + FragmentSize += TrampolineSize; + + // If the trampoline was added at the end of the fragment, offsets of + // other fragments should stay intact. + if (!BB) + return TrampolineBB; + + // Update offsets for blocks after BB. + for (BinaryBasicBlock *IBB : FF) { + if (IBB->getOutputStartAddress() >= TrampolineAddress) { + IBB->setOutputStartAddress(IBB->getOutputStartAddress() + + TrampolineSize); + IBB->setOutputEndAddress(IBB->getOutputEndAddress() + TrampolineSize); + } + } + + // Update offsets for trampolines in this fragment that are placed after + // the new trampoline. Note that trampoline blocks are not part of the + // function/fragment layout until we add them right before the return + // from relaxLocalBranches(). + for (auto &Pair : FunctionTrampolines) { + BinaryBasicBlock *IBB = Pair.second.get(); + if (IBB->getFragmentNum() != TrampolineBB->getFragmentNum()) + continue; + if (IBB == TrampolineBB) + continue; + if (IBB->getOutputStartAddress() >= TrampolineAddress) { + IBB->setOutputStartAddress(IBB->getOutputStartAddress() + + TrampolineSize); + IBB->setOutputEndAddress(IBB->getOutputEndAddress() + TrampolineSize); + } + } + + return TrampolineBB; + }; + + // Pre-populate trampolines by splitting unconditional branches from the + // containing basic block.
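+    // (Hedged editorial note, not part of the original patch: hoisting each
+    // unconditional branch into its own trampoline block seeds
+    // FragmentTrampolines, so later relaxation of conditional branches can
+    // reuse an existing branch to the same target instead of emitting a new
+    // one -- the reuse that compact-code-model.s checks for in foo().)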
+ for (BinaryBasicBlock *BB : FF) { + MCInst *Inst = BB->getLastNonPseudoInstr(); + if (!Inst || !MIB->isUnconditionalBranch(*Inst)) + continue; + + const MCSymbol *TargetSymbol = MIB->getTargetSymbol(*Inst); + BB->eraseInstruction(BB->findInstruction(Inst)); + BB->setOutputEndAddress(BB->getOutputEndAddress() - TrampolineSize); + + BinaryBasicBlock::BinaryBranchInfo BI; + BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol, BI); + + BinaryBasicBlock *TrampolineBB = + addTrampolineAfter(BB, TargetBB, BI.Count, /*UpdateOffsets*/ false); + BB->replaceSuccessor(TargetBB, TrampolineBB, BI.Count); + } + + /// Relax the branch \p Inst in basic block \p BB that targets \p TargetBB. + /// \p InstAddress contains offset of the branch from the start of the + /// containing function fragment. + auto relaxBranch = [&](BinaryBasicBlock *BB, MCInst &Inst, + uint64_t InstAddress, BinaryBasicBlock *TargetBB) { + BinaryFunction *BF = BB->getParent(); + + // Use branch taken count for optimal relaxation. + const uint64_t Count = BB->getBranchInfo(*TargetBB).Count; + assert(Count != BinaryBasicBlock::COUNT_NO_PROFILE && + "Expected valid branch execution count"); + + // Try to reuse an existing trampoline without introducing any new code. + BinaryBasicBlock *TrampolineBB = FragmentTrampolines.lookup(TargetBB); + if (TrampolineBB && isBlockInRange(Inst, InstAddress, *TrampolineBB)) { + BB->replaceSuccessor(TargetBB, TrampolineBB, Count); + TrampolineBB->setExecutionCount(TrampolineBB->getExecutionCount() + + Count); + auto L = BC.scopeLock(); + MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get()); + return; + } + + // For cold branches, check if we can introduce a trampoline at the end + // of the fragment that is within the branch reach. Note that such + // trampoline may change address later and become unreachable in which + // case we will need further relaxation. + const int64_t OffsetToEnd = FragmentSize - InstAddress; + if (Count == 0 && isBranchOffsetInRange(Inst, OffsetToEnd)) { + TrampolineBB = addTrampolineAfter(nullptr, TargetBB, Count); + BB->replaceSuccessor(TargetBB, TrampolineBB, Count); + auto L = BC.scopeLock(); + MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get()); + + return; + } + + // Insert a new block after the current one and use it as a trampoline. + TrampolineBB = addTrampolineAfter(BB, TargetBB, Count); + + // If the other successor is a fall-through, invert the condition code. + const BinaryBasicBlock *const NextBB = + BF->getLayout().getBasicBlockAfter(BB, /*IgnoreSplits*/ false); + if (BB->getConditionalSuccessor(false) == NextBB) { + BB->swapConditionalSuccessors(); + auto L = BC.scopeLock(); + MIB->reverseBranchCondition(Inst, NextBB->getLabel(), BC.Ctx.get()); + } else { + auto L = BC.scopeLock(); + MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get()); + } + BB->replaceSuccessor(TargetBB, TrampolineBB, Count); + }; + + bool MayNeedRelaxation; + uint64_t NumIterations = 0; + do { + MayNeedRelaxation = false; + ++NumIterations; + for (auto BBI = FF.begin(); BBI != FF.end(); ++BBI) { + BinaryBasicBlock *BB = *BBI; + uint64_t NextInstOffset = BB->getOutputStartAddress(); + for (MCInst &Inst : *BB) { + const size_t InstAddress = NextInstOffset; + if (!MIB->isPseudo(Inst)) + NextInstOffset += 4; + + if (!mayNeedStub(BF.getBinaryContext(), Inst)) + continue; + + const size_t BitsAvailable = MIB->getPCRelEncodingSize(Inst); + + // Span of +/-128MB. 
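+          // (Hedged editorial note, not part of the original patch: the 28
+          // bits of byte offset correspond to B/BL's 26-bit word immediate;
+          // B.cond and CBZ/CBNZ encode 19+2 bits (+/-1MB), and TBZ/TBNZ
+          // 14+2 bits (+/-32KB), which is where ShortestJumpBits == 16 and
+          // LongestJumpBits == 28 come from.)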
+ if (BitsAvailable == LongestJumpBits) + continue; + + const MCSymbol *TargetSymbol = MIB->getTargetSymbol(Inst); + BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol); + assert(TargetBB && + "Basic block target expected for conditional branch."); + + // Check if the relaxation is needed. + if (TargetBB->getFragmentNum() == FF.getFragmentNum() && + isBlockInRange(Inst, InstAddress, *TargetBB)) + continue; + + relaxBranch(BB, Inst, InstAddress, TargetBB); + + MayNeedRelaxation = true; + } + } + + // We may have added new instructions, but the whole fragment is less than + // the minimum branch span. + if (FragmentSize < ShortestJumpSpan) + MayNeedRelaxation = false; + + } while (MayNeedRelaxation); + + LLVM_DEBUG({ + if (NumIterations > 2) { + dbgs() << "BOLT-DEBUG: relaxed fragment " << FF.getFragmentNum().get() + << " of " << BF << " in " << NumIterations << " iterations\n"; + } + }); + (void)NumIterations; + } + + // Add trampoline blocks from all fragments to the layout. + DenseMap<BinaryBasicBlock *, std::vector<std::unique_ptr<BinaryBasicBlock>>> + Insertions; + for (std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>> &Pair : + FunctionTrampolines) { + if (!Pair.second) + continue; + Insertions[Pair.first].emplace_back(std::move(Pair.second)); + } + + for (auto &Pair : Insertions) { + BF.insertBasicBlocks(Pair.first, std::move(Pair.second), + /*UpdateLayout*/ true, /*UpdateCFI*/ true, + /*RecomputeLPs*/ false); + } +} + Error LongJmpPass::runOnFunctions(BinaryContext &BC) { + + if (opts::CompactCodeModel) { + BC.outs() + << "BOLT-INFO: relaxing branches for compact code model (<128MB)\n"; + + ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { + relaxLocalBranches(BF); + }; + + ParallelUtilities::PredicateTy SkipPredicate = + [&](const BinaryFunction &BF) { + return !BC.shouldEmit(BF) || !BF.isSimple(); + }; + + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, + SkipPredicate, "RelaxLocalBranches"); + + return Error::success(); + } + BC.outs() << "BOLT-INFO: Starting stub-insertion pass\n"; std::vector<BinaryFunction *> Sorted = BC.getSortedFunctions(); bool Modified; diff --git a/bolt/test/AArch64/compact-code-model.s b/bolt/test/AArch64/compact-code-model.s new file mode 100644 index 00000000000000..0805302a885981 --- /dev/null +++ b/bolt/test/AArch64/compact-code-model.s @@ -0,0 +1,92 @@ +## Check that llvm-bolt successfully relaxes branches for compact (<128MB) code +## model. + +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o +# RUN: link_fdata %s %t.o %t.fdata +# RUN: llvm-strip --strip-unneeded %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -static +# RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --split-functions \ +# RUN: --keep-nops --compact-code-model +# RUN: llvm-objdump -d \ +# RUN: --disassemble-symbols=_start,_start.cold.0,foo,foo.cold.0 %t.bolt \ +# RUN: | FileCheck %s +# RUN: llvm-nm -nS %t.bolt | FileCheck %s --check-prefix=CHECK-NM + +## Fragments of _start and foo will be separated by large_function, which is +## over 1MB in size - larger than the range conditional branches can cover, +## requiring branch relaxation.
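+## (Hedged editorial note, not part of the original test: the 300000 nops in
+## large_function occupy 1200000 bytes, beyond the +/-1MB reach of b.eq, so
+## the conditional branches below must be rewritten to go through
+## unconditional-branch trampolines, which reach +/-128MB.)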
+ +# CHECK-NM: _start +# CHECK-NM: foo +# CHECK-NM: 0000000000124f84 T large_function +# CHECK-NM: _start.cold.0 +# CHECK-NM: foo.cold.0 + + .text + .globl _start + .type _start, %function +_start: +# CHECK: <_start>: +# FDATA: 0 [unknown] 0 1 _start 0 0 100 + .cfi_startproc + cmp x0, 1 + b.eq .L0 +# CHECK: b.eq +# CHECK-NEXT: b +# CHECK-NEXT: b + + bl large_function +.L0: + ret x30 + .cfi_endproc +.size _start, .-_start + +## Check that long branch in foo() is reused during relaxation. I.e. we should +## see just one branch to the cold fragment. + + .globl foo + .type foo, %function +foo: +# CHECK: <foo>: +# FDATA: 0 [unknown] 0 1 foo 0 0 100 + .cfi_startproc + cmp x0, 0 +.T0: + b.eq .ERROR +# CHECK: b {{.*}} <foo.cold.0> +# CHECK-NOT: b {{.*}} <foo.cold.0> +# FDATA: 1 foo #.T0# 1 foo #.T1# 0 100 +.T1: + bl large_function + cmp x0, 1 +.T2: + b.eq .ERROR +# FDATA: 1 foo #.T2# 1 foo #.T3# 0 100 +.T3: + mov x1, x0 + mov x0, 0 + ret x30 + +# CHECK: <foo.cold.0>: +# CHECK-NEXT: mov x0, #0x1 +# CHECK-NEXT: ret +.ERROR: + mov x0, 1 + ret x30 + .cfi_endproc +.size foo, .-foo + + .globl large_function + .type large_function, %function +large_function: +# FDATA: 0 [unknown] 0 1 large_function 0 0 100 + .cfi_startproc + .rept 300000 + nop + .endr + ret x30 + .cfi_endproc +.size large_function, .-large_function + +## Force relocation mode. + .reloc 0, R_AARCH64_NONE diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index f00422cd8b8045..f7285352b9deb9 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -3774,6 +3774,74 @@ type-generic alternative to the ``__builtin_clz{,l,ll}`` (respectively ``__builtin_ctz{,l,ll}``) builtins, with support for other integer types, such as ``unsigned __int128`` and C23 ``unsigned _BitInt(N)``. +``__builtin_counted_by_ref`` +---------------------------- + +``__builtin_counted_by_ref`` returns a pointer to the count field from the +``counted_by`` attribute. + +The argument must be a flexible array member. If the argument isn't a flexible +array member or doesn't have the ``counted_by`` attribute, the builtin returns +``(void *)0``. + +**Syntax**: + +.. code-block:: c + + T *__builtin_counted_by_ref(void *array) + +**Examples**: + +.. code-block:: c + + #define alloc(P, FAM, COUNT) ({ \ + size_t __ignored_assignment; \ + typeof(P) __p = NULL; \ + __p = malloc(MAX(sizeof(*__p), \ + sizeof(*__p) + sizeof(*__p->FAM) * COUNT)); \ + \ + *_Generic( \ + __builtin_counted_by_ref(__p->FAM), \ + void *: &__ignored_assignment, \ + default: __builtin_counted_by_ref(__p->FAM)) = COUNT; \ + \ + __p; \ + }) + +**Description**: + +The ``__builtin_counted_by_ref`` builtin allows the programmer to prevent a +common error associated with the ``counted_by`` attribute. When using the +``counted_by`` attribute, the ``count`` field **must** be set before the +flexible array member can be accessed. Otherwise, the sanitizers may view such +accesses as false positives. For instance, it's not uncommon for programmers to +initialize the flexible array before setting the ``count`` field: + +.. code-block:: c + + struct s { + int dummy; + short count; + long array[] __attribute__((counted_by(count))); + }; + + struct s *ptr = malloc(sizeof(struct s) + sizeof(long) * COUNT); + + for (int i = 0; i < COUNT; ++i) + ptr->array[i] = i; + + ptr->count = COUNT; + +Enforcing the rule that ``ptr->count = COUNT;`` must occur after every +allocation of a struct with a flexible array member with the ``counted_by`` +attribute is prone to failure in large code bases.
This builtin mitigates this problem +for allocators (like in Linux) that are implemented in a way where the counter +assignment can happen automatically. + +**Note:** The value returned by ``__builtin_counted_by_ref`` cannot be assigned +to a variable, have its address taken, or be passed into or returned from a +function, because doing so violates bounds safety conventions. + Multiprecision Arithmetic Builtins ---------------------------------- diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index eae9c10058c9a0..6feba3d2172262 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -321,6 +321,29 @@ Non-comprehensive list of changes in this release as well as declarations. - ``__builtin_abs`` function can now be used in constant expressions. +- The new builtin ``__builtin_counted_by_ref`` was added for contexts where the + programmer needs access to the ``counted_by`` attribute's field, but it's not + directly available --- e.g. in macros. For instance, it can be used to automatically + set the counter during allocation in the Linux kernel: + + .. code-block:: c + + /* A simplified version of Linux allocation macros */ + #define alloc(PTR, FAM, COUNT) ({ \ + size_t __ignored_assignment; \ + typeof(PTR) __p; \ + size_t __size = sizeof(*__p) + sizeof(*__p->FAM) * COUNT; \ + __p = malloc(__size); \ + *_Generic( \ + __builtin_counted_by_ref(__p->FAM), \ + void *: &__ignored_assignment, \ + default: __builtin_counted_by_ref(__p->FAM)) = COUNT; \ + __p; \ + }) + + The flexible array member (FAM) can now be accessed immediately without causing + issues with the sanitizer because the counter is automatically set. + New Compiler Flags ------------------ diff --git a/clang/include/clang/AST/TemplateArgumentVisitor.h b/clang/include/clang/AST/TemplateArgumentVisitor.h index cf0d3220158063..923f045a995703 100644 --- a/clang/include/clang/AST/TemplateArgumentVisitor.h +++ b/clang/include/clang/AST/TemplateArgumentVisitor.h @@ -52,7 +52,8 @@ class Base { #define VISIT_METHOD(CATEGORY) \ RetTy Visit##CATEGORY##TemplateArgument(REF(TemplateArgument) TA, \ ParamTys...
P) { \ - return VisitTemplateArgument(TA, std::forward<ParamTys>(P)...); \ + return static_cast<ImplClass *>(this)->VisitTemplateArgument( \ + TA, std::forward<ParamTys>(P)...); \ } VISIT_METHOD(Null); diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h index 5f024b4b5fd782..11c64547721739 100644 --- a/clang/include/clang/Basic/AttributeCommonInfo.h +++ b/clang/include/clang/Basic/AttributeCommonInfo.h @@ -67,6 +67,7 @@ class AttributeCommonInfo { IgnoredAttribute, UnknownAttribute, }; + enum class Scope { NONE, CLANG, GNU, MSVC, OMP, HLSL, GSL, RISCV }; private: const IdentifierInfo *AttrName = nullptr; diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index e484c3969fe228..4360e0bf9840f1 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4932,3 +4932,9 @@ def ArithmeticFence : LangBuiltin<"ALL_LANGUAGES"> { let Attributes = [CustomTypeChecking, Constexpr]; let Prototype = "void(...)"; } + +def CountedByRef : Builtin { + let Spellings = ["__builtin_counted_by_ref"]; + let Attributes = [NoThrow, CustomTypeChecking]; + let Prototype = "int(...)"; +} diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c96a3f6d6e157f..6a244c276facd6 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6652,6 +6652,18 @@ def warn_counted_by_attr_elt_type_unknown_size : Warning, InGroup; +// __builtin_counted_by_ref diagnostics: +def err_builtin_counted_by_ref_must_be_flex_array_member : Error< + "'__builtin_counted_by_ref' argument must reference a flexible array member">; +def err_builtin_counted_by_ref_cannot_leak_reference : Error< + "value returned by '__builtin_counted_by_ref' cannot be assigned to a " + "variable, have its address taken, or passed into or returned from a function">; +def err_builtin_counted_by_ref_invalid_lhs_use : Error< + "value returned by '__builtin_counted_by_ref' cannot be used in " + "%select{an array subscript|a binary}0 expression">; +def err_builtin_counted_by_ref_has_side_effects : Error< + "'__builtin_counted_by_ref' argument cannot have side-effects">; + let CategoryName = "ARC Semantic Issue" in { // ARC-mode diagnostics. diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index aa2f5ff3ef7207..fad446a05e782f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2510,6 +2510,8 @@ class Sema final : public SemaBase { bool BuiltinNonDeterministicValue(CallExpr *TheCall); + bool BuiltinCountedByRef(CallExpr *TheCall); + // Matrix builtin handling. ExprResult BuiltinMatrixTranspose(CallExpr *TheCall, ExprResult CallResult); ExprResult BuiltinMatrixColumnMajorLoad(CallExpr *TheCall, diff --git a/clang/include/clang/Serialization/ASTRecordWriter.h b/clang/include/clang/Serialization/ASTRecordWriter.h index d6090ba1a6c690..67720a0aebc1ca 100644 --- a/clang/include/clang/Serialization/ASTRecordWriter.h +++ b/clang/include/clang/Serialization/ASTRecordWriter.h @@ -60,8 +60,9 @@ class ASTRecordWriter public: /// Construct an ASTRecordWriter that uses the default encoding scheme.
- ASTRecordWriter(ASTWriter &W, ASTWriter::RecordDataImpl &Record) - : DataStreamBasicWriter(W.getASTContext()), Writer(&W), Record(&Record) {} + ASTRecordWriter(ASTContext &Context, ASTWriter &W, + ASTWriter::RecordDataImpl &Record) + : DataStreamBasicWriter(Context), Writer(&W), Record(&Record) {} /// Construct an ASTRecordWriter that uses the same encoding scheme as another /// ASTRecordWriter. @@ -208,7 +209,7 @@ class ASTRecordWriter /// Emit a reference to a type. void AddTypeRef(QualType T) { - return Writer->AddTypeRef(T, *Record); + return Writer->AddTypeRef(getASTContext(), T, *Record); } void writeQualType(QualType T) { AddTypeRef(T); } diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index d0e841f367c1e0..dc9fcd3c33726e 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -119,9 +119,6 @@ class ASTWriter : public ASTDeserializationListener, /// The PCM manager which manages memory buffers for pcm files. InMemoryModuleCache &ModuleCache; - /// The ASTContext we're writing. - ASTContext *Context = nullptr; - /// The preprocessor we're writing. Preprocessor *PP = nullptr; @@ -545,7 +542,7 @@ class ASTWriter : public ASTDeserializationListener, unsigned getSubmoduleID(Module *Mod); /// Write the given subexpression to the bitstream. - void WriteSubStmt(Stmt *S); + void WriteSubStmt(ASTContext &Context, Stmt *S); void WriteBlockInfoBlock(); void WriteControlBlock(Preprocessor &PP, StringRef isysroot); @@ -564,25 +561,25 @@ class ASTWriter : public ASTDeserializationListener, void WriteHeaderSearch(const HeaderSearch &HS); void WritePreprocessorDetail(PreprocessingRecord &PPRec, uint64_t MacroOffsetsBase); - void WriteSubmodules(Module *WritingModule); + void WriteSubmodules(Module *WritingModule, ASTContext &Context); void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, bool isModule); unsigned TypeExtQualAbbrev = 0; void WriteTypeAbbrevs(); - void WriteType(QualType T); + void WriteType(ASTContext &Context, QualType T); bool isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC); - void GenerateNameLookupTable(const DeclContext *DC, + void GenerateNameLookupTable(ASTContext &Context, const DeclContext *DC, llvm::SmallVectorImpl<char> &LookupTable); uint64_t WriteDeclContextLexicalBlock(ASTContext &Context, const DeclContext *DC); uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC); void WriteTypeDeclOffsets(); void WriteFileDeclIDsMap(); - void WriteComments(); + void WriteComments(ASTContext &Context); void WriteSelectors(Sema &SemaRef); void WriteReferencedSelectorsPool(Sema &SemaRef); void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver &IdResolver, @@ -590,8 +587,10 @@ class ASTWriter : public ASTDeserializationListener, void WriteDeclAndTypes(ASTContext &Context); void PrepareWritingSpecialDecls(Sema &SemaRef); void WriteSpecialDeclRecords(Sema &SemaRef); - void WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord); - void WriteDeclContextVisibleUpdate(const DeclContext *DC); + void WriteDeclUpdatesBlocks(ASTContext &Context, + RecordDataImpl &OffsetsRecord); + void WriteDeclContextVisibleUpdate(ASTContext &Context, + const DeclContext *DC); void WriteFPPragmaOptions(const FPOptionsOverride &Opts); void WriteOpenCLExtensions(Sema &SemaRef); void WriteCUDAPragmas(Sema &SemaRef); @@ -653,11 +652,6 @@ class ASTWriter : public ASTDeserializationListener, bool GeneratingReducedBMI = false); ~ASTWriter() override; -
ASTContext &getASTContext() const { - assert(Context && "requested AST context when not writing AST"); - return *Context; - } - const LangOptions &getLangOpts() const; /// Get a timestamp for output into the AST file. The actual timestamp @@ -723,10 +717,10 @@ class ASTWriter : public ASTDeserializationListener, uint32_t getMacroDirectivesOffset(const IdentifierInfo *Name); /// Emit a reference to a type. - void AddTypeRef(QualType T, RecordDataImpl &Record); + void AddTypeRef(ASTContext &Context, QualType T, RecordDataImpl &Record); /// Force a type to be emitted and get its ID. - serialization::TypeID GetOrCreateTypeID(QualType T); + serialization::TypeID GetOrCreateTypeID(ASTContext &Context, QualType T); /// Find the first local declaration of a given local redeclarable /// decl. diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 8204e3509dd563..047f354b200745 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3657,6 +3657,10 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { (!hasAttr<ArmBuiltinAliasAttr>() && !hasAttr<BuiltinAliasAttr>())) return 0; + if (getASTContext().getLangOpts().CPlusPlus && + BuiltinID == Builtin::BI__builtin_counted_by_ref) + return 0; + const ASTContext &Context = getASTContext(); if (!Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) return BuiltinID; diff --git a/clang/lib/Basic/Attributes.cpp b/clang/lib/Basic/Attributes.cpp index 867d241a2cf847..2d18fb3f9d5bb2 100644 --- a/clang/lib/Basic/Attributes.cpp +++ b/clang/lib/Basic/Attributes.cpp @@ -17,6 +17,8 @@ #include "clang/Basic/ParsedAttrInfo.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/StringMap.h" + using namespace clang; static int hasAttributeImpl(AttributeCommonInfo::Syntax Syntax, StringRef Name, @@ -153,12 +155,37 @@ std::string AttributeCommonInfo::getNormalizedFullName() const { normalizeName(getAttrName(), getScopeName(), getSyntax())); } +// Sorted list of attribute scope names +static constexpr std::pair<StringRef, AttributeCommonInfo::Scope> ScopeList[] = + {{"", AttributeCommonInfo::Scope::NONE}, + {"clang", AttributeCommonInfo::Scope::CLANG}, + {"gnu", AttributeCommonInfo::Scope::GNU}, + {"gsl", AttributeCommonInfo::Scope::GSL}, + {"hlsl", AttributeCommonInfo::Scope::HLSL}, + {"msvc", AttributeCommonInfo::Scope::MSVC}, + {"omp", AttributeCommonInfo::Scope::OMP}, + {"riscv", AttributeCommonInfo::Scope::RISCV}}; + +AttributeCommonInfo::Scope +getScopeFromNormalizedScopeName(StringRef ScopeName) { + auto It = std::lower_bound( + std::begin(ScopeList), std::end(ScopeList), ScopeName, + [](const std::pair<StringRef, AttributeCommonInfo::Scope> &Element, + StringRef Value) { return Element.first < Value; }); + assert(It != std::end(ScopeList) && It->first == ScopeName); + + return It->second; +} + unsigned AttributeCommonInfo::calculateAttributeSpellingListIndex() const { // Both variables will be used in tablegen generated // attribute spell list index matching code.
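  // (Hedged editorial note, not part of the original patch:
  // getScopeFromNormalizedScopeName above performs a std::lower_bound binary
  // search, so ScopeList must stay sorted by scope name; e.g. a lookup of
  // "gnu" yields AttributeCommonInfo::Scope::GNU. The ComputedScope value
  // produced below is consumed by the generated AttrSpellingListIndex.inc.)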
auto Syntax = static_cast<AttributeCommonInfo::Syntax>(getSyntax()); - StringRef Scope = normalizeAttrScopeName(getScopeName(), Syntax); - StringRef Name = normalizeAttrName(getAttrName(), Scope, Syntax); + StringRef ScopeName = normalizeAttrScopeName(getScopeName(), Syntax); + StringRef Name = normalizeAttrName(getAttrName(), ScopeName, Syntax); + + AttributeCommonInfo::Scope ComputedScope = + getScopeFromNormalizedScopeName(ScopeName); #include "clang/Sema/AttrSpellingListIndex.inc" } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1acb0161f0dd25..316d981a9da5f4 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3691,6 +3691,35 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, /*EmittedE=*/nullptr, IsDynamic)); } + case Builtin::BI__builtin_counted_by_ref: { + // Default to returning '(void *) 0'. + llvm::Value *Result = llvm::ConstantPointerNull::get( + llvm::PointerType::getUnqual(getLLVMContext())); + + const Expr *Arg = E->getArg(0)->IgnoreParenImpCasts(); + + if (auto *UO = dyn_cast<UnaryOperator>(Arg); + UO && UO->getOpcode() == UO_AddrOf) { + Arg = UO->getSubExpr()->IgnoreParenImpCasts(); + + if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Arg)) + Arg = ASE->getBase()->IgnoreParenImpCasts(); + } + + if (const MemberExpr *ME = dyn_cast_if_present<MemberExpr>(Arg)) { + if (auto *CATy = + ME->getMemberDecl()->getType()->getAs<CountAttributedType>(); + CATy && CATy->getKind() == CountAttributedType::CountedBy) { + const auto *FAMDecl = cast<FieldDecl>(ME->getMemberDecl()); + if (const FieldDecl *CountFD = FAMDecl->findCountedByField()) + Result = GetCountedByFieldExprGEP(Arg, FAMDecl, CountFD); + else + llvm::report_fatal_error("Cannot find the counted_by 'count' field"); + } + } + + return RValue::get(Result); + } case Builtin::BI__builtin_prefetch: { Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); // FIXME: Technically these constants should be of type 'int', yes? diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index f97de88e9efb98..1b730223e4d73c 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1145,15 +1145,7 @@ static bool getGEPIndicesToField(CodeGenFunction &CGF, const RecordDecl *RD, return false; } -/// This method is typically called in contexts where we can't generate -/// side-effects, like in __builtin_dynamic_object_size. When finding -/// expressions, only choose those that have either already been emitted or can -/// be loaded without side-effects. -/// -/// - \p FAMDecl: the \p Decl for the flexible array member. It may not be -/// within the top-level struct. -/// - \p CountDecl: must be within the same non-anonymous struct as \p FAMDecl.
-llvm::Value *CodeGenFunction::EmitLoadOfCountedByField( +llvm::Value *CodeGenFunction::GetCountedByFieldExprGEP( const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl) { const RecordDecl *RD = CountDecl->getParent()->getOuterLexicalRecordContext(); @@ -1182,12 +1174,25 @@ llvm::Value *CodeGenFunction::EmitLoadOfCountedByField( return nullptr; Indices.push_back(Builder.getInt32(0)); - Res = Builder.CreateInBoundsGEP( + return Builder.CreateInBoundsGEP( ConvertType(QualType(RD->getTypeForDecl(), 0)), Res, RecIndicesTy(llvm::reverse(Indices)), "..counted_by.gep"); +} - return Builder.CreateAlignedLoad(ConvertType(CountDecl->getType()), Res, - getIntAlign(), "..counted_by.load"); +/// This method is typically called in contexts where we can't generate +/// side-effects, like in __builtin_dynamic_object_size. When finding +/// expressions, only choose those that have either already been emitted or can +/// be loaded without side-effects. +/// +/// - \p FAMDecl: the \p Decl for the flexible array member. It may not be +/// within the top-level struct. +/// - \p CountDecl: must be within the same non-anonymous struct as \p FAMDecl. +llvm::Value *CodeGenFunction::EmitLoadOfCountedByField( + const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl) { + if (llvm::Value *GEP = GetCountedByFieldExprGEP(Base, FAMDecl, CountDecl)) + return Builder.CreateAlignedLoad(ConvertType(CountDecl->getType()), GEP, + getIntAlign(), "..counted_by.load"); + return nullptr; } void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 192f488d97ec27..b026809da0f0f6 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3314,6 +3314,10 @@ class CodeGenFunction : public CodeGenTypeCache { const FieldDecl *FAMDecl, uint64_t &Offset); + llvm::Value *GetCountedByFieldExprGEP(const Expr *Base, + const FieldDecl *FAMDecl, + const FieldDecl *CountDecl); + /// Build an expression accessing the "counted_by" field. 
llvm::Value *EmitLoadOfCountedByField(const Expr *Base, const FieldDecl *FAMDecl, diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp index c8075cbfe36b35..3f81c3cb0f80e8 100644 --- a/clang/lib/Driver/ToolChains/HIPUtility.cpp +++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp @@ -148,8 +148,8 @@ class HIPUndefinedFatBinSymbols { bool Verbose; std::set<std::string> FatBinSymbols; std::set<std::string> GPUBinHandleSymbols; - std::set<std::string> DefinedFatBinSymbols; - std::set<std::string> DefinedGPUBinHandleSymbols; + std::set<std::string, std::less<>> DefinedFatBinSymbols; + std::set<std::string, std::less<>> DefinedGPUBinHandleSymbols; const std::string FatBinPrefix = "__hip_fatbin"; const std::string GPUBinHandlePrefix = "__hip_gpubin_handle"; @@ -260,11 +260,10 @@ class HIPUndefinedFatBinSymbols { // Add undefined symbols if they are not in the defined sets if (isFatBinSymbol && - DefinedFatBinSymbols.find(Name.str()) == DefinedFatBinSymbols.end()) + DefinedFatBinSymbols.find(Name) == DefinedFatBinSymbols.end()) FatBinSymbols.insert(Name.str()); - else if (isGPUBinHandleSymbol && - DefinedGPUBinHandleSymbols.find(Name.str()) == - DefinedGPUBinHandleSymbols.end()) + else if (isGPUBinHandleSymbol && DefinedGPUBinHandleSymbols.find(Name) == - DefinedGPUBinHandleSymbols.end()) GPUBinHandleSymbols.insert(Name.str()); } } diff --git a/clang/lib/InstallAPI/Frontend.cpp b/clang/lib/InstallAPI/Frontend.cpp index 2ebe72bf021cf9..9e8c60fbda3d00 100644 --- a/clang/lib/InstallAPI/Frontend.cpp +++ b/clang/lib/InstallAPI/Frontend.cpp @@ -94,7 +94,7 @@ InstallAPIContext::findAndRecordFile(const FileEntry *FE, // included. This is primarily to resolve headers found // in a different location than what was passed directly as input. StringRef IncludeName = PP.getHeaderSearchInfo().getIncludeNameForHeader(FE); - auto BackupIt = KnownIncludes.find(IncludeName.str()); + auto BackupIt = KnownIncludes.find(IncludeName); if (BackupIt != KnownIncludes.end()) { KnownFiles[FE] = BackupIt->second; return BackupIt->second; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index d78968179b1fdc..96008b14225a4c 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2973,6 +2973,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, } break; } + case Builtin::BI__builtin_counted_by_ref: + if (BuiltinCountedByRef(TheCall)) + return ExprError(); + break; } if (getLangOpts().HLSL && HLSL().CheckBuiltinFunctionCall(BuiltinID, TheCall)) @@ -5575,6 +5579,55 @@ bool Sema::BuiltinSetjmp(CallExpr *TheCall) { return false; } +bool Sema::BuiltinCountedByRef(CallExpr *TheCall) { + if (checkArgCount(TheCall, 1)) + return true; + + ExprResult ArgRes = UsualUnaryConversions(TheCall->getArg(0)); + if (ArgRes.isInvalid()) + return true; + + // For simplicity, we support only limited expressions for the argument. + // Specifically a pointer to a flexible array member: 'ptr->array'. This + // allows us to reject arguments with complex casting, which really shouldn't + // be a huge problem.
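+  // (Hedged editorial illustration, not part of the original patch, assuming
+  // a typical counted_by struct:
+  //   struct s { int n; char fam[] __attribute__((counted_by(n))); } *p;
+  //   __builtin_counted_by_ref(p->fam);  // OK: result has type int *
+  //   __builtin_counted_by_ref(p->n);    // error: not a flexible array member
+  // )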
+ const Expr *Arg = ArgRes.get()->IgnoreParenImpCasts(); + if (!isa<PointerType>(Arg->getType()) && !Arg->getType()->isArrayType()) + return Diag(Arg->getBeginLoc(), + diag::err_builtin_counted_by_ref_must_be_flex_array_member) + << Arg->getSourceRange(); + + if (Arg->HasSideEffects(Context)) + return Diag(Arg->getBeginLoc(), + diag::err_builtin_counted_by_ref_has_side_effects) + << Arg->getSourceRange(); + + if (const auto *ME = dyn_cast<MemberExpr>(Arg)) { + if (!ME->isFlexibleArrayMemberLike( + Context, getLangOpts().getStrictFlexArraysLevel())) + return Diag(Arg->getBeginLoc(), + diag::err_builtin_counted_by_ref_must_be_flex_array_member) + << Arg->getSourceRange(); + + if (auto *CATy = + ME->getMemberDecl()->getType()->getAs<CountAttributedType>(); + CATy && CATy->getKind() == CountAttributedType::CountedBy) { + const auto *FAMDecl = cast<FieldDecl>(ME->getMemberDecl()); + if (const FieldDecl *CountFD = FAMDecl->findCountedByField()) { + TheCall->setType(Context.getPointerType(CountFD->getType())); + return false; + } + } + } else { + return Diag(Arg->getBeginLoc(), + diag::err_builtin_counted_by_ref_must_be_flex_array_member) + << Arg->getSourceRange(); + } + + TheCall->setType(Context.getPointerType(Context.VoidTy)); + return false; +} + namespace { class UncoveredArgHandler { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index df8f025030e2b1..68527d9da8c799 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -9209,6 +9209,38 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS, LHSType = Context.getCanonicalType(LHSType).getUnqualifiedType(); RHSType = Context.getCanonicalType(RHSType).getUnqualifiedType(); + // __builtin_counted_by_ref cannot be assigned to a variable, used in a + // function call, or in a return. + auto FindBuiltinCountedByRefExpr = [&](Expr *E) -> CallExpr * { + struct BuiltinCountedByRefVisitor + : public RecursiveASTVisitor<BuiltinCountedByRefVisitor> { + CallExpr *TheCall = nullptr; + bool VisitCallExpr(CallExpr *CE) { + if (CE->getBuiltinCallee() == Builtin::BI__builtin_counted_by_ref) { + TheCall = CE; + return false; + } + return true; + } + bool VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *UE) { + // A UnaryExprOrTypeTraitExpr---e.g. sizeof, __alignof, etc.---isn't + // the same as a CallExpr, so if we find a __builtin_counted_by_ref() + // call in one, ignore it. + return false; + } + } V; + V.TraverseStmt(E); + return V.TheCall; + }; + static llvm::SmallPtrSet<CallExpr *, 4> Diagnosed; + if (auto *CE = FindBuiltinCountedByRefExpr(RHS.get()); + CE && !Diagnosed.count(CE)) { + Diagnosed.insert(CE); + Diag(CE->getExprLoc(), + diag::err_builtin_counted_by_ref_cannot_leak_reference) + << CE->getSourceRange(); + } + // Common case: no conversion required. if (LHSType == RHSType) { Kind = CK_NoOp; @@ -13757,6 +13789,43 @@ QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS, ConvTy = CheckAssignmentConstraints(Loc, LHSType, RHSType); } + // __builtin_counted_by_ref can't be used in a binary expression or array + // subscript on the LHS. + int DiagOption = -1; + auto FindInvalidUseOfBoundsSafetyCounter = [&](Expr *E) -> CallExpr * { + struct BuiltinCountedByRefVisitor + : public RecursiveASTVisitor<BuiltinCountedByRefVisitor> { + CallExpr *CE = nullptr; + bool InvalidUse = false; + int Option = -1; + + bool VisitCallExpr(CallExpr *E) { + if (E->getBuiltinCallee() == Builtin::BI__builtin_counted_by_ref) { + CE = E; + return false; + } + return true; + } + + bool VisitArraySubscriptExpr(ArraySubscriptExpr *E) { + InvalidUse = true; + Option = 0; // report 'array expression' in diagnostic.
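+        // (Hedged editorial example, not part of the original patch: an LHS
+        // such as `__builtin_counted_by_ref(p->fam)[0] = n;` is reported
+        // through this path as an array subscript use.)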
+ return true; + } + bool VisitBinaryOperator(BinaryOperator *E) { + InvalidUse = true; + Option = 1; // report 'binary expression' in diagnostic. + return true; + } + } V; + V.TraverseStmt(E); + DiagOption = V.Option; + return V.InvalidUse ? V.CE : nullptr; + }; + if (auto *CE = FindInvalidUseOfBoundsSafetyCounter(LHSExpr)) + Diag(CE->getExprLoc(), diag::err_builtin_counted_by_ref_invalid_lhs_use) + << DiagOption << CE->getSourceRange(); + if (DiagnoseAssignmentResult(ConvTy, Loc, LHSType, RHSType, RHS.get(), AssignmentAction::Assigning)) return QualType(); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index b95e29cbc02515..016d1d4acad137 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -277,8 +277,8 @@ class ASTTypeWriter { ASTRecordWriter BasicWriter; public: - ASTTypeWriter(ASTWriter &Writer) - : Writer(Writer), BasicWriter(Writer, Record) {} + ASTTypeWriter(ASTContext &Context, ASTWriter &Writer) + : Writer(Writer), BasicWriter(Context, Writer, Record) {} uint64_t write(QualType T) { if (T.hasLocalNonFastQualifiers()) { @@ -2872,7 +2872,7 @@ static unsigned getNumberOfModules(Module *Mod) { return ChildModules + 1; } -void ASTWriter::WriteSubmodules(Module *WritingModule) { +void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext &Context) { // Enter the submodule description block. Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5); @@ -3124,7 +3124,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { // Emit the reachable initializers. // The initializer may only be unreachable in reduced BMI. RecordData Inits; - for (Decl *D : Context->getModuleInitializers(Mod)) + for (Decl *D : Context.getModuleInitializers(Mod)) if (wasDeclEmitted(D)) AddDeclRef(D, Inits); if (!Inits.empty()) @@ -3259,7 +3259,7 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, //===----------------------------------------------------------------------===// /// Write the representation of a type to the AST stream. -void ASTWriter::WriteType(QualType T) { +void ASTWriter::WriteType(ASTContext &Context, QualType T) { TypeIdx &IdxRef = TypeIdxs[T]; if (IdxRef.getValue() == 0) // we haven't seen this type before. IdxRef = TypeIdx(0, NextTypeID++); @@ -3269,7 +3269,8 @@ void ASTWriter::WriteType(QualType T) { assert(Idx.getValue() >= FirstTypeID && "Writing predefined type"); // Emit the type's representation. - uint64_t Offset = ASTTypeWriter(*this).write(T) - DeclTypesBlockStartOffset; + uint64_t Offset = + ASTTypeWriter(Context, *this).write(T) - DeclTypesBlockStartOffset; // Record the offset for this type. 
uint64_t Index = Idx.getValue() - FirstTypeID; @@ -3393,7 +3394,7 @@ void ASTWriter::WriteFileDeclIDsMap() { Stream.EmitRecordWithBlob(AbbrevCode, Record, bytes(FileGroupedDeclIDs)); } -void ASTWriter::WriteComments() { +void ASTWriter::WriteComments(ASTContext &Context) { Stream.EnterSubblock(COMMENTS_BLOCK_ID, 3); auto _ = llvm::make_scope_exit([this] { Stream.ExitBlock(); }); if (!PP->getPreprocessorOpts().WriteCommentListToPCH) return; RecordData Record; - for (const auto &FO : Context->Comments.OrderedComments) { + for (const auto &FO : Context.Comments.OrderedComments) { for (const auto &OC : FO.second) { const RawComment *I = OC.second; Record.clear(); @@ -3656,7 +3657,7 @@ void ASTWriter::WriteReferencedSelectorsPool(Sema &SemaRef) { return; RecordData Record; - ASTRecordWriter Writer(*this, Record); + ASTRecordWriter Writer(SemaRef.Context, *this, Record); // Note: this writes out all references even for a dependent AST. But it is // very tricky to fix, and given that @selector shouldn't really appear in @@ -4137,9 +4138,9 @@ static bool isLookupResultNotInteresting(ASTWriter &Writer, return true; } -void -ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC, - llvm::SmallVectorImpl<char> &LookupTable) { +void ASTWriter::GenerateNameLookupTable( + ASTContext &Context, const DeclContext *ConstDC, + llvm::SmallVectorImpl<char> &LookupTable) { assert(!ConstDC->hasLazyLocalLexicalLookups() && !ConstDC->hasLazyExternalLexicalLookups() && "must call buildLookups first"); @@ -4234,8 +4235,8 @@ ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC, // another declaration in the redecl chain. Any non-implicit constructor or // conversion function which doesn't occur in all the lexical contexts // would be an ODR violation. - auto ImplicitCtorName = Context->DeclarationNames.getCXXConstructorName( - Context->getCanonicalType(Context->getRecordType(D))); + auto ImplicitCtorName = Context.DeclarationNames.getCXXConstructorName( + Context.getCanonicalType(Context.getRecordType(D))); if (ConstructorNameSet.erase(ImplicitCtorName)) Names.push_back(ImplicitCtorName); @@ -4415,7 +4416,7 @@ uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context, // Create the on-disk hash table in a buffer. SmallString<4096> LookupTable; - GenerateNameLookupTable(DC, LookupTable); + GenerateNameLookupTable(Context, DC, LookupTable); // Write the lookup table RecordData::value_type Record[] = {DECL_CONTEXT_VISIBLE}; @@ -4431,14 +4432,15 @@ uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context, /// DeclContext in a dependent AST file. As such, they only exist for the TU /// (in C++), for namespaces, and for classes with forward-declared unscoped /// enumeration members (in C++11). -void ASTWriter::WriteDeclContextVisibleUpdate(const DeclContext *DC) { +void ASTWriter::WriteDeclContextVisibleUpdate(ASTContext &Context, + const DeclContext *DC) { StoredDeclsMap *Map = DC->getLookupPtr(); if (!Map || Map->empty()) return; // Create the on-disk hash table in a buffer. SmallString<4096> LookupTable; - GenerateNameLookupTable(DC, LookupTable); + GenerateNameLookupTable(Context, DC, LookupTable); // If we're updating a namespace, select a key declaration as the key for the // update record; those are the only ones that will be checked on reload.
@@ -4753,15 +4755,14 @@ void ASTWriter::AddString(StringRef Str, RecordDataImpl &Record) { } bool ASTWriter::PreparePathForOutput(SmallVectorImpl<char> &Path) { - assert(Context && "should have context when outputting path"); + assert(WritingAST && "can't prepare path for output when not writing AST"); // Leave special file names as they are. StringRef PathStr(Path.data(), Path.size()); if (PathStr == "<built-in>" || PathStr == "<command line>") return false; - bool Changed = - cleanPathForOutput(Context->getSourceManager().getFileManager(), Path); + bool Changed = cleanPathForOutput(PP->getFileManager(), Path); // Remove a prefix to make the path relative, if relevant. const char *PathBegin = Path.data(); @@ -4850,7 +4851,7 @@ ASTWriter::~ASTWriter() = default; const LangOptions &ASTWriter::getLangOpts() const { assert(WritingAST && "can't determine lang opts when not writing AST"); - return Context->getLangOpts(); + return PP->getLangOpts(); } time_t ASTWriter::getTimestampForOutput(const FileEntry *E) const { @@ -4874,11 +4875,9 @@ ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef, StringRef OutputFile, WriteBlockInfoBlock(); - Context = &SemaRef.Context; PP = &SemaRef.PP; this->WritingModule = WritingModule; ASTFileSignature Signature = WriteASTCore(SemaRef, isysroot, WritingModule); - Context = nullptr; PP = nullptr; this->WritingModule = nullptr; this->BaseDirectory.clear(); @@ -5417,14 +5416,14 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, // Form the record of special types. RecordData SpecialTypes; - AddTypeRef(Context.getRawCFConstantStringType(), SpecialTypes); - AddTypeRef(Context.getFILEType(), SpecialTypes); - AddTypeRef(Context.getjmp_bufType(), SpecialTypes); - AddTypeRef(Context.getsigjmp_bufType(), SpecialTypes); - AddTypeRef(Context.ObjCIdRedefinitionType, SpecialTypes); - AddTypeRef(Context.ObjCClassRedefinitionType, SpecialTypes); - AddTypeRef(Context.ObjCSelRedefinitionType, SpecialTypes); - AddTypeRef(Context.getucontext_tType(), SpecialTypes); + AddTypeRef(Context, Context.getRawCFConstantStringType(), SpecialTypes); + AddTypeRef(Context, Context.getFILEType(), SpecialTypes); + AddTypeRef(Context, Context.getjmp_bufType(), SpecialTypes); + AddTypeRef(Context, Context.getsigjmp_bufType(), SpecialTypes); + AddTypeRef(Context, Context.ObjCIdRedefinitionType, SpecialTypes); + AddTypeRef(Context, Context.ObjCClassRedefinitionType, SpecialTypes); + AddTypeRef(Context, Context.ObjCSelRedefinitionType, SpecialTypes); + AddTypeRef(Context, Context.getucontext_tType(), SpecialTypes); PrepareWritingSpecialDecls(SemaRef); @@ -5523,7 +5522,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, WriteFileDeclIDsMap(); WriteSourceManagerBlock(PP.getSourceManager()); - WriteComments(); + WriteComments(Context); WritePreprocessor(PP, isModule); WriteHeaderSearch(PP.getHeaderSearchInfo()); WriteSelectors(SemaRef); @@ -5536,7 +5535,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, // If we're emitting a module, write out the submodule information.
if (WritingModule) - WriteSubmodules(WritingModule); + WriteSubmodules(WritingModule, SemaRef.Context); Stream.EmitRecord(SPECIAL_TYPES, SpecialTypes); @@ -5656,12 +5655,12 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { WriteTypeAbbrevs(); WriteDeclAbbrevs(); do { - WriteDeclUpdatesBlocks(DeclUpdatesOffsetsRecord); + WriteDeclUpdatesBlocks(Context, DeclUpdatesOffsetsRecord); while (!DeclTypesToEmit.empty()) { DeclOrType DOT = DeclTypesToEmit.front(); DeclTypesToEmit.pop(); if (DOT.isType()) - WriteType(DOT.getType()); + WriteType(Context, DOT.getType()); else WriteDecl(Context, DOT.getDecl()); } @@ -5757,18 +5756,19 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { UpdateVisibleAbbrev = Stream.EmitAbbrev(std::move(Abv)); // And a visible updates block for the translation unit. - WriteDeclContextVisibleUpdate(TU); + WriteDeclContextVisibleUpdate(Context, TU); // If we have any extern "C" names, write out a visible update for them. if (Context.ExternCContext) - WriteDeclContextVisibleUpdate(Context.ExternCContext); + WriteDeclContextVisibleUpdate(Context, Context.ExternCContext); // Write the visible updates to DeclContexts. for (auto *DC : UpdatedDeclContexts) - WriteDeclContextVisibleUpdate(DC); + WriteDeclContextVisibleUpdate(Context, DC); } -void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { +void ASTWriter::WriteDeclUpdatesBlocks(ASTContext &Context, + RecordDataImpl &OffsetsRecord) { if (DeclUpdates.empty()) return; @@ -5781,7 +5781,7 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { bool HasUpdatedBody = false; bool HasAddedVarDefinition = false; RecordData RecordData; - ASTRecordWriter Record(*this, RecordData); + ASTRecordWriter Record(Context, *this, RecordData); for (auto &Update : DeclUpdate.second) { DeclUpdateKind Kind = (DeclUpdateKind)Update.getKind(); @@ -5827,7 +5827,7 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { Record.push_back(RD->isParamDestroyedInCallee()); Record.push_back(llvm::to_underlying(RD->getArgPassingRestrictions())); Record.AddCXXDefinitionData(RD); - Record.AddOffset(WriteDeclContextLexicalBlock(*Context, RD)); + Record.AddOffset(WriteDeclContextLexicalBlock(Context, RD)); // This state is sometimes updated by template instantiation, when we // switch from the specialization referring to the template declaration @@ -5880,7 +5880,7 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { } case UPD_CXX_DEDUCED_RETURN_TYPE: - Record.push_back(GetOrCreateTypeID(Update.getType())); + Record.push_back(GetOrCreateTypeID(Context, Update.getType())); break; case UPD_DECL_MARKED_USED: @@ -6022,8 +6022,7 @@ ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq) { unsigned ModuleFileIndex = 0; // See SourceLocationEncoding.h for the encoding details. 
- if (Context->getSourceManager().isLoadedSourceLocation(Loc) && - Loc.isValid()) { + if (PP->getSourceManager().isLoadedSourceLocation(Loc) && Loc.isValid()) { assert(getChain()); auto SLocMapI = getChain()->GlobalSLocOffsetMap.find( SourceManager::MaxLoadedOffset - Loc.getOffset() - 1); @@ -6184,8 +6183,9 @@ void ASTRecordWriter::AddTypeLoc(TypeLoc TL, LocSeq *OuterSeq) { TLW.Visit(TL); } -void ASTWriter::AddTypeRef(QualType T, RecordDataImpl &Record) { - Record.push_back(GetOrCreateTypeID(T)); +void ASTWriter::AddTypeRef(ASTContext &Context, QualType T, + RecordDataImpl &Record) { + Record.push_back(GetOrCreateTypeID(Context, T)); } template @@ -6213,9 +6213,8 @@ static TypeID MakeTypeID(ASTContext &Context, QualType T, return IdxForType(T).asTypeID(FastQuals); } -TypeID ASTWriter::GetOrCreateTypeID(QualType T) { - assert(Context); - return MakeTypeID(*Context, T, [&](QualType T) -> TypeIdx { +TypeID ASTWriter::GetOrCreateTypeID(ASTContext &Context, QualType T) { + return MakeTypeID(Context, T, [&](QualType T) -> TypeIdx { if (T.isNull()) return TypeIdx(); assert(!T.getLocalFastQualifiers()); @@ -6335,7 +6334,7 @@ void ASTWriter::associateDeclWithFile(const Decl *D, LocalDeclID ID) { if (isa(D)) return; - SourceManager &SM = Context->getSourceManager(); + SourceManager &SM = PP->getSourceManager(); SourceLocation FileLoc = SM.getFileLoc(Loc); assert(SM.isLocalSourceLocation(FileLoc)); FileID FID; @@ -6530,10 +6529,10 @@ void ASTRecordWriter::AddCXXBaseSpecifier(const CXXBaseSpecifier &Base) { : SourceLocation()); } -static uint64_t EmitCXXBaseSpecifiers(ASTWriter &W, +static uint64_t EmitCXXBaseSpecifiers(ASTContext &Context, ASTWriter &W, ArrayRef Bases) { ASTWriter::RecordData Record; - ASTRecordWriter Writer(W, Record); + ASTRecordWriter Writer(Context, W, Record); Writer.push_back(Bases.size()); for (auto &Base : Bases) @@ -6544,14 +6543,14 @@ static uint64_t EmitCXXBaseSpecifiers(ASTWriter &W, // FIXME: Move this out of the main ASTRecordWriter interface. void ASTRecordWriter::AddCXXBaseSpecifiers(ArrayRef Bases) { - AddOffset(EmitCXXBaseSpecifiers(*Writer, Bases)); + AddOffset(EmitCXXBaseSpecifiers(getASTContext(), *Writer, Bases)); } static uint64_t -EmitCXXCtorInitializers(ASTWriter &W, +EmitCXXCtorInitializers(ASTContext &Context, ASTWriter &W, ArrayRef CtorInits) { ASTWriter::RecordData Record; - ASTRecordWriter Writer(W, Record); + ASTRecordWriter Writer(Context, W, Record); Writer.push_back(CtorInits.size()); for (auto *Init : CtorInits) { @@ -6585,7 +6584,7 @@ EmitCXXCtorInitializers(ASTWriter &W, // FIXME: Move this out of the main ASTRecordWriter interface. void ASTRecordWriter::AddCXXCtorInitializers( ArrayRef CtorInits) { - AddOffset(EmitCXXCtorInitializers(*Writer, CtorInits)); + AddOffset(EmitCXXCtorInitializers(getASTContext(), *Writer, CtorInits)); } void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { @@ -6613,18 +6612,17 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { bool ModulesCodegen = !D->isDependentType() && - (Writer->Context->getLangOpts().ModulesDebugInfo || - D->isInNamedModule()); + (Writer->getLangOpts().ModulesDebugInfo || D->isInNamedModule()); Record->push_back(ModulesCodegen); if (ModulesCodegen) Writer->AddDeclRef(D, Writer->ModularCodegenDecls); // IsLambda bit is already saved. 
- AddUnresolvedSet(Data.Conversions.get(*Writer->Context)); + AddUnresolvedSet(Data.Conversions.get(getASTContext())); Record->push_back(Data.ComputedVisibleConversions); if (Data.ComputedVisibleConversions) - AddUnresolvedSet(Data.VisibleConversions.get(*Writer->Context)); + AddUnresolvedSet(Data.VisibleConversions.get(getASTContext())); // Data.Definition is the owning decl, no need to write it. if (!Data.IsLambda) { diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index b9ce3db41ef916..ad357e30d57529 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -34,7 +34,6 @@ using namespace serialization; namespace clang { class ASTDeclWriter : public DeclVisitor { ASTWriter &Writer; - ASTContext &Context; ASTRecordWriter Record; serialization::DeclCode Code; @@ -45,7 +44,7 @@ namespace clang { public: ASTDeclWriter(ASTWriter &Writer, ASTContext &Context, ASTWriter::RecordDataImpl &Record, bool GeneratingReducedBMI) - : Writer(Writer), Context(Context), Record(Writer, Record), + : Writer(Writer), Record(Context, Writer, Record), Code((serialization::DeclCode)0), AbbrevToUse(0), GeneratingReducedBMI(GeneratingReducedBMI) {} @@ -217,7 +216,7 @@ namespace clang { // If we have any lazy specializations, and the external AST source is // our chained AST reader, we can just write out the DeclIDs. Otherwise, // we need to resolve them to actual declarations. - if (Writer.Chain != Writer.Context->getExternalSource() && + if (Writer.Chain != Record.getASTContext().getExternalSource() && Common->LazySpecializations) { D->LoadLazySpecializations(); assert(!Common->LazySpecializations); @@ -811,8 +810,8 @@ void ASTDeclWriter::VisitObjCMethodDecl(ObjCMethodDecl *D) { Record.push_back(D->isRedeclaration()); Record.push_back(D->hasRedeclaration()); if (D->hasRedeclaration()) { - assert(Context.getObjCMethodRedeclaration(D)); - Record.AddDeclRef(Context.getObjCMethodRedeclaration(D)); + assert(Record.getASTContext().getObjCMethodRedeclaration(D)); + Record.AddDeclRef(Record.getASTContext().getObjCMethodRedeclaration(D)); } // FIXME: stable encoding for @required/@optional @@ -1039,7 +1038,8 @@ void ASTDeclWriter::VisitFieldDecl(FieldDecl *D) { Record.AddStmt(D->getBitWidth()); if (!D->getDeclName() || D->isPlaceholderVar(Writer.getLangOpts())) - Record.AddDeclRef(Context.getInstantiatedFromUnnamedFieldDecl(D)); + Record.AddDeclRef( + Record.getASTContext().getInstantiatedFromUnnamedFieldDecl(D)); if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && @@ -1118,11 +1118,11 @@ void ASTDeclWriter::VisitVarDecl(VarDecl *D) { // strong definition in the module interface is provided by the // compilation of that unit, not by its users. (Inline variables are still // emitted in module users.) 
- ModulesCodegen = - (Writer.WritingModule->isInterfaceOrPartition() || - (D->hasAttr() && - Writer.Context->getLangOpts().BuildingPCHWithObjectFile)) && - Writer.Context->GetGVALinkageForVariable(D) >= GVA_StrongExternal; + ModulesCodegen = (Writer.WritingModule->isInterfaceOrPartition() || + (D->hasAttr() && + Writer.getLangOpts().BuildingPCHWithObjectFile)) && + Record.getASTContext().GetGVALinkageForVariable(D) >= + GVA_StrongExternal; } VarDeclBits.addBit(ModulesCodegen); @@ -1163,7 +1163,7 @@ void ASTDeclWriter::VisitVarDecl(VarDecl *D) { Writer.AddDeclRef(D, Writer.ModularCodegenDecls); if (D->hasAttr()) { - BlockVarCopyInit Init = Writer.Context->getBlockVarCopyInit(D); + BlockVarCopyInit Init = Record.getASTContext().getBlockVarCopyInit(D); Record.AddStmt(Init.getCopyExpr()); if (Init.getCopyExpr()) Record.push_back(Init.canThrow()); @@ -1411,7 +1411,7 @@ void ASTDeclWriter::VisitUsingDecl(UsingDecl *D) { Record.AddDeclarationNameLoc(D->DNLoc, D->getDeclName()); Record.AddDeclRef(D->FirstUsingShadow.getPointer()); Record.push_back(D->hasTypename()); - Record.AddDeclRef(Context.getInstantiatedFromUsingDecl(D)); + Record.AddDeclRef(Record.getASTContext().getInstantiatedFromUsingDecl(D)); Code = serialization::DECL_USING; } @@ -1421,7 +1421,7 @@ void ASTDeclWriter::VisitUsingEnumDecl(UsingEnumDecl *D) { Record.AddSourceLocation(D->getEnumLoc()); Record.AddTypeSourceInfo(D->getEnumType()); Record.AddDeclRef(D->FirstUsingShadow.getPointer()); - Record.AddDeclRef(Context.getInstantiatedFromUsingEnumDecl(D)); + Record.AddDeclRef(Record.getASTContext().getInstantiatedFromUsingEnumDecl(D)); Code = serialization::DECL_USING_ENUM; } @@ -1440,7 +1440,8 @@ void ASTDeclWriter::VisitUsingShadowDecl(UsingShadowDecl *D) { Record.AddDeclRef(D->getTargetDecl()); Record.push_back(D->getIdentifierNamespace()); Record.AddDeclRef(D->UsingOrNextShadow); - Record.AddDeclRef(Context.getInstantiatedFromUsingShadowDecl(D)); + Record.AddDeclRef( + Record.getASTContext().getInstantiatedFromUsingShadowDecl(D)); if (D->getDeclContext() == D->getLexicalDeclContext() && D->getFirstDecl() == D->getMostRecentDecl() && !D->hasAttrs() && @@ -1544,7 +1545,7 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) { // FIXME: Avoid adding the key function if the class is defined in // module purview since in that case the key function is meaningless. if (D->isCompleteDefinition()) - Record.AddDeclRef(Context.getCurrentKeyFunction(D)); + Record.AddDeclRef(Record.getASTContext().getCurrentKeyFunction(D)); Code = serialization::DECL_CXX_RECORD; } @@ -1735,7 +1736,8 @@ void ASTDeclWriter::VisitClassTemplateDecl(ClassTemplateDecl *D) { // Force emitting the corresponding deduction guide in reduced BMI mode. // Otherwise, the deduction guide may be optimized out incorrectly. if (Writer.isGeneratingReducedBMI()) { - auto Name = Context.DeclarationNames.getCXXDeductionGuideName(D); + auto Name = + Record.getASTContext().DeclarationNames.getCXXDeductionGuideName(D); for (auto *DG : D->getDeclContext()->noload_lookup(Name)) Writer.GetDeclRef(DG->getCanonicalDecl()); } @@ -1791,8 +1793,9 @@ void ASTDeclWriter::VisitClassTemplateSpecializationDecl( // FIXME: Would it be more efficient to add a callback register function // in sema to register the deduction guide? 
if (Writer.isWritingStdCXXNamedModules()) { - auto Name = Context.DeclarationNames.getCXXDeductionGuideName( - D->getSpecializedTemplate()); + auto Name = + Record.getASTContext().DeclarationNames.getCXXDeductionGuideName( + D->getSpecializedTemplate()); for (auto *DG : D->getDeclContext()->noload_lookup(Name)) Writer.GetDeclRef(DG->getCanonicalDecl()); } @@ -2022,8 +2025,10 @@ void ASTDeclWriter::VisitDeclContext(DeclContext *DC) { // details. Writer.DelayedNamespace.push_back(cast(DC)); } else { - LexicalOffset = Writer.WriteDeclContextLexicalBlock(Context, DC); - VisibleOffset = Writer.WriteDeclContextVisibleBlock(Context, DC); + LexicalOffset = + Writer.WriteDeclContextLexicalBlock(Record.getASTContext(), DC); + VisibleOffset = + Writer.WriteDeclContextVisibleBlock(Record.getASTContext(), DC); } Record.AddOffset(LexicalOffset); @@ -2880,18 +2885,18 @@ void ASTRecordWriter::AddFunctionDefinition(const FunctionDecl *FD) { // strong definition in the module interface is provided by the // compilation of that unit, not by its users. (Inline functions are still // emitted in module users.) - Linkage = Writer->Context->GetGVALinkageForFunction(FD); + Linkage = getASTContext().GetGVALinkageForFunction(FD); ModulesCodegen = *Linkage >= GVA_StrongExternal; } - if (Writer->Context->getLangOpts().ModulesCodegen || + if (Writer->getLangOpts().ModulesCodegen || (FD->hasAttr() && - Writer->Context->getLangOpts().BuildingPCHWithObjectFile)) { + Writer->getLangOpts().BuildingPCHWithObjectFile)) { // Under -fmodules-codegen, codegen is performed for all non-internal, // non-always_inline functions, unless they are available elsewhere. if (!FD->hasAttr()) { if (!Linkage) - Linkage = Writer->Context->GetGVALinkageForFunction(FD); + Linkage = getASTContext().GetGVALinkageForFunction(FD); ModulesCodegen = *Linkage != GVA_Internal && *Linkage != GVA_AvailableExternally; } diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 321e0031661ee2..7f700c2977e09c 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -91,8 +91,9 @@ namespace clang { PakedBitsWriter CurrentPackingBits; public: - ASTStmtWriter(ASTWriter &Writer, ASTWriter::RecordData &Record) - : Writer(Writer), Record(Writer, Record), + ASTStmtWriter(ASTContext &Context, ASTWriter &Writer, + ASTWriter::RecordData &Record) + : Writer(Writer), Record(Context, Writer, Record), Code(serialization::STMT_NULL_PTR), AbbrevToUse(0), CurrentPackingBits(this->Record) {} @@ -2112,7 +2113,7 @@ void ASTStmtWriter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *E) { // propagted. DeclarationName Name = E->getName(); for (auto *Found : - Writer.getASTContext().getTranslationUnitDecl()->lookup(Name)) + Record.getASTContext().getTranslationUnitDecl()->lookup(Name)) if (Found->isFromASTFile()) Writer.GetDeclRef(Found); @@ -2952,9 +2953,9 @@ void ASTWriter::ClearSwitchCaseIDs() { /// Write the given substatement or subexpression to the /// bitstream. 
-void ASTWriter::WriteSubStmt(Stmt *S) { +void ASTWriter::WriteSubStmt(ASTContext &Context, Stmt *S) { RecordData Record; - ASTStmtWriter Writer(*this, Record); + ASTStmtWriter Writer(Context, *this, Record); ++NumStatements; if (!S) { @@ -3003,7 +3004,7 @@ void ASTRecordWriter::FlushStmts() { assert(Writer->ParentStmts.empty() && "unexpected entries in parent stmt map"); for (unsigned I = 0, N = StmtsToEmit.size(); I != N; ++I) { - Writer->WriteSubStmt(StmtsToEmit[I]); + Writer->WriteSubStmt(getASTContext(), StmtsToEmit[I]); assert(N == StmtsToEmit.size() && "record modified while being written!"); @@ -3024,7 +3025,7 @@ void ASTRecordWriter::FlushSubStmts() { // that a simple stack machine can be used when loading), and don't emit a // STMT_STOP after each one. for (unsigned I = 0, N = StmtsToEmit.size(); I != N; ++I) { - Writer->WriteSubStmt(StmtsToEmit[N - I - 1]); + Writer->WriteSubStmt(getASTContext(), StmtsToEmit[N - I - 1]); assert(N == StmtsToEmit.size() && "record modified while being written!"); } diff --git a/clang/test/AST/ast-print-builtin-counted-by-ref.c b/clang/test/AST/ast-print-builtin-counted-by-ref.c new file mode 100644 index 00000000000000..c0ff7515fc8208 --- /dev/null +++ b/clang/test/AST/ast-print-builtin-counted-by-ref.c @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux -ast-print %s -o - | FileCheck %s + +typedef unsigned long int size_t; + +int global_array[42]; +int global_int; + +struct fam_struct { + int x; + char count; + int array[] __attribute__((counted_by(count))); +}; + +// CHECK-LABEL: void test1(struct fam_struct *ptr, int size) { +// CHECK-NEXT: size_t __ignored_assignment; +// CHECK-NEXT: *_Generic(__builtin_counted_by_ref(ptr->array), void *: &__ignored_assignment, default: __builtin_counted_by_ref(ptr->array)) = 42; +void test1(struct fam_struct *ptr, int size) { + size_t __ignored_assignment; + + *_Generic(__builtin_counted_by_ref(ptr->array), + void *: &__ignored_assignment, + default: __builtin_counted_by_ref(ptr->array)) = 42; // ok +} diff --git a/clang/test/CodeGen/builtin-counted-by-ref.c b/clang/test/CodeGen/builtin-counted-by-ref.c new file mode 100644 index 00000000000000..8ad715879aa767 --- /dev/null +++ b/clang/test/CodeGen/builtin-counted-by-ref.c @@ -0,0 +1,177 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s --check-prefix=X86_64 +// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck %s --check-prefix=I386 + +struct a { + char x; + short count; + int array[] __attribute__((counted_by(count))); +}; + +// X86_64-LABEL: define dso_local ptr @test1( +// X86_64-SAME: i32 noundef [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] { +// X86_64-NEXT: [[ENTRY:.*:]] +// X86_64-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 +// X86_64-NEXT: [[P:%.*]] = alloca ptr, align 8 +// X86_64-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 +// X86_64-NEXT: [[MUL:%.*]] = mul i64 4, [[CONV]] +// X86_64-NEXT: [[ADD:%.*]] = add i64 4, [[MUL]] +// X86_64-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 noundef [[ADD]]) #[[ATTR2:[0-9]+]] +// X86_64-NEXT: store ptr [[CALL]], ptr [[P]], align 8 +// X86_64-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP1]] to i16 +// X86_64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P]], align 8 
+// X86_64-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[TMP2]], i32 0, i32 1 +// X86_64-NEXT: store i16 [[CONV1]], ptr [[DOT_COUNTED_BY_GEP]], align 2 +// X86_64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P]], align 8 +// X86_64-NEXT: ret ptr [[TMP3]] +// +// I386-LABEL: define dso_local ptr @test1( +// I386-SAME: i32 noundef [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] { +// I386-NEXT: [[ENTRY:.*:]] +// I386-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 +// I386-NEXT: [[P:%.*]] = alloca ptr, align 4 +// I386-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[MUL:%.*]] = mul i32 4, [[TMP0]] +// I386-NEXT: [[ADD:%.*]] = add i32 4, [[MUL]] +// I386-NEXT: [[CALL:%.*]] = call ptr @malloc(i32 noundef [[ADD]]) #[[ATTR2:[0-9]+]] +// I386-NEXT: store ptr [[CALL]], ptr [[P]], align 4 +// I386-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[CONV:%.*]] = trunc i32 [[TMP1]] to i16 +// I386-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P]], align 4 +// I386-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[TMP2]], i32 0, i32 1 +// I386-NEXT: store i16 [[CONV]], ptr [[DOT_COUNTED_BY_GEP]], align 2 +// I386-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P]], align 4 +// I386-NEXT: ret ptr [[TMP3]] +// +struct a *test1(int size) { + struct a *p = __builtin_malloc(sizeof(struct a) + sizeof(int) * size); + + *__builtin_counted_by_ref(p->array) = size; + return p; +} + +struct b { + int _filler; + struct { + int __filler; + struct { + int ___filler; + struct { + char count; + }; + }; + }; + struct { + int filler_; + struct { + int filler__; + struct { + long array[] __attribute__((counted_by(count))); + }; + }; + }; +}; + +// X86_64-LABEL: define dso_local ptr @test2( +// X86_64-SAME: i32 noundef [[SIZE:%.*]]) #[[ATTR0]] { +// X86_64-NEXT: [[ENTRY:.*:]] +// X86_64-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 +// X86_64-NEXT: [[P:%.*]] = alloca ptr, align 8 +// X86_64-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 +// X86_64-NEXT: [[MUL:%.*]] = mul i64 4, [[CONV]] +// X86_64-NEXT: [[ADD:%.*]] = add i64 4, [[MUL]] +// X86_64-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 noundef [[ADD]]) #[[ATTR2]] +// X86_64-NEXT: store ptr [[CALL]], ptr [[P]], align 8 +// X86_64-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP1]] to i8 +// X86_64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P]], align 8 +// X86_64-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[TMP2]], i32 0, i32 1, i32 1, i32 1, i32 0 +// X86_64-NEXT: store i8 [[CONV1]], ptr [[DOT_COUNTED_BY_GEP]], align 1 +// X86_64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P]], align 8 +// X86_64-NEXT: ret ptr [[TMP3]] +// +// I386-LABEL: define dso_local ptr @test2( +// I386-SAME: i32 noundef [[SIZE:%.*]]) #[[ATTR0]] { +// I386-NEXT: [[ENTRY:.*:]] +// I386-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 +// I386-NEXT: [[P:%.*]] = alloca ptr, align 4 +// I386-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[MUL:%.*]] = mul i32 4, [[TMP0]] +// I386-NEXT: [[ADD:%.*]] = add i32 4, [[MUL]] +// I386-NEXT: [[CALL:%.*]] = call ptr @malloc(i32 noundef [[ADD]]) #[[ATTR2]] +// I386-NEXT: store ptr [[CALL]], ptr [[P]], align 4 +// I386-NEXT: [[TMP1:%.*]] 
= load i32, ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[CONV:%.*]] = trunc i32 [[TMP1]] to i8 +// I386-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P]], align 4 +// I386-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[TMP2]], i32 0, i32 1, i32 1, i32 1, i32 0 +// I386-NEXT: store i8 [[CONV]], ptr [[DOT_COUNTED_BY_GEP]], align 1 +// I386-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P]], align 4 +// I386-NEXT: ret ptr [[TMP3]] +// +struct b *test2(int size) { + struct b *p = __builtin_malloc(sizeof(struct a) + sizeof(int) * size); + + *__builtin_counted_by_ref(p->array) = size; + return p; +} + +struct c { + char x; + short count; + int array[]; +}; + +// X86_64-LABEL: define dso_local ptr @test3( +// X86_64-SAME: i32 noundef [[SIZE:%.*]]) #[[ATTR0]] { +// X86_64-NEXT: [[ENTRY:.*:]] +// X86_64-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 +// X86_64-NEXT: [[P:%.*]] = alloca ptr, align 8 +// X86_64-NEXT: [[__IGNORED:%.*]] = alloca i64, align 8 +// X86_64-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 +// X86_64-NEXT: [[MUL:%.*]] = mul i64 4, [[CONV]] +// X86_64-NEXT: [[ADD:%.*]] = add i64 4, [[MUL]] +// X86_64-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 noundef [[ADD]]) #[[ATTR2]] +// X86_64-NEXT: store ptr [[CALL]], ptr [[P]], align 8 +// X86_64-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// X86_64-NEXT: [[CONV1:%.*]] = sext i32 [[TMP1]] to i64 +// X86_64-NEXT: store i64 [[CONV1]], ptr [[__IGNORED]], align 8 +// X86_64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P]], align 8 +// X86_64-NEXT: ret ptr [[TMP2]] +// +// I386-LABEL: define dso_local ptr @test3( +// I386-SAME: i32 noundef [[SIZE:%.*]]) #[[ATTR0]] { +// I386-NEXT: [[ENTRY:.*:]] +// I386-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 +// I386-NEXT: [[P:%.*]] = alloca ptr, align 4 +// I386-NEXT: [[__IGNORED:%.*]] = alloca i32, align 4 +// I386-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: [[MUL:%.*]] = mul i32 4, [[TMP0]] +// I386-NEXT: [[ADD:%.*]] = add i32 4, [[MUL]] +// I386-NEXT: [[CALL:%.*]] = call ptr @malloc(i32 noundef [[ADD]]) #[[ATTR2]] +// I386-NEXT: store ptr [[CALL]], ptr [[P]], align 4 +// I386-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// I386-NEXT: store i32 [[TMP1]], ptr [[__IGNORED]], align 4 +// I386-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P]], align 4 +// I386-NEXT: ret ptr [[TMP2]] +// +struct c *test3(int size) { + struct c *p = __builtin_malloc(sizeof(struct c) + sizeof(int) * size); + unsigned long int __ignored; + + *_Generic( + __builtin_counted_by_ref(p->array), + void *: &__ignored, + default: __builtin_counted_by_ref(p->array)) = size; + + return p; +} diff --git a/clang/test/Sema/builtin-counted-by-ref.c b/clang/test/Sema/builtin-counted-by-ref.c new file mode 100644 index 00000000000000..5a7ecefcb78976 --- /dev/null +++ b/clang/test/Sema/builtin-counted-by-ref.c @@ -0,0 +1,123 @@ +// RUN: %clang_cc1 -std=c99 -fsyntax-only -verify %s + +typedef unsigned long int size_t; + +int global_array[42]; +int global_int; + +struct fam_struct { + int x; + char count; + int array[] __attribute__((counted_by(count))); +}; + +void test1(struct fam_struct *ptr, int size, int idx) { + size_t size_of = sizeof(__builtin_counted_by_ref(ptr->array)); // ok + + *__builtin_counted_by_ref(ptr->array) = size; // ok + + { + size_t __ignored_assignment; + 
*_Generic(__builtin_counted_by_ref(ptr->array), + void *: &__ignored_assignment, + default: __builtin_counted_by_ref(ptr->array)) = 42; // ok + } +} + +void test2(struct fam_struct *ptr, int idx) { + __builtin_counted_by_ref(); // expected-error {{too few arguments to function call, expected 1, have 0}} + __builtin_counted_by_ref(ptr->array, ptr->x, ptr->count); // expected-error {{too many arguments to function call, expected 1, have 3}} +} + +void test3(struct fam_struct *ptr, int idx) { + __builtin_counted_by_ref(&ptr->array[0]); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(&ptr->array[idx]); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(&ptr->array); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(ptr->x); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(&ptr->x); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(global_array); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(global_int); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} + __builtin_counted_by_ref(&global_int); // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}} +} + +void test4(struct fam_struct *ptr, int idx) { + __builtin_counted_by_ref(ptr++->array); // expected-error {{'__builtin_counted_by_ref' argument cannot have side-effects}} + __builtin_counted_by_ref(&ptr->array[idx++]); // expected-error {{'__builtin_counted_by_ref' argument cannot have side-effects}} +} + +void foo(char *); + +void *test5(struct fam_struct *ptr, int size, int idx) { + char *ref = __builtin_counted_by_ref(ptr->array); // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + + ref = __builtin_counted_by_ref(ptr->array); // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + ref = (char *)(int *)(42 + &*__builtin_counted_by_ref(ptr->array)); // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + foo(__builtin_counted_by_ref(ptr->array)); // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + foo(ref = __builtin_counted_by_ref(ptr->array)); // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + + if ((ref = __builtin_counted_by_ref(ptr->array))) // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}} + ; + + for (char *p = __builtin_counted_by_ref(ptr->array); p && *p; ++p) // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a 
function}}
+    ;
+
+  return __builtin_counted_by_ref(ptr->array); // expected-error {{value returned by '__builtin_counted_by_ref' cannot be assigned to a variable, have its address taken, or passed into or returned from a function}}
+}
+
+void test6(struct fam_struct *ptr, int size, int idx) {
+  *(__builtin_counted_by_ref(ptr->array) + 4) = 37; // expected-error {{value returned by '__builtin_counted_by_ref' cannot be used in a binary expression}}
+  __builtin_counted_by_ref(ptr->array)[3] = 37; // expected-error {{value returned by '__builtin_counted_by_ref' cannot be used in an array subscript expression}}
+}
+
+struct non_fam_struct {
+  char x;
+  long *pointer;
+  int array[42];
+  short count;
+};
+
+void *test7(struct non_fam_struct *ptr, int size) {
+  *__builtin_counted_by_ref(ptr->array) = size; // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}}
+  *__builtin_counted_by_ref(&ptr->array[0]) = size; // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}}
+  *__builtin_counted_by_ref(ptr->pointer) = size; // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}}
+  *__builtin_counted_by_ref(&ptr->pointer[0]) = size; // expected-error {{'__builtin_counted_by_ref' argument must reference a flexible array member}}
+}
+
+struct char_count {
+  char count;
+  int array[] __attribute__((counted_by(count)));
+} *cp;
+
+struct short_count {
+  short count;
+  int array[] __attribute__((counted_by(count)));
+} *sp;
+
+struct int_count {
+  int count;
+  int array[] __attribute__((counted_by(count)));
+} *ip;
+
+struct unsigned_count {
+  unsigned count;
+  int array[] __attribute__((counted_by(count)));
+} *up;
+
+struct long_count {
+  long count;
+  int array[] __attribute__((counted_by(count)));
+} *lp;
+
+struct unsigned_long_count {
+  unsigned long count;
+  int array[] __attribute__((counted_by(count)));
+} *ulp;
+
+void test8(void) {
+  _Static_assert(_Generic(__builtin_counted_by_ref(cp->array), char * : 1, default : 0) == 1, "wrong return type");
+  _Static_assert(_Generic(__builtin_counted_by_ref(sp->array), short * : 1, default : 0) == 1, "wrong return type");
+  _Static_assert(_Generic(__builtin_counted_by_ref(ip->array), int * : 1, default : 0) == 1, "wrong return type");
+  _Static_assert(_Generic(__builtin_counted_by_ref(up->array), unsigned int * : 1, default : 0) == 1, "wrong return type");
+  _Static_assert(_Generic(__builtin_counted_by_ref(lp->array), long * : 1, default : 0) == 1, "wrong return type");
+  _Static_assert(_Generic(__builtin_counted_by_ref(ulp->array), unsigned long * : 1, default : 0) == 1, "wrong return type");
+}
diff --git a/clang/test/Sema/builtin-counted-by-ref.cpp b/clang/test/Sema/builtin-counted-by-ref.cpp
new file mode 100644
index 00000000000000..b9ec9c908dcaa6
--- /dev/null
+++ b/clang/test/Sema/builtin-counted-by-ref.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -x c++ -fsyntax-only -verify %s
+
+struct fam_struct {
+  int x;
+  char count;
+  int array[] __attribute__((counted_by(count))); // expected-warning {{'counted_by' attribute ignored}}
+};
+
diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp
index 5a80c8c0b7ad36..b450989aeeddca 100644
--- a/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
"llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringSwitch.h" @@ -3843,19 +3844,60 @@ void EmitClangAttrSpellingListIndex(const RecordKeeper &Records, const Record &R = *I.second; std::vector Spellings = GetFlattenedSpellings(R); OS << " case AT_" << I.first << ": {\n"; - for (unsigned I = 0; I < Spellings.size(); ++ I) { - OS << " if (Name == \"" << Spellings[I].name() << "\" && " - << "getSyntax() == AttributeCommonInfo::AS_" << Spellings[I].variety() - << " && Scope == \"" << Spellings[I].nameSpace() << "\")\n" - << " return " << I << ";\n"; + + // If there are none or one spelling to check, resort to the default + // behavior of returning index as 0. + if (Spellings.size() <= 1) { + OS << " return 0;\n" + << " break;\n" + << " }\n"; + continue; } - OS << " break;\n"; - OS << " }\n"; + std::vector Names; + llvm::transform(Spellings, std::back_inserter(Names), + [](const FlattenedSpelling &FS) { return FS.name(); }); + llvm::sort(Names); + Names.erase(llvm::unique(Names), Names.end()); + + for (const auto &[Idx, FS] : enumerate(Spellings)) { + OS << " if ("; + if (Names.size() > 1) { + SmallVector SameLenNames; + StringRef FSName = FS.name(); + llvm::copy_if( + Names, std::back_inserter(SameLenNames), + [&](StringRef N) { return N.size() == FSName.size(); }); + + if (SameLenNames.size() == 1) { + OS << "Name.size() == " << FS.name().size() << " && "; + } else { + // FIXME: We currently fall back to comparing entire strings if there + // are 2 or more spelling names with the same length. This can be + // optimized to check only for the the first differing character + // between them instead. + OS << "Name == \"" << FS.name() << "\"" + << " && "; + } + } + + OS << "getSyntax() == AttributeCommonInfo::AS_" << FS.variety() + << " && ComputedScope == "; + if (FS.nameSpace() == "") + OS << "AttributeCommonInfo::Scope::NONE"; + else + OS << "AttributeCommonInfo::Scope::" + FS.nameSpace().upper(); + + OS << ")\n" + << " return " << Idx << ";\n"; + } + + OS << " break;\n" + << " }\n"; } - OS << " }\n"; - OS << " return 0;\n"; + OS << " }\n" + << " return 0;\n"; } // Emits code used by RecursiveASTVisitor to visit attributes diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 9a4a0ff9e75a40..41be79e2f6c80c 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -417,6 +417,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.exp libc.src.math.exp10 libc.src.math.exp10f + libc.src.math.exp10m1f libc.src.math.exp2 libc.src.math.exp2f libc.src.math.exp2m1f diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index a50e054622e1a4..92580cb1592757 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -292,7 +292,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | exp10 | |check| | |check| | | |check| | | 7.12.6.2 | F.10.3.2 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| exp10m1 | | | | |check| | | 7.12.6.3 | F.10.3.3 | +| exp10m1 | |check| | | | |check| | | 7.12.6.3 | F.10.3.3 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | exp2 | |check| 
| |check| | | |check| | | 7.12.6.4 | F.10.3.4 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/newhdrgen/yaml/math.yaml b/libc/newhdrgen/yaml/math.yaml index 3cc4b599c777bf..d6669f1e8ffcc4 100644 --- a/libc/newhdrgen/yaml/math.yaml +++ b/libc/newhdrgen/yaml/math.yaml @@ -280,6 +280,12 @@ functions: return_type: float arguments: - type: float + - name: exp10m1f + standards: + - stdc + return_type: float + arguments: + - type: float - name: exp10m1f16 standards: - stdc diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index d1ebc6ffb5821e..4fa057da1cf133 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -695,6 +695,7 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"exp10f", RetValSpec, [ArgSpec]>, GuardedFunctionSpec<"exp10f16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, + FunctionSpec<"exp10m1f", RetValSpec, [ArgSpec]>, GuardedFunctionSpec<"exp10m1f16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, FunctionSpec<"remainder", RetValSpec, [ArgSpec, ArgSpec]>, @@ -1737,7 +1738,6 @@ def StdC : StandardSpec<"stdc"> { ] >; - NamedType StructLconv = NamedType<"struct lconv">; PtrType StructLconvPtr = PtrType; diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 80c1867d2116f6..88cef320cee76d 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -131,6 +131,7 @@ add_math_entrypoint_object(exp10) add_math_entrypoint_object(exp10f) add_math_entrypoint_object(exp10f16) +add_math_entrypoint_object(exp10m1f) add_math_entrypoint_object(exp10m1f16) add_math_entrypoint_object(expm1) diff --git a/libc/src/math/exp10m1f.h b/libc/src/math/exp10m1f.h new file mode 100644 index 00000000000000..fcb9f77795da37 --- /dev/null +++ b/libc/src/math/exp10m1f.h @@ -0,0 +1,20 @@ +//===-- Implementation header for exp10m1f ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_EXP10M1F_H +#define LLVM_LIBC_SRC_MATH_EXP10M1F_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +float exp10m1f(float x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_EXP10M1F_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index ca27759d3212f2..93780a79a8e2f4 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -359,7 +359,7 @@ add_header_library( libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.nearest_integer - libc.src.__support.common + libc.src.__support.common ) add_header_library( @@ -1569,6 +1569,7 @@ add_entrypoint_object( .explogxf libc.src.errno.errno libc.src.__support.common + libc.src.__support.FPUtil.except_value_utils libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add @@ -1686,6 +1687,27 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( + exp10m1f + SRCS + exp10m1f.cpp + HDRS + ../exp10m1f.h + DEPENDS + .explogxf + libc.src.errno.errno + libc.src.__support.common + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( exp10m1f16 SRCS diff --git a/libc/src/math/generic/exp10m1f.cpp b/libc/src/math/generic/exp10m1f.cpp new file mode 100644 index 00000000000000..c0e302eea7b08a --- /dev/null +++ b/libc/src/math/generic/exp10m1f.cpp @@ -0,0 +1,216 @@ +//===-- Implementation of exp10m1f function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/exp10m1f.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/errno/libc_errno.h"
+
+#include "explogxf.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+static constexpr size_t N_EXCEPTS_LO = 11;
+
+static constexpr fputil::ExceptValues<float, N_EXCEPTS_LO> EXP10M1F_EXCEPTS_LO =
+    {{
+        // x = 0x1.0fe54ep-11, exp10m1f(x) = 0x1.3937eep-10 (RZ)
+        {0x3a07'f2a7U, 0x3a9c'9bf7U, 1U, 0U, 1U},
+        // x = 0x1.80e6eap-11, exp10m1f(x) = 0x1.bb8272p-10 (RZ)
+        {0x3a40'7375U, 0x3add'c139U, 1U, 0U, 1U},
+        // x = -0x1.2a33bcp-51, exp10m1f(x) = -0x1.57515ep-50 (RZ)
+        {0xa615'19deU, 0xa6ab'a8afU, 0U, 1U, 0U},
+        // x = -0x0p+0, exp10m1f(x) = -0x0p+0 (RZ)
+        {0x8000'0000U, 0x8000'0000U, 0U, 0U, 0U},
+        // x = -0x1.b59e08p-31, exp10m1f(x) = -0x1.f7d356p-30 (RZ)
+        {0xb05a'cf04U, 0xb0fb'e9abU, 0U, 1U, 1U},
+        // x = -0x1.bf342p-12, exp10m1f(x) = -0x1.014e02p-10 (RZ)
+        {0xb9df'9a10U, 0xba80'a701U, 0U, 1U, 0U},
+        // x = -0x1.6207fp-11, exp10m1f(x) = -0x1.9746cap-10 (RZ)
+        {0xba31'03f8U, 0xbacb'a365U, 0U, 1U, 1U},
+        // x = -0x1.bd0c66p-11, exp10m1f(x) = -0x1.ffe168p-10 (RZ)
+        {0xba5e'8633U, 0xbaff'f0b4U, 0U, 1U, 1U},
+        // x = -0x1.ffd84cp-10, exp10m1f(x) = -0x1.25faf2p-8 (RZ)
+        {0xbaff'ec26U, 0xbb92'fd79U, 0U, 1U, 0U},
+        // x = -0x1.a74172p-9, exp10m1f(x) = -0x1.e57be2p-8 (RZ)
+        {0xbb53'a0b9U, 0xbbf2'bdf1U, 0U, 1U, 1U},
+        // x = -0x1.cb694cp-9, exp10m1f(x) = -0x1.0764e4p-7 (RZ)
+        {0xbb65'b4a6U, 0xbc03'b272U, 0U, 1U, 0U},
+    }};
+
+static constexpr size_t N_EXCEPTS_HI = 19;
+
+static constexpr fputil::ExceptValues<float, N_EXCEPTS_HI> EXP10M1F_EXCEPTS_HI =
+    {{
+        // (input, RZ output, RU offset, RD offset, RN offset)
+        // x = 0x1.8d31eep-8, exp10m1f(x) = 0x1.cc7e4cp-7 (RZ)
+        {0x3bc6'98f7U, 0x3c66'3f26U, 1U, 0U, 1U},
+        // x = 0x1.915fcep-8, exp10m1f(x) = 0x1.d15f72p-7 (RZ)
+        {0x3bc8'afe7U, 0x3c68'afb9U, 1U, 0U, 0U},
+        // x = 0x1.bcf982p-8, exp10m1f(x) = 0x1.022928p-6 (RZ)
+        {0x3bde'7cc1U, 0x3c81'1494U, 1U, 0U, 1U},
+        // x = 0x1.99ff0ap-7, exp10m1f(x) = 0x1.dee416p-6 (RZ)
+        {0x3c4c'ff85U, 0x3cef'720bU, 1U, 0U, 0U},
+        // x = 0x1.75ea14p-6, exp10m1f(x) = 0x1.b9ff16p-5 (RZ)
+        {0x3cba'f50aU, 0x3d5c'ff8bU, 1U, 0U, 0U},
+        // x = 0x1.f81b64p-6, exp10m1f(x) = 0x1.2cb6bcp-4 (RZ)
+        {0x3cfc'0db2U, 0x3d96'5b5eU, 1U, 0U, 0U},
+        // x = 0x1.fafecp+3, exp10m1f(x) = 0x1.8c880ap+52 (RZ)
+        {0x417d'7f60U, 0x59c6'4405U, 1U, 0U, 0U},
+        // x = -0x1.3bf094p-8, exp10m1f(x) = -0x1.69ba4ap-7 (RZ)
+        {0xbb9d'f84aU, 0xbc34'dd25U, 0U, 1U, 0U},
+        // x = -0x1.4558bcp-8, exp10m1f(x) = -0x1.746fb8p-7 (RZ)
+        {0xbba2'ac5eU, 0xbc3a'37dcU, 0U, 1U, 1U},
+        // x = -0x1.4bb43p-8, exp10m1f(x) = -0x1.7babe4p-7 (RZ)
+        {0xbba5'da18U, 0xbc3d'd5f2U, 0U, 1U, 1U},
+        // x = -0x1.776cc8p-8, exp10m1f(x) = -0x1.ad62c4p-7 (RZ)
+        {0xbbbb'b664U, 0xbc56'b162U, 0U, 1U, 0U},
+        // x = -0x1.f024cp-8, exp10m1f(x) = -0x1.1b20d6p-6 (RZ)
+        {0xbbf8'1260U, 0xbc8d'906bU, 0U, 1U, 1U},
+        // x = -0x1.f510eep-8, exp10m1f(x) = -0x1.1de9aap-6 (RZ)
+        {0xbbfa'8877U, 0xbc8e'f4d5U, 0U, 1U, 0U},
+        // x = -0x1.0b43c4p-7, exp10m1f(x) = -0x1.30d418p-6 (RZ)
+        {0xbc05'a1e2U, 0xbc98'6a0cU, 0U, 1U, 0U},
+        // x = -0x1.245ee4p-7, exp10m1f(x) = -0x1.4d2b86p-6 (RZ)
+        {0xbc12'2f72U, 0xbca6'95c3U, 0U, 1U, 0U},
+        // x = -0x1.f9f2dap-7, exp10m1f(x) = -0x1.1e2186p-5 (RZ)
+        {0xbc7c'f96dU, 0xbd0f'10c3U, 0U, 1U, 0U},
+        // x = -0x1.08e42p-6, exp10m1f(x) = -0x1.2b5c4p-5 (RZ)
+        {0xbc84'7210U, 0xbd15'ae20U, 0U, 1U, 1U},
+        // x = -0x1.0cdc44p-5, exp10m1f(x) = -0x1.2a2152p-4 (RZ)
+        {0xbd06'6e22U, 0xbd95'10a9U, 0U, 1U, 1U},
+        // x = -0x1.ca4322p-5, exp10m1f(x) = -0x1.ef073p-4 (RZ)
+        {0xbd65'2191U, 0xbdf7'8398U, 0U, 1U, 1U},
+    }};
+
+LLVM_LIBC_FUNCTION(float, exp10m1f, (float x)) {
+  using FPBits = fputil::FPBits<float>;
+  FPBits xbits(x);
+
+  uint32_t x_u = xbits.uintval();
+  uint32_t x_abs = x_u & 0x7fff'ffffU;
+
+  // When x >= log10(2^128), or x is nan
+  if (LIBC_UNLIKELY(xbits.is_pos() && x_u >= 0x421a'209bU)) {
+    if (xbits.is_finite()) {
+      int rounding = fputil::quick_get_round();
+      if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
+        return FPBits::max_normal().get_val();
+
+      fputil::set_errno_if_required(ERANGE);
+      fputil::raise_except_if_required(FE_OVERFLOW);
+    }
+
+    // x >= log10(2^128) and 10^x - 1 rounds to +inf, or x is +inf or nan
+    return x + FPBits::inf().get_val();
+  }
+
+  // When |x| <= log10(2) * 2^(-6)
+  if (LIBC_UNLIKELY(x_abs <= 0x3b9a'209bU)) {
+    if (auto r = EXP10M1F_EXCEPTS_LO.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+      return r.value();
+
+    double dx = x;
+    double dx_sq = dx * dx;
+    double c0 = dx * Exp10Base::COEFFS[0];
+    double c1 =
+        fputil::multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]);
+    double c2 =
+        fputil::multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]);
+    // 10^dx - 1 ~ (1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5) - 1
+    //           = COEFFS[0] * dx + ... + COEFFS[4] * dx^5
+    return static_cast<float>(fputil::polyeval(dx_sq, c0, c1, c2));
+  }
+
+  // When x <= log10(2^-25), or x is nan
+  if (LIBC_UNLIKELY(x_u >= 0xc0f0d2f1)) {
+    // exp10m1(-inf) = -1
+    if (xbits.is_inf())
+      return -1.0f;
+    // exp10m1(nan) = nan
+    if (xbits.is_nan())
+      return x;
+
+    int rounding = fputil::quick_get_round();
+    if (rounding == FE_UPWARD || rounding == FE_TOWARDZERO ||
+        (rounding == FE_TONEAREST && x_u == 0xc0f0d2f1))
+      return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f
+
+    fputil::set_errno_if_required(ERANGE);
+    fputil::raise_except_if_required(FE_UNDERFLOW);
+    return -1.0f;
+  }
+
+  // Exact outputs when x = 1, 2, ..., 10.
+  // Quick check mask: 0x800f'ffffU = ~(bits of 1.0f | ... | bits of 10.0f)
+  if (LIBC_UNLIKELY((x_u & 0x800f'ffffU) == 0)) {
+    switch (x_u) {
+    case 0x3f800000U: // x = 1.0f
+      return 9.0f;
+    case 0x40000000U: // x = 2.0f
+      return 99.0f;
+    case 0x40400000U: // x = 3.0f
+      return 999.0f;
+    case 0x40800000U: // x = 4.0f
+      return 9'999.0f;
+    case 0x40a00000U: // x = 5.0f
+      return 99'999.0f;
+    case 0x40c00000U: // x = 6.0f
+      return 999'999.0f;
+    case 0x40e00000U: // x = 7.0f
+      return 9'999'999.0f;
+    case 0x41000000U: { // x = 8.0f
+      int rounding = fputil::quick_get_round();
+      if (rounding == FE_UPWARD || rounding == FE_TONEAREST)
+        return 100'000'000.0f;
+      return 99'999'992.0f;
+    }
+    case 0x41100000U: { // x = 9.0f
+      int rounding = fputil::quick_get_round();
+      if (rounding == FE_UPWARD || rounding == FE_TONEAREST)
+        return 1'000'000'000.0f;
+      return 999'999'936.0f;
+    }
+    case 0x41200000U: { // x = 10.0f
+      int rounding = fputil::quick_get_round();
+      if (rounding == FE_UPWARD || rounding == FE_TONEAREST)
+        return 10'000'000'000.0f;
+      return 9'999'998'976.0f;
+    }
+    }
+  }
+
+  if (auto r = EXP10M1F_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+
+  // Range reduction: 10^x = 2^(mid + hi) * 10^lo
+  //   rr = (2^(mid + hi), lo)
+  auto rr = exp_b_range_reduc<Exp10Base>(x);
+
+  // The low part is approximated by a degree-5 minimax polynomial.
+  //   10^lo ~ 1 + COEFFS[0] * lo + ... + COEFFS[4] * lo^5
+  double lo_sq = rr.lo * rr.lo;
+  double c0 = fputil::multiply_add(rr.lo, Exp10Base::COEFFS[0], 1.0);
+  double c1 =
+      fputil::multiply_add(rr.lo, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]);
+  double c2 =
+      fputil::multiply_add(rr.lo, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]);
+  double exp10_lo = fputil::polyeval(lo_sq, c0, c1, c2);
+  // 10^x - 1 = 2^(mid + hi) * 10^lo - 1
+  //          ~ mh * exp10_lo - 1
+  return static_cast<float>(fputil::multiply_add(exp10_lo, rr.mh, -1.0));
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h
index f3f50c21aacce7..651524a165f032 100644
--- a/libc/src/math/generic/explogxf.h
+++ b/libc/src/math/generic/explogxf.h
@@ -159,12 +159,12 @@ template <class Base> LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) {
   int k = static_cast<int>(kd);
   // hi = floor(kd * 2^(-MID_BITS))
   // exp_hi = shift hi to the exponent field of double precision.
-  int64_t exp_hi = static_cast<int64_t>((k >> Base::MID_BITS))
-                   << fputil::FPBits<double>::FRACTION_LEN;
+  uint64_t exp_hi = static_cast<uint64_t>(k >> Base::MID_BITS)
+                    << fputil::FPBits<double>::FRACTION_LEN;
   // mh = 2^hi * 2^mid
   // mh_bits = bit field of mh
-  int64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi;
-  double mh = fputil::FPBits<double>(uint64_t(mh_bits)).get_val();
+  uint64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi;
+  double mh = fputil::FPBits<double>(mh_bits).get_val();
   // dx = lo = x - (hi + mid) * log(2)
   double dx = fputil::multiply_add(
       kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd));
diff --git a/libc/test/UnitTest/FPMatcher.h b/libc/test/UnitTest/FPMatcher.h
index 55fe73cd2f1ac9..9f2bae3279208b 100644
--- a/libc/test/UnitTest/FPMatcher.h
+++ b/libc/test/UnitTest/FPMatcher.h
@@ -174,7 +174,8 @@ template <typename T> struct FPTest : public Test {
       LIBC_NAMESPACE::cpp::numeric_limits<StorageType>::max();
   static constexpr T zero = FPBits::zero(Sign::POS).get_val();
   static constexpr T neg_zero = FPBits::zero(Sign::NEG).get_val();
-  static constexpr T aNaN = FPBits::quiet_nan().get_val();
+  static constexpr T aNaN = FPBits::quiet_nan(Sign::POS).get_val();
+  static constexpr T neg_aNaN = FPBits::quiet_nan(Sign::NEG).get_val();
   static constexpr T sNaN = FPBits::signaling_nan().get_val();
   static constexpr T inf = FPBits::inf(Sign::POS).get_val();
   static constexpr T neg_inf = FPBits::inf(Sign::NEG).get_val();
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index b46ef4028915ba..d120f8e2fab219 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1084,6 +1084,21 @@ add_fp_unittest(
     libc.src.math.exp10m1f16
 )
 
+add_fp_unittest(
+  exp10m1f_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    exp10m1f_test.cpp
+  DEPENDS
+    libc.hdr.math_macros
+    libc.src.errno.errno
+    libc.src.math.exp10m1f
+    libc.src.__support.CPP.array
+    libc.src.__support.FPUtil.fp_bits
+)
+
 add_fp_unittest(
   copysign_test
   SUITE
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index 6c10ea422109e7..423c3b7a8bfd11 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -201,6 +201,21 @@ add_fp_unittest(
     -lpthread
 )
 
+add_fp_unittest(
+  exp10m1f_test
+  NO_RUN_POSTBUILD
+  NEED_MPFR
+  SUITE
+    libc_math_exhaustive_tests
+  SRCS
+    exp10m1f_test.cpp
+  DEPENDS
+    .exhaustive_test
+    libc.src.math.exp10m1f
+  LINK_LIBRARIES
+    -lpthread
+)
+
 add_fp_unittest(
   expm1f_test
   NO_RUN_POSTBUILD
diff --git a/libc/test/src/math/exhaustive/exp10m1f_test.cpp b/libc/test/src/math/exhaustive/exp10m1f_test.cpp
new file mode 100644
index 00000000000000..b9b2290f8b570d
--- /dev/null
+++ b/libc/test/src/math/exhaustive/exp10m1f_test.cpp
@@ -0,0 +1,33 @@
+//===-- Exhaustive test for exp10m1f --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/math/exp10m1f.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+using LlvmLibcExp10m1fExhaustiveTest =
+    LlvmLibcUnaryOpExhaustiveMathTest<float, mpfr::Operation::Exp10m1>;
+
+// Range: [0, Inf];
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcExp10m1fExhaustiveTest, PositiveRange) {
+  test_full_range_all_roundings(POS_START, POS_STOP);
+}
+
+// Range: [-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcExp10m1fExhaustiveTest, NegativeRange) {
+  test_full_range_all_roundings(NEG_START, NEG_STOP);
+}
diff --git a/libc/test/src/math/exp10m1f_test.cpp b/libc/test/src/math/exp10m1f_test.cpp
new file mode 100644
index 00000000000000..cc960321175cbf
--- /dev/null
+++ b/libc/test/src/math/exp10m1f_test.cpp
@@ -0,0 +1,97 @@
+//===-- Unittests for exp10m1f --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/math_macros.h"
+#include "src/__support/CPP/array.h"
+#include "src/errno/libc_errno.h"
+#include "src/math/exp10m1f.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+#include <stdint.h>
+
+using LlvmLibcExp10m1fTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+TEST_F(LlvmLibcExp10m1fTest, TrickyInputs) {
+  constexpr LIBC_NAMESPACE::cpp::array<float, 39> INPUTS = {
+      // EXP10M1F_EXCEPTS_LO
+      0x1.0fe54ep-11f,
+      0x1.80e6eap-11f,
+      -0x1.2a33bcp-51f,
+      -0x0p+0f,
+      -0x1.b59e08p-31f,
+      -0x1.bf342p-12f,
+      -0x1.6207fp-11f,
+      -0x1.bd0c66p-11f,
+      -0x1.ffd84cp-10f,
+      -0x1.a74172p-9f,
+      -0x1.cb694cp-9f,
+      // EXP10M1F_EXCEPTS_HI
+      0x1.8d31eep-8f,
+      0x1.915fcep-8f,
+      0x1.bcf982p-8f,
+      0x1.99ff0ap-7f,
+      0x1.75ea14p-6f,
+      0x1.f81b64p-6f,
+      0x1.fafecp+3f,
+      -0x1.3bf094p-8f,
+      -0x1.4558bcp-8f,
+      -0x1.4bb43p-8f,
+      -0x1.776cc8p-8f,
+      -0x1.f024cp-8f,
+      -0x1.f510eep-8f,
+      -0x1.0b43c4p-7f,
+      -0x1.245ee4p-7f,
+      -0x1.f9f2dap-7f,
+      -0x1.08e42p-6f,
+      -0x1.0cdc44p-5f,
+      -0x1.ca4322p-5f,
+      // Exceptional integers.
+      8.0f,
+      9.0f,
+      10.0f,
+      // Overflow boundaries.
+      0x1.344134p+5f,
+      0x1.344136p+5f,
+      0x1.344138p+5f,
+      // Underflow boundaries.
+      -0x1.e1a5e0p+2f,
+      -0x1.e1a5e2p+2f,
+      -0x1.e1a5e4p+2f,
+  };
+
+  for (float x : INPUTS) {
+    LIBC_NAMESPACE::libc_errno = 0;
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp10m1, x,
+                                   LIBC_NAMESPACE::exp10m1f(x), 0.5);
+  }
+}
+
+TEST_F(LlvmLibcExp10m1fTest, InFloatRange) {
+  constexpr uint32_t COUNT = 100'000;
+  constexpr uint32_t STEP = UINT32_MAX / COUNT;
+  for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
+    float x = FPBits(v).get_val();
+    if (isnan(x) || isinf(x))
+      continue;
+    LIBC_NAMESPACE::libc_errno = 0;
+    float result = LIBC_NAMESPACE::exp10m1f(x);
+
+    // If the computation resulted in an error or did not produce valid result
+    // in the single-precision floating point range, then ignore comparing with
+    // MPFR result as MPFR can still produce valid results because of its
+    // wider precision.
+    if (isnan(result) || isinf(result) || LIBC_NAMESPACE::libc_errno != 0)
+      continue;
+    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp10m1, x,
+                                   LIBC_NAMESPACE::exp10m1f(x), 0.5);
+  }
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 269e92c5900628..1da6f377a1debb 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -1259,6 +1259,17 @@ add_fp_unittest(
     libc.src.__support.FPUtil.cast
 )
 
+add_fp_unittest(
+  exp10m1f_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    exp10m1f_test.cpp
+  DEPENDS
+    libc.src.errno.errno
+    libc.src.math.exp10m1f
+)
+
 add_fp_unittest(
   copysign_test
   SUITE
diff --git a/libc/test/src/math/smoke/exp10m1f_test.cpp b/libc/test/src/math/smoke/exp10m1f_test.cpp
new file mode 100644
index 00000000000000..9c65a38425d778
--- /dev/null
+++ b/libc/test/src/math/smoke/exp10m1f_test.cpp
@@ -0,0 +1,59 @@
+//===-- Unittests for exp10m1f --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/errno/libc_errno.h"
+#include "src/math/exp10m1f.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcExp10m1fTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
+TEST_F(LlvmLibcExp10m1fTest, SpecialNumbers) {
+  LIBC_NAMESPACE::libc_errno = 0;
+
+  EXPECT_EQ(FPBits(aNaN).uintval(),
+            FPBits(LIBC_NAMESPACE::exp10m1f(aNaN)).uintval());
+  EXPECT_EQ(FPBits(neg_aNaN).uintval(),
+            FPBits(LIBC_NAMESPACE::exp10m1f(neg_aNaN)).uintval());
+  EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::exp10m1f(inf));
+  EXPECT_FP_EQ_ALL_ROUNDING(-1.0f, LIBC_NAMESPACE::exp10m1f(neg_inf));
+  EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::exp10m1f(zero));
+  EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, LIBC_NAMESPACE::exp10m1f(neg_zero));
+
+  EXPECT_FP_EQ_ALL_ROUNDING(9.0f, LIBC_NAMESPACE::exp10m1f(1.0f));
+  EXPECT_FP_EQ_ALL_ROUNDING(99.0f, LIBC_NAMESPACE::exp10m1f(2.0f));
+  EXPECT_FP_EQ_ALL_ROUNDING(999.0f, LIBC_NAMESPACE::exp10m1f(3.0f));
+}
+
+TEST_F(LlvmLibcExp10m1fTest, Overflow) {
+  LIBC_NAMESPACE::libc_errno = 0;
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::exp10m1f(0x1.fffffep+127f),
+                              FE_OVERFLOW);
+  EXPECT_MATH_ERRNO(ERANGE);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::exp10m1f(0x1.344136p+5),
+                              FE_OVERFLOW);
+  EXPECT_MATH_ERRNO(ERANGE);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::exp10m1f(0x1.344138p+5),
+                              FE_OVERFLOW);
+  EXPECT_MATH_ERRNO(ERANGE);
+}
+
+TEST_F(LlvmLibcExp10m1fTest, Underflow) {
+  LIBC_NAMESPACE::libc_errno = 0;
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(-1.0f, LIBC_NAMESPACE::exp10m1f(-max_normal),
+                              FE_UNDERFLOW);
+  EXPECT_MATH_ERRNO(ERANGE);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(-1.0f, LIBC_NAMESPACE::exp10m1f(-0x1.e1a5e4p+2f),
+                              FE_UNDERFLOW);
+  EXPECT_MATH_ERRNO(ERANGE);
+}
diff --git a/libc/test/src/stdlib/at_quick_exit_test.cpp b/libc/test/src/stdlib/at_quick_exit_test.cpp
index 1ed5a83a61b8d0..c0aac4d20d92cc 100644
--- a/libc/test/src/stdlib/at_quick_exit_test.cpp
+++ b/libc/test/src/stdlib/at_quick_exit_test.cpp
@@ -8,6 +8,7 @@
 
 #include "src/__support/CPP/array.h"
 #include "src/__support/CPP/utility.h"
+#include "src/stdlib/_Exit.h"
 #include "src/stdlib/at_quick_exit.h"
 #include "src/stdlib/quick_exit.h"
 #include "test/UnitTest/Test.h"
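Editorial aside (not part of the patch): the overflow and underflow boundaries used by the exp10m1f tests above are just 128 * log10(2) and -25 * log10(2): 10^x overflows float once 10^x >= 2^128, and 10^x - 1 rounds to -1 once 10^x drops below about 2^-25 (half an ulp of 1.0f). A minimal sanity check, written as an illustrative standalone program rather than anything in the tree:

#include <cmath>
#include <cstdio>

int main() {
  // ~0x1.344136p+5, matching the overflow inputs in the tests above.
  std::printf("overflow  threshold ~ %a\n", 128.0 * std::log10(2.0));
  // ~-0x1.e1a5e2p+2, matching the underflow inputs in the tests above.
  std::printf("underflow threshold ~ %a\n", -25.0 * std::log10(2.0));
  return 0;
}

The same constants appear in the implementation as the bit patterns 0x421a'209bU and 0xc0f0d2f1 used for the early-exit range checks.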
diff --git a/libc/test/src/stdlib/atexit_test.cpp b/libc/test/src/stdlib/atexit_test.cpp
index 24f8b0451f3636..c25202ff54f696 100644
--- a/libc/test/src/stdlib/atexit_test.cpp
+++ b/libc/test/src/stdlib/atexit_test.cpp
@@ -8,6 +8,7 @@
 
 #include "src/__support/CPP/array.h"
 #include "src/__support/CPP/utility.h"
+#include "src/stdlib/_Exit.h"
 #include "src/stdlib/atexit.h"
 #include "src/stdlib/exit.h"
 #include "test/UnitTest/Test.h"
diff --git a/libcxx/include/__utility/small_buffer.h b/libcxx/include/__utility/small_buffer.h
index 70e068f89f62ed..b44b37e90e7653 100644
--- a/libcxx/include/__utility/small_buffer.h
+++ b/libcxx/include/__utility/small_buffer.h
@@ -66,7 +66,7 @@ class __small_buffer {
     if constexpr (__fits_in_buffer<_Stored>) {
       return std::launder(reinterpret_cast<_Stored*>(__buffer_));
     } else {
-      byte* __allocation = static_cast<byte*>(::operator new[](sizeof(_Stored), align_val_t{alignof(_Stored)}));
+      byte* __allocation = static_cast<byte*>(std::__libcpp_allocate(sizeof(_Stored), alignof(_Stored)));
       std::construct_at(reinterpret_cast<byte**>(__buffer_), __allocation);
       return std::launder(reinterpret_cast<_Stored*>(__allocation));
     }
@@ -75,7 +75,7 @@ class __small_buffer {
   template <class _Stored>
   _LIBCPP_HIDE_FROM_ABI void __dealloc() noexcept {
     if constexpr (!__fits_in_buffer<_Stored>)
-      ::operator delete[](*reinterpret_cast<byte**>(__buffer_), sizeof(_Stored), align_val_t{alignof(_Stored)});
+      std::__libcpp_deallocate(*reinterpret_cast<byte**>(__buffer_), sizeof(_Stored), alignof(_Stored));
   }
 
   template <class _Stored>
diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt
index da0e8b286cddc1..50e9a296a4a13b 100644
--- a/libcxxabi/CMakeLists.txt
+++ b/libcxxabi/CMakeLists.txt
@@ -86,12 +86,6 @@ set(LIBCXXABI_STATIC_OUTPUT_NAME "c++abi" CACHE STRING "Output name for the stat
 set(LIBCXXABI_INSTALL_INCLUDE_DIR "${CMAKE_INSTALL_INCLUDEDIR}/c++/v1" CACHE STRING
     "Path to install the libc++abi headers at.")
 
-if(LLVM_LIBRARY_OUTPUT_INTDIR)
-  set(LIBCXXABI_GENERATED_INCLUDE_DIR "${LLVM_BINARY_DIR}/include/c++/v1")
-else()
-  set(LIBCXXABI_GENERATED_INCLUDE_DIR "${CMAKE_BINARY_DIR}/include/c++/v1")
-endif()
-
 set(LIBCXXABI_LIBCXX_LIBRARY_PATH "" CACHE PATH "The path to libc++ library.")
 set(LIBCXXABI_LIBRARY_VERSION "1.0" CACHE STRING
     "Version of libc++abi. This will be reflected in the name of the shared \
diff --git a/libcxxabi/include/CMakeLists.txt b/libcxxabi/include/CMakeLists.txt
index 5b1cc2545016ec..0deb7b1eb9e715 100644
--- a/libcxxabi/include/CMakeLists.txt
+++ b/libcxxabi/include/CMakeLists.txt
@@ -3,20 +3,7 @@ set(files
   cxxabi.h
 )
 
-foreach(f ${files})
-  set(src "${CMAKE_CURRENT_SOURCE_DIR}/${f}")
-  set(dst "${LIBCXXABI_GENERATED_INCLUDE_DIR}/${f}")
-  add_custom_command(OUTPUT ${dst}
-    DEPENDS ${src}
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst}
-    COMMENT "Copying CXXABI header ${f}")
-  list(APPEND _all_includes "${dst}")
-endforeach()
-
-add_custom_target(generate-cxxabi-headers ALL DEPENDS ${_all_includes})
-
 add_library(cxxabi-headers INTERFACE)
-add_dependencies(cxxabi-headers generate-cxxabi-headers)
 target_include_directories(cxxabi-headers INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}")
 
 if (LIBCXXABI_INSTALL_HEADERS)
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index c990972ca64bcf..227ed802aa933c 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -441,6 +441,8 @@ void CommandInterpreter::Initialize() {
 
   cmd_obj_sp = GetCommandSPExact("expression");
   if (cmd_obj_sp) {
+    // Ensure `e` runs `expression`.
+    AddAlias("e", cmd_obj_sp);
     AddAlias("call", cmd_obj_sp, "--")->SetHelpLong("");
     CommandAlias *parray_alias =
         AddAlias("parray", cmd_obj_sp, "--element-count %1 --");
diff --git a/lldb/test/API/functionalities/abbreviation/TestAbbreviations.py b/lldb/test/API/functionalities/abbreviation/TestAbbreviations.py
index 02ee581da516d4..a8cbffbb7ba4a5 100644
--- a/lldb/test/API/functionalities/abbreviation/TestAbbreviations.py
+++ b/lldb/test/API/functionalities/abbreviation/TestAbbreviations.py
@@ -20,6 +20,10 @@ def test_command_abbreviations_and_aliases(self):
         self.assertTrue(result.Succeeded())
         self.assertEqual("apropos script", result.GetOutput())
 
+        command_interpreter.ResolveCommand("e", result)
+        self.assertTrue(result.Succeeded())
+        self.assertEqual("expression", result.GetOutput())
+
         command_interpreter.ResolveCommand("h", result)
         self.assertTrue(result.Succeeded())
         self.assertEqual("help", result.GetOutput())
diff --git a/llvm/include/llvm/ADT/STLFunctionalExtras.h b/llvm/include/llvm/ADT/STLFunctionalExtras.h
index 3b9d40959d7142..6f172504b3c167 100644
--- a/llvm/include/llvm/ADT/STLFunctionalExtras.h
+++ b/llvm/include/llvm/ADT/STLFunctionalExtras.h
@@ -16,7 +16,6 @@
 #define LLVM_ADT_STLFUNCTIONALEXTRAS_H
 
 #include "llvm/ADT/STLForwardCompat.h"
-#include "llvm/Support/Compiler.h"
 
 #include <cstdint>
 #include <type_traits>
@@ -53,7 +52,7 @@ class function_ref {
 
   template <typename Callable>
   function_ref(
-      Callable &&callable LLVM_LIFETIME_BOUND,
+      Callable &&callable,
       // This is not the copy-constructor.
      std::enable_if_t<!std::is_same<remove_cvref_t<Callable>, function_ref>::value> * = nullptr,
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 5090c7ff666621..67bbb814f90680 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -868,6 +868,10 @@ class LLVM_ABI MachineFunction {
   /// it are renumbered.
   void RenumberBlocks(MachineBasicBlock *MBBFrom = nullptr);
 
+  /// Return an estimate of the function's code size,
+  /// taking into account block and function alignment.
+  int64_t estimateFunctionSizeInBytes();
+
   /// print - Print out the MachineFunction in a format suitable for debugging
   /// to the specified stream.
void print(raw_ostream &OS, const SlotIndexes* = nullptr) const; diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 43267033f024a7..c181424a6e95bf 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -35,6 +35,9 @@ def int_dx_typedBufferLoad_checkbit def int_dx_typedBufferStore : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>; +def int_dx_updateCounter + : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i8_ty]>; + // Cast between target extension handle types and dxil-style opaque handles def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>; diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h index f92c6b4775a2a2..f168ffc4fdb1ef 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -57,6 +57,38 @@ class MemProfUsePass : public PassInfoMixin<MemProfUsePass> { IntrusiveRefCntPtr<vfs::FileSystem> FS; }; +namespace memprof { + +struct LineLocation { + LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Column(D) {} + + bool operator<(const LineLocation &O) const { + return LineOffset < O.LineOffset || + (LineOffset == O.LineOffset && Column < O.Column); + } + + bool operator==(const LineLocation &O) const { + return LineOffset == O.LineOffset && Column == O.Column; + } + + bool operator!=(const LineLocation &O) const { + return LineOffset != O.LineOffset || Column != O.Column; + } + + uint64_t getHashCode() const { return ((uint64_t)Column << 32) | LineOffset; } + + uint32_t LineOffset; + uint32_t Column; +}; + +// A pair of a call site location and its corresponding callee GUID. +using CallEdgeTy = std::pair<LineLocation, uint64_t>; + +// Extract all calls from the IR. Arrange them in a map from caller GUIDs to a +// list of call sites, each of the form {LineLocation, CalleeGUID}. +DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> extractCallsFromIR(Module &M); + +} // namespace memprof } // namespace llvm #endif diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index fd586351215448..a684e239c7d658 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -388,6 +388,37 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { MBBNumberingEpoch++; } +int64_t MachineFunction::estimateFunctionSizeInBytes() { + const TargetInstrInfo &TII = *getSubtarget().getInstrInfo(); + const Align FunctionAlignment = getAlignment(); + MachineFunction::iterator MBBI = begin(), E = end(); + /// Offset - Distance from the beginning of the function to the end + /// of the basic block. + int64_t Offset = 0; + + for (; MBBI != E; ++MBBI) { + const Align Alignment = MBBI->getAlignment(); + int64_t BlockSize = 0; + + for (auto &MI : *MBBI) { + BlockSize += TII.getInstSizeInBytes(MI); + } + + int64_t OffsetBB; + if (Alignment <= FunctionAlignment) { + OffsetBB = alignTo(Offset, Alignment); + } else { + // The alignment of this MBB is larger than the function's alignment, so + // we can't tell whether or not it will insert nops. Assume that it will. + OffsetBB = alignTo(Offset, Alignment) + Alignment.value() - + FunctionAlignment.value(); + } + Offset = OffsetBB + BlockSize; + } + + return Offset; +} + /// This method iterates over the basic blocks and assigns their IsBeginSection /// and IsEndSection fields. This must be called after MBB layout is finalized /// and the SectionID's are assigned to MBBs.
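The conservative branch in estimateFunctionSizeInBytes is easiest to see with concrete numbers. The following standalone sketch is illustrative only and is not part of the patch; the alignments and offset are made up, and only llvm/Support/Alignment.h is assumed. When a block is aligned more strictly than its function, the function start is only known modulo the function alignment, so the emitter may insert up to BlockAlignment - FunctionAlignment bytes of nops, and the estimate assumes it does.

    // Sketch of the worst-case padding estimate used above (hypothetical values).
    #include "llvm/Support/Alignment.h"
    #include <cassert>
    #include <cstdint>
    using namespace llvm;

    int main() {
      const Align FunctionAlignment(4); // hypothetical function alignment
      const Align BlockAlignment(16);   // hypothetical basic-block alignment
      int64_t Offset = 6;               // bytes emitted before this block
      // Mirrors the "Alignment > FunctionAlignment" branch: align the offset,
      // then add the worst-case padding the emitter might still need.
      int64_t OffsetBB = alignTo(Offset, BlockAlignment) +
                         BlockAlignment.value() - FunctionAlignment.value();
      assert(OffsetBB == 16 + 12); // alignTo(6, 16) == 16, plus 12 pad bytes
      return 0;
    }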
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 21fffba14287ef..e3a330d45aaa57 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -22,7 +22,6 @@ #include "SIISelLowering.h" #include "SIMachineFunctionInfo.h" #include "llvm/Analysis/UniformityAnalysis.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index 11c4cdd560c2f3..5ae0b179d7d0e6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -17,6 +17,7 @@ #include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "SIModeRegisterDefaults.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetMachine.h" diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 55de3363f99672..6acf313e369ce9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3574,6 +3574,23 @@ def : AMDGPUPat < (V_BFE_U32_e64 $src, (i32 0), $width) >; +def uint5Bits : PatLeaf<(i32 VGPR_32:$width), [{ + return CurDAG->computeKnownBits(SDValue(N, 0)).countMaxTrailingOnes() <= 5; +}]>; + +// x << (bitwidth - y) >> (bitwidth - y) +def : AMDGPUPat < + (DivergentBinFrag<srl> (shl_oneuse i32:$src, (sub 32, uint5Bits:$width)), + (sub 32, uint5Bits:$width)), + (V_BFE_U32_e64 $src, (i32 0), $width) +>; + +def : AMDGPUPat < + (DivergentBinFrag<sra> (shl_oneuse i32:$src, (sub 32, uint5Bits:$width)), + (sub 32, uint5Bits:$width)), + (V_BFE_I32_e64 $src, (i32 0), $width) +>; + // SHA-256 Ma patterns // ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 1aabff90e5ec6e..efaf96112c75d6 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -754,6 +754,13 @@ def BufferStore : DXILOp<69, bufferStore> { let stages = [Stages<DXIL1_0, [all_stages]>]; } +def UpdateCounter : DXILOp<70, bufferUpdateCounter> { + let Doc = "increments/decrements a buffer counter"; + let arguments = [HandleTy, Int8Ty]; + let result = VoidTy; + let stages = [Stages<DXIL1_0, [all_stages]>]; +} + def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> { let Doc = "checks whether a Sample, Gather, or Load operation " "accessed mapped tiles in a tiled resource"; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 8acc9c1efa08c0..0dd3a8dc1ad4ce 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -463,6 +463,28 @@ class OpLowerer { }); } + [[nodiscard]] bool lowerUpdateCounter(Function &F) { + IRBuilder<> &IRB = OpBuilder.getIRB(); + + return replaceFunction(F, [&](CallInst *CI) -> Error { + IRB.SetInsertPoint(CI); + Value *Handle = + createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); + Value *Op1 = CI->getArgOperand(1); + + std::array<Value *, 2> Args{Handle, Op1}; + + Expected<CallInst *> OpCall = + OpBuilder.tryCreateOp(OpCode::UpdateCounter, Args, CI->getName()); + + if (Error E = OpCall.takeError()) + return E; + + CI->eraseFromParent(); + return Error::success(); + }); + } + [[nodiscard]] bool lowerTypedBufferStore(Function &F) { IRBuilder<> &IRB = OpBuilder.getIRB(); Type *Int8Ty =
IRB.getInt8Ty(); @@ -600,6 +622,9 @@ class OpLowerer { case Intrinsic::dx_typedBufferStore: HasErrors |= lowerTypedBufferStore(F); break; + case Intrinsic::dx_updateCounter: + HasErrors |= lowerUpdateCounter(F); + break; // TODO: this can be removed when // https://github.com/llvm/llvm-project/issues/113192 is fixed case Intrinsic::dx_splitdouble: diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 389bdbe6d5e912..d11647b78d7417 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -87,6 +87,12 @@ class RISCVInstructionSelector : public InstructionSelector { ComplexRendererFns selectShiftMask(MachineOperand &Root) const; ComplexRendererFns selectAddrRegImm(MachineOperand &Root) const; + ComplexRendererFns selectSExtBits(MachineOperand &Root, unsigned Bits) const; + template <unsigned Bits> + ComplexRendererFns selectSExtBits(MachineOperand &Root) const { + return selectSExtBits(Root, Bits); + } + ComplexRendererFns selectZExtBits(MachineOperand &Root, unsigned Bits) const; template <unsigned Bits> ComplexRendererFns selectZExtBits(MachineOperand &Root) const { @@ -248,6 +254,27 @@ RISCVInstructionSelector::selectShiftMask(MachineOperand &Root) const { return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(ShAmtReg); }}}; } +InstructionSelector::ComplexRendererFns +RISCVInstructionSelector::selectSExtBits(MachineOperand &Root, + unsigned Bits) const { + if (!Root.isReg()) + return std::nullopt; + Register RootReg = Root.getReg(); + MachineInstr *RootDef = MRI->getVRegDef(RootReg); + + if (RootDef->getOpcode() == TargetOpcode::G_SEXT_INREG && + RootDef->getOperand(2).getImm() == Bits) { + return { + {[=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }}}; + } + + unsigned Size = MRI->getType(RootReg).getScalarSizeInBits(); + if ((Size - KB->computeNumSignBits(RootReg)) < Bits) + return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}}; + + return std::nullopt; +} + InstructionSelector::ComplexRendererFns RISCVInstructionSelector::selectZExtBits(MachineOperand &Root, unsigned Bits) const { diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 34742394a291ed..0704b57ff95650 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -203,8 +203,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower(); getActionDefinitionsBuilder({G_ROTL, G_ROTR}) - .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), - {{s32, s32}, {sXLen, sXLen}}) + .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), {{sXLen, sXLen}}) + .customFor(ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()), + {{s32, s32}}) .lower(); getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower(); @@ -225,7 +226,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) auto &CountZerosUndefActions = getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF}); if (ST.hasStdExtZbb()) { - CountZerosActions.legalFor({{s32, s32}, {sXLen, sXLen}}) + CountZerosActions.legalFor({{sXLen, sXLen}}) + .customFor({{s32, s32}}) .clampScalar(0, s32, sXLen) .widenScalarToNextPow2(0) .scalarSameSizeAs(1, 0); @@ -237,9 +239,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP); if (ST.hasStdExtZbb()) { -
CTPOPActions.legalFor({{s32, s32}, {sXLen, sXLen}}) - .clampScalar(0, s32, sXLen) - .widenScalarToNextPow2(0) + CTPOPActions.legalFor({{sXLen, sXLen}}) + .clampScalar(0, sXLen, sXLen) .scalarSameSizeAs(1, 0); } else { CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower(); @@ -541,9 +542,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}); getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) - .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {s32, sXLen}))) + .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {sXLen}))) .widenScalarToNextPow2(1) - .minScalar(1, s32) + .minScalar(1, sXLen) .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}}) .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}); @@ -1158,6 +1159,21 @@ bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI, return true; } +static unsigned getRISCVWOpcode(unsigned Opcode) { + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_ROTL: + return RISCV::G_ROLW; + case TargetOpcode::G_ROTR: + return RISCV::G_RORW; + case TargetOpcode::G_CTLZ: + return RISCV::G_CLZW; + case TargetOpcode::G_CTTZ: + return RISCV::G_CTZW; + } +} + bool RISCVLegalizerInfo::legalizeCustom( LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const { @@ -1194,6 +1210,25 @@ bool RISCVLegalizerInfo::legalizeCustom( return Helper.lower(MI, 0, /* Unused hint type */ LLT()) == LegalizerHelper::Legalized; } + case TargetOpcode::G_ROTL: + case TargetOpcode::G_ROTR: { + Helper.Observer.changingInstr(MI); + Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT); + Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT); + Helper.widenScalarDst(MI, sXLen); + MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode()))); + Helper.Observer.changedInstr(MI); + return true; + } + case TargetOpcode::G_CTLZ: + case TargetOpcode::G_CTTZ: { + Helper.Observer.changingInstr(MI); + Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT); + Helper.widenScalarDst(MI, sXLen); + MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode()))); + Helper.Observer.changedInstr(MI); + return true; + } case TargetOpcode::G_IS_FPCLASS: { Register GISFPCLASS = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td index a2e67eef03561b..60d942957c8861 100644 --- a/llvm/lib/Target/RISCV/RISCVCombine.td +++ b/llvm/lib/Target/RISCV/RISCVCombine.td @@ -23,6 +23,6 @@ def RISCVO0PreLegalizerCombiner: GICombiner< // TODO: Add more combines. 
def RISCVPostLegalizerCombiner : GICombiner<"RISCVPostLegalizerCombinerImpl", - [redundant_and, identity_combines, commute_constant_to_rhs, - constant_fold_cast_op]> { + [combines_for_extload, redundant_and, identity_combines, + commute_constant_to_rhs, constant_fold_cast_op]> { } diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 36881b02da2e40..10906aebf1bf84 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -96,6 +96,9 @@ def gi_sh2add_uw_op : GIComplexOperandMatcher<s64, "selectSHXADD_UWOp<2>">, def gi_sh3add_uw_op : GIComplexOperandMatcher<s64, "selectSHXADD_UWOp<3>">, GIComplexPatternEquiv<sh3add_uw_op>; +def gi_sexti32 : GIComplexOperandMatcher<s64, "selectSExtBits<32>">, + GIComplexPatternEquiv<sexti32>; + def gi_zexti32 : GIComplexOperandMatcher<s64, "selectZExtBits<32>">, GIComplexPatternEquiv<zexti32>; def gi_zexti16 : GIComplexOperandMatcher<s64, "selectZExtBits<16>">, GIComplexPatternEquiv<zexti16>; @@ -264,10 +267,6 @@ def : PatGprGpr; //===----------------------------------------------------------------------===// let Predicates = [HasStdExtZbb, IsRV64] in { -def : PatGpr<ctlz, CLZW, i32>; -def : PatGpr<cttz, CTZW, i32>; -def : PatGpr<ctpop, CPOPW, i32>; - def : Pat<(i32 (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>; def : Pat<(i32 (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>; @@ -278,14 +277,6 @@ let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in { def : Pat<(i32 (and GPR:$rs1, (not GPR:$rs2))), (ANDN GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (or GPR:$rs1, (not GPR:$rs2))), (ORN GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (xor GPR:$rs1, (not GPR:$rs2))), (XNOR GPR:$rs1, GPR:$rs2)>; - -def : PatGprGpr<rotl, ROLW, i32, i32>; -def : PatGprGpr<rotr, RORW, i32, i32>; -def : Pat<(i32 (rotr GPR:$rs1, uimm5i32:$imm)), - (RORIW GPR:$rs1, (i64 (as_i64imm $imm)))>; - -def : Pat<(i32 (rotl GPR:$rs1, uimm5i32:$rs2)), - (RORIW GPR:$rs1, (ImmSubFrom32 uimm5i32:$rs2))>; } // Predicates = [HasStdExtZbbOrZbkb, IsRV64] let Predicates = [HasStdExtZba, IsRV64] in { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5600524b69a620..48a7c1f047ff46 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4426,48 +4426,58 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, } // Is this a shuffle that extracts either the even or odd elements of a vector? -// That is, specifically, either (a) or (b) below. -// t34: v8i8 = extract_subvector t11, Constant:i64<0> -// t33: v8i8 = extract_subvector t11, Constant:i64<8> -// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33 -// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33 -// Returns {Src Vector, Even Elements} on success -static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, - SDValue V2, ArrayRef<int> Mask, - const RISCVSubtarget &Subtarget) { +// That is, specifically, either (a) or (b) in the options below. +// Single operand shuffle is easy: +// a) t35: v8i8 = vector_shuffle<0,2,4,6,u,u,u,u> t34, undef +// b) t35: v8i8 = vector_shuffle<1,3,5,7,u,u,u,u> t34, undef +// Double operand shuffle: +// t34: v8i8 = extract_subvector t11, Constant:i64<0> +// t33: v8i8 = extract_subvector t11, Constant:i64<8> +// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33 +// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33 +static SDValue isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, + SDValue V2, ArrayRef<int> Mask, + const RISCVSubtarget &Subtarget) { // Need to be able to widen the vector. if (VT.getScalarSizeInBits() >= Subtarget.getELen()) - return false; + return SDValue(); + + // First index must be the first even or odd element from V1.
+ if (Mask[0] != 0 && Mask[0] != 1) + return SDValue(); + + // The others must increase by 2 each time. + for (unsigned i = 1; i != Mask.size(); ++i) + if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2) + return SDValue(); + + if (1 == count_if(Mask, [](int Idx) { return Idx != -1; })) + return SDValue(); + + if (V2.isUndef() && + RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8) + return V1; // Both inputs must be extracts. if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR || V2.getOpcode() != ISD::EXTRACT_SUBVECTOR) - return false; + return SDValue(); // Extracting from the same source. SDValue Src = V1.getOperand(0); if (Src != V2.getOperand(0)) - return false; + return SDValue(); // Src needs to have twice the number of elements. if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2)) - return false; + return SDValue(); // The extracts must extract the two halves of the source. if (V1.getConstantOperandVal(1) != 0 || V2.getConstantOperandVal(1) != Mask.size()) - return false; - - // First index must be the first even or odd element from V1. - if (Mask[0] != 0 && Mask[0] != 1) - return false; - - // The others must increase by 2 each time (or be undef). - for (unsigned i = 1; i != Mask.size(); ++i) - if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2) - return false; + return SDValue(); - return true; + return Src; } /// Is this shuffle interleaving contiguous elements from one vector into the @@ -4597,7 +4607,8 @@ static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, assert(Src.getSimpleValueType().isFixedLengthVector()); ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); - // The source is a vector of type <m x n*2 x ty> + // The source is a vector of type <m x n*2 x ty> (For the single source + // case, the high half is undefined) MVT SrcContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(), ContainerVT.getVectorElementCount() * 2); @@ -5300,10 +5311,9 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, // If this is a deinterleave and we can widen the vector, then we can use // vnsrl to deinterleave. - if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) { - return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0, - Subtarget, DAG); - } + if (SDValue Src = + isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) + return getDeinterleaveViaVNSRL(DL, VT, Src, Mask[0] == 0, Subtarget, DAG); if (SDValue V = lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td index 763aead84dd8f4..424623360d2556 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td +++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td @@ -17,6 +17,38 @@ class RISCVGenericInstruction : GenericInstruction { let Namespace = "RISCV"; } +// Pseudo equivalent to a RISCVISD::RORW. +def G_RORW : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2); + let hasSideEffects = false; +} +def : GINodeEquiv<G_RORW, riscv_rorw>; + +// Pseudo equivalent to a RISCVISD::ROLW. +def G_ROLW : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2); + let hasSideEffects = false; +} +def : GINodeEquiv<G_ROLW, riscv_rolw>; + +// Pseudo equivalent to a RISCVISD::CLZW.
+def G_CLZW : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src); + let hasSideEffects = false; +} +def : GINodeEquiv<G_CLZW, riscv_clzw>; + +// Pseudo equivalent to a RISCVISD::CTZW. +def G_CTZW : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src); + let hasSideEffects = false; +} +def : GINodeEquiv<G_CTZW, riscv_ctzw>; + // Pseudo equivalent to a RISCVISD::FCLASS. def G_FCLASS : RISCVGenericInstruction { let OutOperandList = (outs type0:$dst); diff --git a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp index db86637ecf83f3..95dfafc13f3908 100644 --- a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp +++ b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp @@ -69,6 +69,9 @@ void XtensaAsmPrinter::emitMachineConstantPoolValue( const BlockAddress *BA = cast<XtensaConstantPoolConstant>(ACPV)->getBlockAddress(); MCSym = GetBlockAddressSymbol(BA); + } else if (ACPV->isMachineBasicBlock()) { + const MachineBasicBlock *MBB = cast<XtensaConstantPoolMBB>(ACPV)->getMBB(); + MCSym = MBB->getSymbol(); } else if (ACPV->isJumpTable()) { unsigned Idx = cast<XtensaConstantPoolJumpTable>(ACPV)->getIndex(); MCSym = this->GetJTISymbol(Idx, false); diff --git a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp index f46d386c9186aa..005ba10b813133 100644 --- a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp @@ -12,6 +12,7 @@ #include "XtensaFrameLowering.h" #include "XtensaInstrInfo.h" +#include "XtensaMachineFunctionInfo.h" #include "XtensaSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -260,14 +261,26 @@ void XtensaFrameLowering::processFunctionBeforeFrameFinalized( // Set scavenging frame index if necessary. MachineFrameInfo &MFI = MF.getFrameInfo(); uint64_t MaxSPOffset = MFI.estimateStackSize(MF); + auto *XtensaFI = MF.getInfo<XtensaMachineFunctionInfo>(); + unsigned ScavSlotsNum = 0; - if (isInt<12>(MaxSPOffset)) - return; + if (!isInt<12>(MaxSPOffset)) + ScavSlotsNum = 1; + + // Far branches over an 18-bit offset require a spill slot for a scratch register.
+ bool IsLargeFunction = !isInt<18>(MF.estimateFunctionSizeInBytes()); + if (IsLargeFunction) + ScavSlotsNum = std::max(ScavSlotsNum, 1u); const TargetRegisterClass &RC = Xtensa::ARRegClass; unsigned Size = TRI->getSpillSize(RC); Align Alignment = TRI->getSpillAlign(RC); - int FI = MF.getFrameInfo().CreateStackObject(Size, Alignment, false); + for (unsigned I = 0; I < ScavSlotsNum; I++) { + int FI = MFI.CreateStackObject(Size, Alignment, false); + RS->addScavengingFrameIndex(FI); - RS->addScavengingFrameIndex(FI); + if (IsLargeFunction && + XtensaFI->getBranchRelaxationScratchFrameIndex() == -1) + XtensaFI->setBranchRelaxationScratchFrameIndex(FI); + } } diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp index b2b4376ca040b6..4c440da715fefe 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp @@ -13,11 +13,14 @@ //===----------------------------------------------------------------------===// #include "XtensaInstrInfo.h" +#include "XtensaConstantPoolValue.h" +#include "XtensaMachineFunctionInfo.h" #include "XtensaTargetMachine.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #define GET_INSTRINFO_CTOR_DTOR #include "XtensaGenInstrInfo.inc" @@ -186,6 +189,18 @@ void XtensaInstrInfo::loadImmediate(MachineBasicBlock &MBB, } } +unsigned XtensaInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case TargetOpcode::INLINEASM: { // Inline Asm: Variable size. + const MachineFunction *MF = MI.getParent()->getParent(); + const char *AsmStr = MI.getOperand(0).getSymbolName(); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } + default: + return MI.getDesc().getSize(); + } +} + bool XtensaInstrInfo::reverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() <= 4 && "Invalid branch condition!"); @@ -244,6 +259,74 @@ bool XtensaInstrInfo::reverseBranchCondition( } } +MachineBasicBlock * +XtensaInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { + unsigned OpCode = MI.getOpcode(); + switch (OpCode) { + case Xtensa::BR_JT: + case Xtensa::JX: + return nullptr; + case Xtensa::J: + return MI.getOperand(0).getMBB(); + case Xtensa::BEQ: + case Xtensa::BNE: + case Xtensa::BLT: + case Xtensa::BLTU: + case Xtensa::BGE: + case Xtensa::BGEU: + return MI.getOperand(2).getMBB(); + case Xtensa::BEQI: + case Xtensa::BNEI: + case Xtensa::BLTI: + case Xtensa::BLTUI: + case Xtensa::BGEI: + case Xtensa::BGEUI: + return MI.getOperand(2).getMBB(); + case Xtensa::BEQZ: + case Xtensa::BNEZ: + case Xtensa::BLTZ: + case Xtensa::BGEZ: + return MI.getOperand(1).getMBB(); + default: + llvm_unreachable("Unknown branch opcode"); + } +} + +bool XtensaInstrInfo::isBranchOffsetInRange(unsigned BranchOp, + int64_t BrOffset) const { + switch (BranchOp) { + case Xtensa::J: + BrOffset -= 4; + return isIntN(18, BrOffset); + case Xtensa::JX: + return true; + case Xtensa::BR_JT: + return true; + case Xtensa::BEQ: + case Xtensa::BNE: + case Xtensa::BLT: + case Xtensa::BLTU: + case Xtensa::BGE: + case Xtensa::BGEU: + case Xtensa::BEQI: + case Xtensa::BNEI: + case Xtensa::BLTI: + case Xtensa::BLTUI: + case Xtensa::BGEI: + case Xtensa::BGEUI: + BrOffset -= 4; + return isIntN(8, BrOffset); + case Xtensa::BEQZ: + case Xtensa::BNEZ: + case Xtensa::BLTZ: + case Xtensa::BGEZ: + BrOffset -=
4; + return isIntN(12, BrOffset); + default: + llvm_unreachable("Unknown branch opcode"); + } +} + bool XtensaInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -376,6 +459,130 @@ unsigned XtensaInstrInfo::insertBranch( return Count; } +void XtensaInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &DestBB, + MachineBasicBlock &RestoreBB, + const DebugLoc &DL, int64_t BrOffset, + RegScavenger *RS) const { + assert(RS && "RegScavenger required for long branching"); + assert(MBB.empty() && + "new block should be inserted for expanding unconditional branch"); + assert(MBB.pred_size() == 1); + + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineConstantPool *ConstantPool = MF->getConstantPool(); + auto *XtensaFI = MF->getInfo<XtensaMachineFunctionInfo>(); + MachineBasicBlock *JumpToMBB = &DestBB; + + if (!isInt<32>(BrOffset)) + report_fatal_error( + "Branch offsets outside of the signed 32-bit range not supported"); + + Register ScratchReg = MRI.createVirtualRegister(&Xtensa::ARRegClass); + auto II = MBB.end(); + + // Create the l32r without its last operand. We will add that operand later, + // once JumpToMBB is calculated and placed in the ConstantPool. + MachineInstr &L32R = *BuildMI(MBB, II, DL, get(Xtensa::L32R), ScratchReg); + BuildMI(MBB, II, DL, get(Xtensa::JX)).addReg(ScratchReg, RegState::Kill); + + RS->enterBasicBlockEnd(MBB); + Register ScavRegister = + RS->scavengeRegisterBackwards(Xtensa::ARRegClass, L32R.getIterator(), + /*RestoreAfter=*/false, /*SpAdj=*/0, + /*AllowSpill=*/false); + if (ScavRegister != Xtensa::NoRegister) + RS->setRegUsed(ScavRegister); + else { + // The case when there is no scavenged register needs special handling. + // Pick A12 because it doesn't make a difference. + ScavRegister = Xtensa::A12; + + int FrameIndex = XtensaFI->getBranchRelaxationScratchFrameIndex(); + if (FrameIndex == -1) + report_fatal_error( + "Unable to properly handle scavenged register for indirect jump, " + "function code size is significantly larger than estimated"); + + storeRegToStackSlot(MBB, L32R, ScavRegister, /*IsKill=*/true, FrameIndex, + &Xtensa::ARRegClass, &RI, Register()); + RI.eliminateFrameIndex(std::prev(L32R.getIterator()), + /*SpAdj=*/0, /*FIOperandNum=*/1); + + loadRegFromStackSlot(RestoreBB, RestoreBB.end(), ScavRegister, FrameIndex, + &Xtensa::ARRegClass, &RI, Register()); + RI.eliminateFrameIndex(RestoreBB.back(), + /*SpAdj=*/0, /*FIOperandNum=*/1); + JumpToMBB = &RestoreBB; + } + + XtensaConstantPoolValue *C = XtensaConstantPoolMBB::Create( + MF->getFunction().getContext(), JumpToMBB, 0); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align(4)); + L32R.addOperand(MachineOperand::CreateCPI(Idx, 0)); + + MRI.replaceRegWith(ScratchReg, ScavRegister); + MRI.clearVirtRegs(); +} + +unsigned XtensaInstrInfo::insertConstBranchAtInst( + MachineBasicBlock &MBB, MachineInstr *I, int64_t offset, + ArrayRef<MachineOperand> Cond, DebugLoc DL, int *BytesAdded) const { + // Shouldn't be a fall through.
+ assert(&MBB && "InsertBranch must not be told to insert a fallthrough"); + assert(Cond.size() <= 4 && + "Xtensa branch conditions have less than four components!"); + + if (Cond.empty() || (Cond[0].getImm() == Xtensa::J)) { + // Unconditional branch + MachineInstr *MI = BuildMI(MBB, I, DL, get(Xtensa::J)).addImm(offset); + if (BytesAdded && MI) + *BytesAdded += getInstSizeInBytes(*MI); + return 1; + } + + unsigned Count = 0; + unsigned BR_C = Cond[0].getImm(); + MachineInstr *MI = nullptr; + switch (BR_C) { + case Xtensa::BEQ: + case Xtensa::BNE: + case Xtensa::BLT: + case Xtensa::BLTU: + case Xtensa::BGE: + case Xtensa::BGEU: + MI = BuildMI(MBB, I, DL, get(BR_C)) + .addImm(offset) + .addReg(Cond[1].getReg()) + .addReg(Cond[2].getReg()); + break; + case Xtensa::BEQI: + case Xtensa::BNEI: + case Xtensa::BLTI: + case Xtensa::BLTUI: + case Xtensa::BGEI: + case Xtensa::BGEUI: + MI = BuildMI(MBB, I, DL, get(BR_C)) + .addImm(offset) + .addReg(Cond[1].getReg()) + .addImm(Cond[2].getImm()); + break; + case Xtensa::BEQZ: + case Xtensa::BNEZ: + case Xtensa::BLTZ: + case Xtensa::BGEZ: + MI = BuildMI(MBB, I, DL, get(BR_C)).addImm(offset).addReg(Cond[1].getReg()); + break; + default: + llvm_unreachable("Invalid branch type!"); + } + if (BytesAdded && MI) + *BytesAdded += getInstSizeInBytes(*MI); + ++Count; + return Count; +} + unsigned XtensaInstrInfo::insertBranchAtInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *TBB, diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.h b/llvm/lib/Target/Xtensa/XtensaInstrInfo.h index 9f45cf7c29ada7..31da4d481d3097 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.h +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.h @@ -38,6 +38,8 @@ class XtensaInstrInfo : public XtensaGenInstrInfo { void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + unsigned getInstSizeInBytes(const MachineInstr &MI) const override; + // Return the XtensaRegisterInfo, which this class owns. const XtensaRegisterInfo &getRegisterInfo() const { return RI; } @@ -77,6 +79,11 @@ class XtensaInstrInfo : public XtensaGenInstrInfo { bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; + MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; + + bool isBranchOffsetInRange(unsigned BranchOpc, + int64_t BrOffset) const override; + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, @@ -90,12 +97,22 @@ class XtensaInstrInfo : public XtensaGenInstrInfo { const DebugLoc &DL, int *BytesAdded = nullptr) const override; + void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &DestBB, + MachineBasicBlock &RestoreBB, const DebugLoc &DL, + int64_t BrOffset = 0, + RegScavenger *RS = nullptr) const override; + unsigned insertBranchAtInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *TBB, ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const; + unsigned insertConstBranchAtInst(MachineBasicBlock &MBB, MachineInstr *I, + int64_t offset, + ArrayRef<MachineOperand> Cond, DebugLoc DL, + int *BytesAdded) const; + // Return true if MI is a conditional or unconditional branch.
// When returning true, set Cond to the mask of condition-code // values on which the instruction will branch, and set Target diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h new file mode 100644 index 00000000000000..c38c060b9387ff --- /dev/null +++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h @@ -0,0 +1,42 @@ +//==- XtensaMachineFunctionInfo.h - Xtensa machine function info --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares Xtensa-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class XtensaMachineFunctionInfo : public MachineFunctionInfo { + /// FrameIndex of the spill slot for the scratch register in BranchRelaxation. + int BranchRelaxationScratchFrameIndex = -1; + +public: + explicit XtensaMachineFunctionInfo(const Function &F, + const TargetSubtargetInfo *STI) {} + + int getBranchRelaxationScratchFrameIndex() const { + return BranchRelaxationScratchFrameIndex; + } + void setBranchRelaxationScratchFrameIndex(int Index) { + BranchRelaxationScratchFrameIndex = Index; + } +}; + +} // namespace llvm + +#endif /* LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H */ diff --git a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.h b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.h index 8643ebb1c0f157..ede0eeb90b42de 100644 --- a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.h +++ b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.h @@ -38,6 +38,10 @@ class XtensaRegisterInfo : public XtensaGenRegisterInfo { return true; } + bool trackLivenessAfterRegAlloc(const MachineFunction &) const override { + return true; + } + const uint16_t * getCalleeSavedRegs(const MachineFunction *MF = 0) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp index 49c7faf84df1d3..8bbb2156e26904 100644 --- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp +++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp @@ -14,6 +14,7 @@ #include "XtensaTargetMachine.h" #include "TargetInfo/XtensaTargetInfo.h" +#include "XtensaMachineFunctionInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -83,6 +84,13 @@ XtensaTargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } +MachineFunctionInfo *XtensaTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return XtensaMachineFunctionInfo::create<XtensaMachineFunctionInfo>(Allocator, F, STI); +} + namespace { /// Xtensa Code Generator Pass Configuration Options.
class XtensaPassConfig : public TargetPassConfig { @@ -95,6 +103,7 @@ class XtensaPassConfig : public TargetPassConfig { } bool addInstSelector() override; + void addPreEmitPass() override; }; } // end anonymous namespace @@ -103,6 +112,8 @@ bool XtensaPassConfig::addInstSelector() { return false; } +void XtensaPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); } + TargetPassConfig *XtensaTargetMachine::createPassConfig(PassManagerBase &PM) { return new XtensaPassConfig(*this, PM); } diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.h b/llvm/lib/Target/Xtensa/XtensaTargetMachine.h index f371f22ed3d0e7..6975076b5d6997 100644 --- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.h +++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.h @@ -45,6 +45,10 @@ class XtensaTargetMachine : public LLVMTargetMachine { return TLOF.get(); } + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + protected: mutable StringMap<std::unique_ptr<XtensaSubtarget>> SubtargetMap; }; diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp index 5f19d600a983aa..5e82ed2e98184e 100644 --- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp @@ -20,11 +20,9 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/IR/Analysis.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Utils/CallGraphUpdater.h" #include <cassert> diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 585c9c0088bcba..1a3d43d06c5acf 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -16,7 +16,6 @@ //===----------------------------------------------------------------------===// #include "CoroInternal.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/StackLifetime.h" diff --git a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp index b909bf5b2d7b61..92042ddab38dc7 100644 --- a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp +++ b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp @@ -41,13 +41,12 @@ #include "llvm/Transforms/HipStdPar/HipStdPar.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/ModuleUtils.h" diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp index 0b8fde6489f8e7..4cdd1fa6110627 100644 --- a/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/llvm/lib/Transforms/IPO/Internalize.cpp @@ -176,7 +176,7 @@ void InternalizePass::checkComdat( if (!C) return; - ComdatInfo &Info = ComdatMap.try_emplace(C).first->second; + ComdatInfo &Info = ComdatMap[C]; ++Info.Size; if (shouldPreserveGV(GV)) Info.External = true; diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 70bee30fd151f6..0b4d3ff201e622 100644 ---
a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -795,6 +795,53 @@ struct AllocMatchInfo { bool Matched = false; }; +DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> +memprof::extractCallsFromIR(Module &M) { + DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls; + + auto GetOffset = [](const DILocation *DIL) { + return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & + 0xffff; + }; + + for (Function &F : M) { + if (F.isDeclaration()) + continue; + + for (auto &BB : F) { + for (auto &I : BB) { + const DILocation *DIL = I.getDebugLoc(); + if (!DIL) + continue; + + if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I)) + continue; + + auto *CB = dyn_cast<CallBase>(&I); + auto *CalledFunction = CB->getCalledFunction(); + // Disregard indirect calls and intrinsics. + if (!CalledFunction || CalledFunction->isIntrinsic()) + continue; + + StringRef CalleeName = CalledFunction->getName(); + uint64_t CallerGUID = + IndexedMemProfRecord::getGUID(DIL->getSubprogramLinkageName()); + uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName); + LineLocation Loc = {GetOffset(DIL), DIL->getColumn()}; + Calls[CallerGUID].emplace_back(Loc, CalleeGUID); + } + } + } + + // Sort each call list by the source location. + for (auto &[CallerGUID, CallList] : Calls) { + llvm::sort(CallList); + CallList.erase(llvm::unique(CallList), CallList.end()); + } + + return Calls; +} + static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 2c8b4e76312a0d..1d213e2aeae5a5 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -40,7 +40,6 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/ObjCARC.h" diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index a2434675a7b5ab..5bfbe95fafa05e 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -36,7 +36,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ObjCARCAliasAnalysis.h" #include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/Analysis/ObjCARCUtil.h" diff --git a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp index 23855231c5b988..155c9493e838f6 100644 --- a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp +++ b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp @@ -28,7 +28,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index dd37fe2b454138..889c432eef8466 100644 --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -64,7 +64,6 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> -#include <cassert> #include <cstdint> #include <iterator> #include <tuple> diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index 31694ad1fa508a..1797a2f2366afa 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -192,7 +192,6 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ProfDataUtils.h" -#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp b/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp index ea2b419b17a59c..d3e0b807716ddb 100644 --- a/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp +++ b/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp @@ -14,10 +14,8 @@ #include "llvm/Transforms/Scalar/LowerWidenableCondition.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Scalar.h" diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 87393902054896..8e78040caa6e6e 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2260,11 +2260,9 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB, auto V = DbgValueMap.find(VI); if (V != DbgValueMap.end()) { auto *DbgII = cast<DbgVariableIntrinsic>(V->second); - auto NewDI = NewDbgValueMap.find({Parent, DbgII}); - if (NewDI == NewDbgValueMap.end()) { - auto *NewDbgII = cast<DbgVariableIntrinsic>(DbgII->clone()); - NewDI = NewDbgValueMap.insert({{Parent, DbgII}, NewDbgII}).first; - } + auto [NewDI, Inserted] = NewDbgValueMap.try_emplace({Parent, DbgII}); + if (Inserted) + NewDI->second = cast<DbgVariableIntrinsic>(DbgII->clone()); DbgVariableIntrinsic *NewDbgII = NewDI->second; // If PHI contains VI as an operand more than once, we may have // replaced it in NewDbgII; confirm that it is present. diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2d34623b8ec5b5..b2f677fb84f983 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9362,6 +9362,12 @@ void BoUpSLP::reorderGatherNode(TreeEntry &TE) { DenseMap<std::pair<size_t, Value *>, SmallVector<LoadInst *>> LoadsMap; SmallSet<size_t, 2> LoadKeyUsed; + // Do not reorder the node if it is small (just 2 elements), all-constant, or + // if all instructions already have the same opcode. + if (TE.Scalars.size() == 2 || (TE.getOpcode() && !TE.isAltShuffle()) || + all_of(TE.Scalars, isConstant)) + return; + if (any_of(seq<unsigned>(TE.Idx), [&](unsigned Idx) { return VectorizableTree[Idx]->isSame(TE.Scalars); })) @@ -10980,8 +10986,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, // If the selects are the only uses of the compares, they will be // dead and we can adjust the cost by removing their cost. if (VI && SelectOnly) { - assert((!Ty->isVectorTy() || SLPReVec) && - "Expected only for scalar type."); + assert(!Ty->isVectorTy() && "Expected only for scalar type."); auto *CI = cast<CmpInst>(VI->getOperand(0)); IntrinsicCost -= TTI->getCmpSelInstrCost( CI->getOpcode(), Ty, Builder.getInt1Ty(), CI->getPredicate(), @@ -18510,25 +18515,30 @@ bool SLPVectorizerPass::vectorizeStores( } // Try to vectorize the first found set to avoid duplicate analysis.
TryToVectorize(Set.second); + unsigned ItIdx = It->first; + int ItDist = It->second; StoreIndexToDistSet PrevSet; - PrevSet.swap(Set.second); + copy_if(Set.second, std::inserter(PrevSet, PrevSet.end()), + [&](const std::pair<unsigned, int> &Pair) { + return Pair.first > ItIdx; + }); + Set.second.clear(); Set.first = Idx; Set.second.emplace(Idx, 0); // Insert stores that followed previous match to try to vectorize them // with this store. - unsigned StartIdx = It->first + 1; + unsigned StartIdx = ItIdx + 1; SmallBitVector UsedStores(Idx - StartIdx); // Distances to previously found dup store (or this store, since they // store to the same addresses). SmallVector<int> Dists(Idx - StartIdx, 0); for (const std::pair<unsigned, int> &Pair : reverse(PrevSet)) { // Do not try to vectorize sequences, we already tried. - if (Pair.first <= It->first || - VectorizedStores.contains(Stores[Pair.first])) + if (VectorizedStores.contains(Stores[Pair.first])) break; unsigned BI = Pair.first - StartIdx; UsedStores.set(BI); - Dists[BI] = Pair.second - It->second; + Dists[BI] = Pair.second - ItDist; } for (unsigned I = StartIdx; I < Idx; ++I) { unsigned BI = I - StartIdx; diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp index 0d928af1902073..1dbdd80117563c 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp @@ -7,10 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Type.h" #include "llvm/SandboxIR/Instruction.h" #include "llvm/SandboxIR/Utils.h" diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll index c57a35aa1880db..bdba8c57dc745d 100644 --- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll +++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll @@ -17,9 +17,8 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] -; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3 -; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2 -; SI-NEXT: v_lshrrev_b32_e32 v2, v3, v2 +; SI-NEXT: v_and_b32_e32 v3, 31, v3 +; SI-NEXT: v_bfe_u32 v2, v2, 0, v3 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -38,9 +37,8 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4 -; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3 -; VI-NEXT: v_lshrrev_b32_e32 v2, v2, v3 +; VI-NEXT: v_and_b32_e32 v2, 31, v4 +; VI-NEXT: v_bfe_u32 v2, v3, 0, v2 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -49,7 +47,8 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x %src = load volatile i32, ptr addrspace(1) %in0.gep %width = load volatile i32, ptr addrspace(1) %in0.gep - %sub = sub i32 32, %width + %width5 = and i32 %width, 31 + %sub = sub i32 32, %width5 %shl = shl i32 %src, %sub %bfe = lshr i32
%shl, %sub store i32 %bfe, ptr addrspace(1) %out.gep @@ -72,6 +71,7 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] ; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: v_and_b32_e32 v3, 31, v3 ; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3 ; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2 ; SI-NEXT: v_lshrrev_b32_e32 v3, v3, v2 @@ -95,7 +95,8 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4 +; VI-NEXT: v_and_b32_e32 v2, 31, v4 +; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v2 ; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3 ; VI-NEXT: v_lshrrev_b32_e32 v2, v2, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 @@ -108,7 +109,8 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x %src = load volatile i32, ptr addrspace(1) %in0.gep %width = load volatile i32, ptr addrspace(1) %in0.gep - %sub = sub i32 32, %width + %width5 = and i32 %width, 31 + %sub = sub i32 32, %width5 %shl = shl i32 %src, %sub %bfe = lshr i32 %shl, %sub store i32 %bfe, ptr addrspace(1) %out.gep @@ -219,9 +221,8 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] -; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3 -; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2 -; SI-NEXT: v_ashrrev_i32_e32 v2, v3, v2 +; SI-NEXT: v_and_b32_e32 v3, 31, v3 +; SI-NEXT: v_bfe_i32 v2, v2, 0, v3 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -240,9 +241,8 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4 -; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3 -; VI-NEXT: v_ashrrev_i32_e32 v2, v2, v3 +; VI-NEXT: v_and_b32_e32 v2, 31, v4 +; VI-NEXT: v_bfe_i32 v2, v3, 0, v2 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -251,7 +251,8 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x %src = load volatile i32, ptr addrspace(1) %in0.gep %width = load volatile i32, ptr addrspace(1) %in0.gep - %sub = sub i32 32, %width + %width5 = and i32 %width, 31 + %sub = sub i32 32, %width5 %shl = shl i32 %src, %sub %bfe = ashr i32 %shl, %sub store i32 %bfe, ptr addrspace(1) %out.gep diff --git a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll index 3d9616f02d52d1..3de8db2c6a448e 100644 --- a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll @@ -150,22 +150,14 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; ---------------------------------------------------------------------------- ; define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { -; SI-LABEL: bzhi32_d0: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1 -; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 -; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0 -; SI-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: 
bzhi32_d0: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1 -; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 -; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] - %numhighbits = sub i32 32, %numlowbits +; GCN-LABEL: bzhi32_d0: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_and_b32_e32 v1, 31, v1 +; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %numlow5bits = and i32 %numlowbits, 31 + %numhighbits = sub i32 32, %numlow5bits %highbitscleared = shl i32 %val, %numhighbits %masked = lshr i32 %highbitscleared, %numhighbits ret i32 %masked diff --git a/llvm/test/CodeGen/DirectX/updateCounter.ll b/llvm/test/CodeGen/DirectX/updateCounter.ll new file mode 100644 index 00000000000000..68ea1e9eac9d50 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/updateCounter.ll @@ -0,0 +1,41 @@ +; RUN: opt -S -dxil-op-lower %s | FileCheck %s + + +target triple = "dxil-pc-shadermodel6.6-compute" + + ; CHECK-LABEL: define void @update_counter_decrement_vector() { +define void @update_counter_decrement_vector() { + ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, + %buffer = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]] + ; CHECK-NEXT: call void @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1) + call void @llvm.dx.updateCounter(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i8 -1) + ret void +} + + ; CHECK-LABEL: define void @update_counter_increment_vector() { +define void @update_counter_increment_vector() { + ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, + %buffer = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]] + ; CHECK-NEXT: call void @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 1) + call void @llvm.dx.updateCounter(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i8 1) + ret void +} + +; CHECK-LABEL: define void @update_counter_decrement_scalar() { +define void @update_counter_decrement_scalar() { + ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, + %buffer = call target("dx.RawBuffer", i8, 0, 0) + @llvm.dx.handle.fromBinding.tdx.RawBuffer_i8_0_0t( + i32 1, i32 8, i32 1, i32 0, i1 false) + ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]] + ; CHECK-NEXT: call void @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1) + call void @llvm.dx.updateCounter(target("dx.RawBuffer", i8, 0, 0) %buffer, i8 -1) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll index 785cc2aafde11b..0e5cbe63004b62 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll @@ -98,11 +98,17 @@ define double @fcvt_d_wu(i32 %a) nounwind { } define double @fcvt_d_wu_load(ptr %p) nounwind { -; CHECKIFD-LABEL: fcvt_d_wu_load: -; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: lw a0, 0(a0) -; 
CHECKIFD-NEXT: fcvt.d.wu fa0, a0 -; CHECKIFD-NEXT: ret +; RV32IFD-LABEL: fcvt_d_wu_load: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: lw a0, 0(a0) +; RV32IFD-NEXT: fcvt.d.wu fa0, a0 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_load: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lwu a0, 0(a0) +; RV64IFD-NEXT: fcvt.d.wu fa0, a0 +; RV64IFD-NEXT: ret %a = load i32, ptr %p %1 = uitofp i32 %a to double ret double %1 @@ -294,7 +300,9 @@ define signext i32 @fcvt_d_wu_demanded_bits(i32 signext %0, ptr %1) nounwind { ; RV64IFD-LABEL: fcvt_d_wu_demanded_bits: ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: addiw a0, a0, 1 -; RV64IFD-NEXT: fcvt.d.wu fa5, a0 +; RV64IFD-NEXT: slli a2, a0, 32 +; RV64IFD-NEXT: srli a2, a2, 32 +; RV64IFD-NEXT: fcvt.d.wu fa5, a2 ; RV64IFD-NEXT: fsd fa5, 0(a1) ; RV64IFD-NEXT: ret %3 = add i32 %0, 1 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll index d6a36c5a702ac8..c5a36d063c0ad6 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll @@ -101,11 +101,17 @@ define float @fcvt_s_wu(i32 %a) nounwind { } define float @fcvt_s_wu_load(ptr %p) nounwind { -; CHECKIF-LABEL: fcvt_s_wu_load: -; CHECKIF: # %bb.0: -; CHECKIF-NEXT: lw a0, 0(a0) -; CHECKIF-NEXT: fcvt.s.wu fa0, a0 -; CHECKIF-NEXT: ret +; RV32IF-LABEL: fcvt_s_wu_load: +; RV32IF: # %bb.0: +; RV32IF-NEXT: lw a0, 0(a0) +; RV32IF-NEXT: fcvt.s.wu fa0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_load: +; RV64IF: # %bb.0: +; RV64IF-NEXT: lwu a0, 0(a0) +; RV64IF-NEXT: fcvt.s.wu fa0, a0 +; RV64IF-NEXT: ret %a = load i32, ptr %p %1 = uitofp i32 %a to float ret float %1 @@ -266,7 +272,9 @@ define signext i32 @fcvt_s_wu_demanded_bits(i32 signext %0, ptr %1) nounwind { ; RV64IF-LABEL: fcvt_s_wu_demanded_bits: ; RV64IF: # %bb.0: ; RV64IF-NEXT: addiw a0, a0, 1 -; RV64IF-NEXT: fcvt.s.wu fa5, a0 +; RV64IF-NEXT: slli a2, a0, 32 +; RV64IF-NEXT: srli a2, a2, 32 +; RV64IF-NEXT: fcvt.s.wu fa5, a2 ; RV64IF-NEXT: fsw fa5, 0(a1) ; RV64IF-NEXT: ret %3 = add i32 %0, 1 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctlz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctlz-rv64.mir index 8c75bdd38d732a..f6e04a9999dbf8 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctlz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctlz-rv64.mir @@ -15,10 +15,8 @@ body: | ; RV64I-NEXT: $x10 = COPY [[CLZW]] ; RV64I-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0 - %2:gprb(s32) = G_CTLZ %1 - %3:gprb(s64) = G_ANYEXT %2 - $x10 = COPY %3(s64) + %1:gprb(s64) = G_CLZW %0 + $x10 = COPY %1(s64) PseudoRET implicit $x10 ... 
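Taken together, the RISC-V GlobalISel test updates above pin down the new contract: on RV64, an s32 G_CTLZ/G_CTTZ/G_CTPOP no longer reaches instruction selection; the legalizer widens it to sXLen (rewriting the count-zeros forms to G_CLZW/G_CTZW), so only the native-width operations stay selectable. A rough way to probe that contract through the LegalizerInfo API is sketched below; it is not part of the patch, and it assumes LI is a RISCVLegalizerInfo constructed for an RV64 subtarget with Zbb.

    #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
    #include "llvm/CodeGen/TargetOpcodes.h"
    #include <cassert>
    using namespace llvm;

    void checkCtlzActions(const LegalizerInfo &LI) {
      const LLT S32 = LLT::scalar(32);
      const LLT S64 = LLT::scalar(64); // sXLen on RV64
      // {s32, s32} is now Custom: widened and rewritten to G_CLZW...
      assert(LI.getAction({TargetOpcode::G_CTLZ, {S32, S32}}).Action ==
             LegalizeActions::Custom);
      // ...while the sXLen form remains directly Legal.
      assert(LI.getAction({TargetOpcode::G_CTLZ, {S64, S64}}).Action ==
             LegalizeActions::Legal);
    }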
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctpop-rv64.mir index 7d584a8589b901..f91f029209220f 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctpop-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ctpop-rv64.mir @@ -3,25 +3,6 @@ # RUN: -simplify-mir -verify-machineinstrs %s -o - \ # RUN: | FileCheck -check-prefix=RV64I %s ---- -name: ctpop_s32 -legalized: true -regBankSelected: true -body: | - bb.0.entry: - ; RV64I-LABEL: name: ctpop_s32 - ; RV64I: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; RV64I-NEXT: [[CPOPW:%[0-9]+]]:gpr = CPOPW [[COPY]] - ; RV64I-NEXT: $x10 = COPY [[CPOPW]] - ; RV64I-NEXT: PseudoRET implicit $x10 - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0 - %2:gprb(s32) = G_CTPOP %1 - %3:gprb(s64) = G_ANYEXT %2 - $x10 = COPY %3(s64) - PseudoRET implicit $x10 - -... --- name: ctpop_s64 legalized: true diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/cttz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/cttz-rv64.mir index b56d45f0993ada..17fb381da6cdbb 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/cttz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/cttz-rv64.mir @@ -15,10 +15,8 @@ body: | ; RV64I-NEXT: $x10 = COPY [[CTZW]] ; RV64I-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0 - %2:gprb(s32) = G_CTTZ %1 - %3:gprb(s64) = G_ANYEXT %2 - $x10 = COPY %3(s64) + %1:gprb(s64) = G_CTZW %0 + $x10 = COPY %1(s64) PseudoRET implicit $x10 ... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-f16-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-f16-rv64.mir index 1afb1d9be6a099..b813a79c339ec5 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-f16-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-f16-rv64.mir @@ -2,52 +2,6 @@ # RUN: llc -mtriple=riscv64 -mattr=+zfh -run-pass=instruction-select \ # RUN: -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s ---- -name: sitofp_s64_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: sitofp_s64_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[FCVT_H_W:%[0-9]+]]:fpr16 = nofpexcept FCVT_H_W [[COPY]], 7 - ; CHECK-NEXT: $f10_h = COPY [[FCVT_H_W]] - ; CHECK-NEXT: PseudoRET implicit $f10_h - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:fprb(s16) = G_SITOFP %1(s32) - $f10_h = COPY %2(s16) - PseudoRET implicit $f10_h - -... ---- -name: uitofp_s64_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: uitofp_s64_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[FCVT_H_WU:%[0-9]+]]:fpr16 = nofpexcept FCVT_H_WU [[COPY]], 7 - ; CHECK-NEXT: $f10_h = COPY [[FCVT_H_WU]] - ; CHECK-NEXT: PseudoRET implicit $f10_h - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:fprb(s16) = G_UITOFP %1(s32) - $f10_h = COPY %2(s16) - PseudoRET implicit $f10_h - -... 
--- name: sitofp_s64_s64 legalized: true diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-rv64.mir index 31175d7af93f98..f99a15a850517f 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/itofp-rv64.mir @@ -2,52 +2,6 @@ # RUN: llc -mtriple=riscv64 -mattr=+d -run-pass=instruction-select \ # RUN: -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s ---- -name: sitofp_s32_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: sitofp_s32_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[FCVT_S_W:%[0-9]+]]:fpr32 = nofpexcept FCVT_S_W [[COPY]], 7 - ; CHECK-NEXT: $f10_f = COPY [[FCVT_S_W]] - ; CHECK-NEXT: PseudoRET implicit $f10_f - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:fprb(s32) = G_SITOFP %1(s32) - $f10_f = COPY %2(s32) - PseudoRET implicit $f10_f - -... ---- -name: uitofp_s32_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: uitofp_s32_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[FCVT_S_WU:%[0-9]+]]:fpr32 = nofpexcept FCVT_S_WU [[COPY]], 7 - ; CHECK-NEXT: $f10_f = COPY [[FCVT_S_WU]] - ; CHECK-NEXT: PseudoRET implicit $f10_f - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:fprb(s32) = G_UITOFP %1(s32) - $f10_f = COPY %2(s32) - PseudoRET implicit $f10_f - -... --- name: sitofp_s32_s64 legalized: true @@ -91,52 +45,6 @@ body: | $f10_f = COPY %1(s32) PseudoRET implicit $f10_f -... ---- -name: sitofp_s64_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: sitofp_s64_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[FCVT_D_W:%[0-9]+]]:fpr64 = nofpexcept FCVT_D_W [[COPY]], 0 - ; CHECK-NEXT: $f10_d = COPY [[FCVT_D_W]] - ; CHECK-NEXT: PseudoRET implicit $f10_d - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:fprb(s64) = G_SITOFP %1(s32) - $f10_d = COPY %2(s64) - PseudoRET implicit $f10_d - -... ---- -name: uitofp_s64_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: uitofp_s64_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[FCVT_D_WU:%[0-9]+]]:fpr64 = nofpexcept FCVT_D_WU [[COPY]], 0 - ; CHECK-NEXT: $f10_d = COPY [[FCVT_D_WU]] - ; CHECK-NEXT: PseudoRET implicit $f10_d - %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:fprb(s64) = G_UITOFP %1(s32) - $f10_d = COPY %2(s64) - PseudoRET implicit $f10_d - ... 
--- name: sitofp_s64_s64 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rotate-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rotate-rv64.mir index 50b96e0ee972e6..edf7ef2203cbff 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rotate-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rotate-rv64.mir @@ -22,12 +22,9 @@ body: | ; CHECK-NEXT: $x10 = COPY [[ROLW]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:gprb(s64) = COPY $x11 - %6:gprb(s32) = G_TRUNC %2(s64) - %4:gprb(s32) = G_ROTL %1, %6(s32) - %5:gprb(s64) = G_ANYEXT %4(s32) - $x10 = COPY %5(s64) + %1:gprb(s64) = COPY $x11 + %2:gprb(s64) = G_ROLW %0, %1(s64) + $x10 = COPY %2(s64) PseudoRET implicit $x10 ... @@ -72,12 +69,9 @@ body: | ; CHECK-NEXT: $x10 = COPY [[RORW]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:gprb(s64) = COPY $x11 - %6:gprb(s32) = G_TRUNC %2(s64) - %4:gprb(s32) = G_ROTR %1, %6(s32) - %5:gprb(s64) = G_ANYEXT %4(s32) - $x10 = COPY %5(s64) + %1:gprb(s64) = COPY $x11 + %2:gprb(s64) = G_RORW %0, %1(s64) + $x10 = COPY %2(s64) PseudoRET implicit $x10 ... @@ -121,11 +115,9 @@ body: | ; CHECK-NEXT: $x10 = COPY [[RORIW]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:gprb(s32) = G_CONSTANT i32 15 - %3:gprb(s32) = G_ROTL %1, %2(s32) - %4:gprb(s64) = G_ANYEXT %3(s32) - $x10 = COPY %4(s64) + %1:gprb(s64) = G_CONSTANT i64 15 + %2:gprb(s64) = G_ROLW %0, %1(s64) + $x10 = COPY %2(s64) PseudoRET implicit $x10 ... @@ -169,11 +161,9 @@ body: | ; CHECK-NEXT: $x10 = COPY [[RORIW]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 - %1:gprb(s32) = G_TRUNC %0(s64) - %2:gprb(s32) = G_CONSTANT i32 15 - %3:gprb(s32) = G_ROTR %1, %2(s32) - %4:gprb(s64) = G_ANYEXT %3(s32) - $x10 = COPY %4(s64) + %1:gprb(s64) = G_CONSTANT i64 15 + %2:gprb(s64) = G_RORW %0, %1(s64) + $x10 = COPY %2(s64) PseudoRET implicit $x10 ... 
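The rotate selections above follow the same scheme: G_ROTL/G_ROTR on s32 are replaced by the target-specific G_ROLW/G_RORW on s64 before instruction selection. As a hedged IR-level sketch (the function name and attributes are assumptions, mirroring the rol_i32 tests in rv64zbb-zbkb.ll further down), a variable-amount i32 rotate should now travel through G_ROLW and select to rolw:

; Sketch only: expected to go through G_ROLW when compiled with
;   llc -mtriple=riscv64 -mattr=+zbb -global-isel
declare i32 @llvm.fshl.i32(i32, i32, i32)

define signext i32 @rol_i32(i32 signext %x, i32 signext %y) nounwind {
  ; fshl with both value operands equal is a rotate left by %y.
  %1 = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
  ret i32 %1
}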
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir index bc6aafb1e3b2cc..f4ea4f5eb43aa3 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir @@ -57,12 +57,12 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[AND]] + ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[CLZW]](s64) ; RV64ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) @@ -133,12 +133,12 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[AND]] + ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[CLZW]](s64) ; RV64ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) @@ -204,10 +204,8 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[TRUNC]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[CTLZ]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[COPY]] + ; RV64ZBB-NEXT: $x10 = COPY [[CLZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %1(s64) @@ -333,12 +331,12 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[AND]] + ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC 
[[CLZW]](s64) ; RV64ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) @@ -409,12 +407,12 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[AND]] + ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[CLZW]](s64) ; RV64ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) @@ -480,10 +478,8 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[TRUNC]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[CTLZ]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[COPY]] + ; RV64ZBB-NEXT: $x10 = COPY [[CLZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %1(s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir index ec885c170b5b60..48595dc9809c74 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir @@ -46,13 +46,11 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64) + ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64) + ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s8) = G_TRUNC %1(s64) @@ -106,13 +104,11 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64ZBB-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64) + ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64) + ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s16) = G_TRUNC %1(s64) @@ -161,10 +157,11 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[TRUNC]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[CTPOP]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64) + ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64) + ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %1(s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir index f8285d609875ba..c3b6d357d241d7 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir @@ -52,10 +52,9 @@ body: | ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; RV64ZBB-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) - ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; RV64ZBB-NEXT: [[CTZW:%[0-9]+]]:_(s64) = G_CTZW [[ANYEXT]] + ; RV64ZBB-NEXT: $x10 = COPY [[CTZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s8) = G_TRUNC %1(s64) @@ -115,10 +114,9 @@ body: | ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 ; RV64ZBB-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) - ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; RV64ZBB-NEXT: [[CTZW:%[0-9]+]]:_(s64) = G_CTZW [[ANYEXT]] + ; RV64ZBB-NEXT: $x10 = COPY [[CTZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s16) = G_TRUNC %1(s64) @@ -171,10 +169,8 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[TRUNC]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) 
= G_ANYEXT [[CTTZ]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[CTZW:%[0-9]+]]:_(s64) = G_CTZW [[COPY]] + ; RV64ZBB-NEXT: $x10 = COPY [[CTZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %1(s64) @@ -282,10 +278,9 @@ body: | ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; RV64ZBB-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) - ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; RV64ZBB-NEXT: [[CTZW:%[0-9]+]]:_(s64) = G_CTZW [[ANYEXT]] + ; RV64ZBB-NEXT: $x10 = COPY [[CTZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s8) = G_TRUNC %1(s64) @@ -345,10 +340,9 @@ body: | ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 ; RV64ZBB-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[C]] - ; RV64ZBB-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) - ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; RV64ZBB-NEXT: [[CTZW:%[0-9]+]]:_(s64) = G_CTZW [[ANYEXT]] + ; RV64ZBB-NEXT: $x10 = COPY [[CTZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s16) = G_TRUNC %1(s64) @@ -401,10 +395,8 @@ body: | ; RV64ZBB: liveins: $x10 ; RV64ZBB-NEXT: {{ $}} ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64ZBB-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[TRUNC]](s32) - ; RV64ZBB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[CTTZ]](s32) - ; RV64ZBB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB-NEXT: [[CTZW:%[0-9]+]]:_(s64) = G_CTZW [[COPY]] + ; RV64ZBB-NEXT: $x10 = COPY [[CTZW]](s64) ; RV64ZBB-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %1(s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-f16-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-f16-rv64.mir index 52c69d1acbffc7..6a70a331a02c8c 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-f16-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-f16-rv64.mir @@ -13,11 +13,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_h = COPY [[SITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -38,10 +37,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_h = COPY [[UITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -62,11 +60,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_h = COPY [[SITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -87,10 +84,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_h = COPY [[UITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -111,11 +107,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_h = COPY [[SITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -136,10 +131,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_h = COPY [[UITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ 
-160,8 +154,8 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[TRUNC]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[SEXT_INREG]](s64) ; CHECK-NEXT: $f10_h = COPY [[SITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 @@ -182,8 +176,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[TRUNC]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_h = COPY [[UITOFP]](s16) ; CHECK-NEXT: PseudoRET implicit $f10_h %1:_(s64) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-rv64.mir index bc09a44dee2e09..2d6ee6250cf328 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-itofp-rv64.mir @@ -13,11 +13,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_f = COPY [[SITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -38,10 +37,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_f = COPY [[UITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -62,11 +60,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: 
[[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_f = COPY [[SITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -87,10 +84,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_f = COPY [[UITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -111,11 +107,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_f = COPY [[SITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -136,10 +131,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_f = COPY [[UITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -160,8 +154,8 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[TRUNC]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[SEXT_INREG]](s64) ; CHECK-NEXT: $f10_f = COPY [[SITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -182,8 +176,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[TRUNC]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_f = COPY [[UITOFP]](s32) ; CHECK-NEXT: PseudoRET implicit $f10_f %1:_(s64) = COPY $x10 @@ -244,11 +239,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC 
[[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_d = COPY [[SITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -269,10 +263,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_d = COPY [[UITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -293,11 +286,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_d = COPY [[SITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -318,10 +310,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_d = COPY [[UITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -342,11 +333,10 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s64) ; CHECK-NEXT: $f10_d = COPY [[SITOFP]](s64) ; 
CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -367,10 +357,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_d = COPY [[UITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -391,8 +380,8 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[TRUNC]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[SEXT_INREG]](s64) ; CHECK-NEXT: $f10_d = COPY [[SITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 @@ -413,8 +402,9 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[TRUNC]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s64) ; CHECK-NEXT: $f10_d = COPY [[UITOFP]](s64) ; CHECK-NEXT: PseudoRET implicit $f10_d %1:_(s64) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-rotate-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-rotate-rv64.mir index 2334fe1015e2f6..a0d23d891b14a4 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-rotate-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-rotate-rv64.mir @@ -109,12 +109,9 @@ body: | ; RV64ZBB_OR_RV64ZBKB: liveins: $x10, $x11 ; RV64ZBB_OR_RV64ZBKB-NEXT: {{ $}} ; RV64ZBB_OR_RV64ZBKB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; RV64ZBB_OR_RV64ZBKB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[ROTL:%[0-9]+]]:_(s32) = G_ROTL [[TRUNC]], [[TRUNC1]](s32) - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ROTL]](s32) - ; RV64ZBB_OR_RV64ZBKB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB_OR_RV64ZBKB-NEXT: [[ROLW:%[0-9]+]]:_(s64) = G_ROLW [[COPY]], [[COPY1]] + ; RV64ZBB_OR_RV64ZBKB-NEXT: $x10 = COPY [[ROLW]](s64) ; RV64ZBB_OR_RV64ZBKB-NEXT: PseudoRET implicit $x10 %2:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %2(s64) @@ -268,12 +265,9 @@ body: | ; RV64ZBB_OR_RV64ZBKB: liveins: $x10, $x11 ; RV64ZBB_OR_RV64ZBKB-NEXT: {{ $}} ; RV64ZBB_OR_RV64ZBKB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; RV64ZBB_OR_RV64ZBKB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[TRUNC]], [[TRUNC1]](s32) - ; RV64ZBB_OR_RV64ZBKB-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) 
= G_ANYEXT [[ROTR]](s32) - ; RV64ZBB_OR_RV64ZBKB-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64ZBB_OR_RV64ZBKB-NEXT: [[RORW:%[0-9]+]]:_(s64) = G_RORW [[COPY]], [[COPY1]] + ; RV64ZBB_OR_RV64ZBKB-NEXT: $x10 = COPY [[RORW]](s64) ; RV64ZBB_OR_RV64ZBKB-NEXT: PseudoRET implicit $x10 %2:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %2(s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-f16-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-f16-rv64.mir index 10da1f8fc9fb30..5b48a7e947ae67 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-f16-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-f16-rv64.mir @@ -3,52 +3,6 @@ # RUN: -simplify-mir -verify-machineinstrs %s \ # RUN: -o - | FileCheck %s ---- -name: sitofp_s16_s32 -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: sitofp_s16_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gprb(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fprb(s16) = G_SITOFP [[TRUNC]](s32) - ; CHECK-NEXT: $f10_h = COPY [[SITOFP]](s16) - ; CHECK-NEXT: PseudoRET implicit $f10_h - %0:_(s64) = COPY $x10 - %1:_(s32) = G_TRUNC %0(s64) - %2:_(s16) = G_SITOFP %1(s32) - $f10_h = COPY %2(s16) - PseudoRET implicit $f10_h - -... ---- -name: uitofp_s16_s32 -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: uitofp_s16_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gprb(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:fprb(s16) = G_UITOFP [[TRUNC]](s32) - ; CHECK-NEXT: $f10_h = COPY [[UITOFP]](s16) - ; CHECK-NEXT: PseudoRET implicit $f10_h - %0:_(s64) = COPY $x10 - %1:_(s32) = G_TRUNC %0(s64) - %2:_(s16) = G_UITOFP %1(s32) - $f10_h = COPY %2(s16) - PseudoRET implicit $f10_h - -... --- name: sitofp_s16_s64 legalized: true diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-rv64.mir index e0f039d5983ee8..6cb38cf38a6a56 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/itofp-rv64.mir @@ -3,52 +3,6 @@ # RUN: -simplify-mir -verify-machineinstrs %s \ # RUN: -o - | FileCheck %s ---- -name: sitofp_s32_s32 -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: sitofp_s32_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gprb(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fprb(s32) = G_SITOFP [[TRUNC]](s32) - ; CHECK-NEXT: $f10_f = COPY [[SITOFP]](s32) - ; CHECK-NEXT: PseudoRET implicit $f10_f - %0:_(s64) = COPY $x10 - %1:_(s32) = G_TRUNC %0(s64) - %2:_(s32) = G_SITOFP %1(s32) - $f10_f = COPY %2(s32) - PseudoRET implicit $f10_f - -... 
---- -name: uitofp_s32_s32 -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: uitofp_s32_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gprb(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:fprb(s32) = G_UITOFP [[TRUNC]](s32) - ; CHECK-NEXT: $f10_f = COPY [[UITOFP]](s32) - ; CHECK-NEXT: PseudoRET implicit $f10_f - %0:_(s64) = COPY $x10 - %1:_(s32) = G_TRUNC %0(s64) - %2:_(s32) = G_UITOFP %1(s32) - $f10_f = COPY %2(s32) - PseudoRET implicit $f10_f - -... --- name: sitofp_s32_s64 legalized: true @@ -90,52 +44,6 @@ body: | $f10_f = COPY %1(s32) PseudoRET implicit $f10_f -... ---- -name: sitofp_s64_s32 -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: sitofp_s64_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gprb(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fprb(s64) = G_SITOFP [[TRUNC]](s32) - ; CHECK-NEXT: $f10_d = COPY [[SITOFP]](s64) - ; CHECK-NEXT: PseudoRET implicit $f10_d - %0:_(s64) = COPY $x10 - %1:_(s32) = G_TRUNC %0(s64) - %2:_(s64) = G_SITOFP %1(s32) - $f10_d = COPY %2(s64) - PseudoRET implicit $f10_d - -... ---- -name: uitofp_s64_s32 -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $x10 - - ; CHECK-LABEL: name: uitofp_s64_s32 - ; CHECK: liveins: $x10 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(s64) = COPY $x10 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gprb(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:fprb(s64) = G_UITOFP [[TRUNC]](s32) - ; CHECK-NEXT: $f10_d = COPY [[UITOFP]](s64) - ; CHECK-NEXT: PseudoRET implicit $f10_d - %0:_(s64) = COPY $x10 - %1:_(s32) = G_TRUNC %0(s64) - %2:_(s64) = G_UITOFP %1(s32) - $f10_d = COPY %2(s64) - PseudoRET implicit $f10_d - ... 
--- name: sitofp_s64_s64 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll index 3d78d15057ba41..d9b7f16131c352 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll @@ -166,7 +166,9 @@ define signext i32 @rol_i32_neg_constant_rhs(i32 signext %a) nounwind { ; ; RV64ZBB-ZBKB-LABEL: rol_i32_neg_constant_rhs: ; RV64ZBB-ZBKB: # %bb.0: -; RV64ZBB-ZBKB-NEXT: li a1, -2 +; RV64ZBB-ZBKB-NEXT: li a1, 1 +; RV64ZBB-ZBKB-NEXT: slli a1, a1, 32 +; RV64ZBB-ZBKB-NEXT: addi a1, a1, -2 ; RV64ZBB-ZBKB-NEXT: rolw a0, a1, a0 ; RV64ZBB-ZBKB-NEXT: ret %1 = tail call i32 @llvm.fshl.i32(i32 -2, i32 -2, i32 %a) @@ -250,7 +252,9 @@ define signext i32 @ror_i32_neg_constant_rhs(i32 signext %a) nounwind { ; ; RV64ZBB-ZBKB-LABEL: ror_i32_neg_constant_rhs: ; RV64ZBB-ZBKB: # %bb.0: -; RV64ZBB-ZBKB-NEXT: li a1, -2 +; RV64ZBB-ZBKB-NEXT: li a1, 1 +; RV64ZBB-ZBKB-NEXT: slli a1, a1, 32 +; RV64ZBB-ZBKB-NEXT: addi a1, a1, -2 ; RV64ZBB-ZBKB-NEXT: rorw a0, a1, a0 ; RV64ZBB-ZBKB-NEXT: ret %1 = tail call i32 @llvm.fshr.i32(i32 -2, i32 -2, i32 %a) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll index 61c96b325f17fd..835b4e32ae3206 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll @@ -752,8 +752,6 @@ define i1 @ctpop_i32_ult_two(i32 signext %a) nounwind { ; RV64ZBB-LABEL: ctpop_i32_ult_two: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: cpopw a0, a0 -; RV64ZBB-NEXT: slli a0, a0, 32 -; RV64ZBB-NEXT: srli a0, a0, 32 ; RV64ZBB-NEXT: sltiu a0, a0, 2 ; RV64ZBB-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) @@ -793,7 +791,7 @@ define signext i32 @ctpop_i32_load(ptr %p) nounwind { ; ; RV64ZBB-LABEL: ctpop_i32_load: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: lw a0, 0(a0) +; RV64ZBB-NEXT: lwu a0, 0(a0) ; RV64ZBB-NEXT: cpopw a0, a0 ; RV64ZBB-NEXT: ret %a = load i32, ptr %p diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll index 76720c5641563c..060a5c4224fe15 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll @@ -11,29 +11,28 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: vlm.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v8, 2 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v12, v9, 1, v0 +; CHECK-NEXT: vnsrl.wi v8, v12, 0 ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vadd.vv v11, v9, v9 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vrgather.vv v9, v10, v11 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: li a0, -256 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vadd.vi v12, v11, -16 -; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t -; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vadd.vi v12, v11, 1 -; CHECK-NEXT: vrgather.vv v13, v10, 
v12 -; CHECK-NEXT: vadd.vi v10, v11, -15 -; CHECK-NEXT: vrgather.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmsne.vi v8, v13, 0 +; CHECK-NEXT: vadd.vi v9, v11, -16 +; CHECK-NEXT: vrgather.vv v8, v10, v9, v0.t +; CHECK-NEXT: vmsne.vi v9, v8, 0 +; CHECK-NEXT: vnsrl.wi v8, v12, 8 +; CHECK-NEXT: vadd.vi v11, v11, -15 +; CHECK-NEXT: vrgather.vv v8, v10, v11, v0.t +; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: vmv.v.v v0, v9 ; CHECK-NEXT: ret %vec = load <32 x i1>, ptr %p diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll index dcd35b4558e5ea..c9e6a8730eec7e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll @@ -99,45 +99,39 @@ define <4 x i32> @v4i32_v16i32(<16 x i32>) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vmv.v.i v12, 1 -; RV32-NEXT: vmv.v.i v14, 6 +; RV32-NEXT: vmv.v.i v13, 6 ; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v14, v12, 1 -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vid.v v12 -; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vadd.vi v15, v12, 1 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vrgatherei16.vv v12, v8, v15 +; RV32-NEXT: vslideup.vi v13, v12, 1 +; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vmv4r.v v20, v8 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vmv2r.v v22, v14 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.i v0, 10 -; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 8 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vrgatherei16.vv v12, v8, v14, v0.t -; RV32-NEXT: vmv1r.v v8, v12 +; RV32-NEXT: vnsrl.wx v8, v20, a0 +; RV32-NEXT: vrgatherei16.vv v8, v16, v13, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: v4i32_v16i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vid.v v12 -; RV64-NEXT: vadd.vv v12, v12, v12 -; RV64-NEXT: vadd.vi v14, v12, 1 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vrgatherei16.vv v12, v8, v14 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.v.i v0, 10 ; RV64-NEXT: vsetivli zero, 8, e32, m4, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 8 +; RV64-NEXT: vslidedown.vi v16, v8, 8 +; RV64-NEXT: vmv4r.v v20, v8 +; RV64-NEXT: li a0, 32 +; RV64-NEXT: vmv2r.v v22, v12 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vnsrl.wx v8, v20, a0 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vmv.v.i v0, 10 ; RV64-NEXT: li a0, 3 ; RV64-NEXT: slli a0, a0, 33 ; RV64-NEXT: addi a0, a0, 1 ; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vmv.v.x v10, a0 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV64-NEXT: vrgatherei16.vv v12, v8, v10, v0.t -; RV64-NEXT: vmv1r.v v8, v12 +; RV64-NEXT: vrgatherei16.vv v8, v16, v10, v0.t ; RV64-NEXT: ret %2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> ret <4 x i32> %2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll new file mode 100644 index 00000000000000..a8f75f8d1c24d1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll @@ -0,0 +1,308 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 
-mattr=+v \ +; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32V +; RUN: llc < %s -mtriple=riscv64 -mattr=+v \ +; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64V + +define void @deinterleave3_0_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave3_0_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: vmul.vx v9, v9, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v9 +; CHECK-NEXT: vadd.vi v9, v9, -8 +; CHECK-NEXT: li a0, 56 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t +; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave3_8_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave3_8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 1 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: vmadd.vx v10, a0, v9 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: li a0, 24 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave4_0_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave4_0_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vwaddu.vv v10, v8, v9 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vwmaccu.vx v10, a0, v9 +; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vsll.vi v9, v9, 2 +; CHECK-NEXT: vadd.vi v9, v9, -8 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t +; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave4_8_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave4_8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -9 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: 
vmacc.vx v9, a0, v10 +; CHECK-NEXT: vsll.vi v10, v10, 2 +; CHECK-NEXT: vadd.vi v10, v10, 1 +; CHECK-NEXT: vrgather.vv v11, v8, v10 +; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v11, v8, v9, v0.t +; CHECK-NEXT: vse8.v v11, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave5_0_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave5_0_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmul.vx v9, v9, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v9 +; CHECK-NEXT: vadd.vi v9, v9, -8 +; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t +; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave5_8_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave5_8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 1 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmadd.vx v10, a0, v9 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v9, v8, 3, v0.t +; CHECK-NEXT: vse8.v v9, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave6_0_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave6_0_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: li a0, 6 +; CHECK-NEXT: vmul.vx v9, v9, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v9 +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 4, v0.t +; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave6_8_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave6_8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 1 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: li a0, 6 +; CHECK-NEXT: vmadd.vx v10, a0, v9 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, 
ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v9, v8, 5, v0.t +; CHECK-NEXT: vse8.v v9, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave7_0_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave7_0_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: li a0, 7 +; CHECK-NEXT: vmul.vx v9, v9, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v9 +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 6, v0.t +; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave7_8_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave7_8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -6 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: li a0, 6 +; CHECK-NEXT: vmadd.vx v10, a0, v9 +; CHECK-NEXT: vmv.v.i v0, 6 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v11, v8, 1 +; CHECK-NEXT: vrgather.vv v11, v9, v10, v0.t +; CHECK-NEXT: vse8.v v11, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave8_0_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave8_0_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 8 +; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @deinterleave8_8_i8(ptr %in, ptr %out) { +; CHECK-LABEL: deinterleave8_8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmv.v.i v0, -3 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v9, v8, 1, v0.t +; CHECK-NEXT: vse8.v v9, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32V: {{.*}} +; RV64V: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll index 3af3540e1964b6..15c2c2298c0dd6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll @@ -444,10 +444,8 @@ define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vrgather.vv v10, v8, v9 -; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: ret entry: %0 = load <8 x i8>, ptr %in, align 1 @@ -461,10 +459,8 @@ define void @vnsrl_0_i8_single_src2(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vrgather.vv v10, v8, v9 -; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: ret entry: %0 = load <8 x i8>, ptr %in, align 1 @@ -472,3 +468,24 @@ entry: store <8 x i8> %shuffle.i5, ptr %out, align 1 ret void } + +; Can't match the m8 result type as the source would have to be m16 which +; isn't a legal type. +define void @vnsrl_0_i32_single_src_m8(ptr %in, ptr %out) { +; CHECK-LABEL: vnsrl_0_i32_single_src_m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vadd.vv v16, v16, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 +; CHECK-NEXT: vse32.v v24, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <64 x i32>, ptr %in, align 4 + %shuffle.i5 = shufflevector <64 x i32> %0, <64 x i32> poison, <64 x i32> + store <64 x i32> %shuffle.i5, ptr %out, align 4 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index b2973826d65ded..075e463e41a6b0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -7,28 +7,28 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) { ; CHECK-LABEL: vector_deinterleave_v16i1_v32i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vadd.vv v11, v9, v9 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vrgather.vv v9, v10, v11 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v12, v9, 1, v0 +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vadd.vv v11, v9, v9 ; CHECK-NEXT: li a0, -256 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vadd.vi v12, v11, -16 -; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t -; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vadd.vi v12, v11, 1 -; CHECK-NEXT: vrgather.vv v13, 
v10, v12 -; CHECK-NEXT: vadd.vi v10, v11, -15 -; CHECK-NEXT: vrgather.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmsne.vi v8, v13, 0 +; CHECK-NEXT: vadd.vi v9, v11, -16 +; CHECK-NEXT: vrgather.vv v8, v10, v9, v0.t +; CHECK-NEXT: vmsne.vi v9, v8, 0 +; CHECK-NEXT: vnsrl.wi v8, v12, 8 +; CHECK-NEXT: vadd.vi v11, v11, -15 +; CHECK-NEXT: vrgather.vv v8, v10, v11, v0.t +; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: vmv.v.v v0, v9 ; CHECK-NEXT: ret %retval = call {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1> %vec) diff --git a/llvm/test/CodeGen/Xtensa/branch-relaxation.ll b/llvm/test/CodeGen/Xtensa/branch-relaxation.ll new file mode 100644 index 00000000000000..15dbd855f71acb --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/branch-relaxation.ll @@ -0,0 +1,246 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=XTENSA %s + +define i32 @jump(i1 %a) { +; XTENSA-LABEL: jump: +; XTENSA: movi a8, 1 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: beqz a8, .LBB0_2 +; XTENSA-NEXT: # %bb.1: # %iftrue +; XTENSA-NEXT: #APP +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: .space 1024 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: movi a2, 1 +; XTENSA-NEXT: ret +; XTENSA-NEXT: .LBB0_2: # %jmp +; XTENSA-NEXT: #APP +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: movi a2, 1 +; XTENSA-NEXT: ret + br i1 %a, label %iftrue, label %jmp + +jmp: + call void asm sideeffect "", ""() + br label %tail + +iftrue: + call void asm sideeffect "", ""() + br label %space + +space: + call void asm sideeffect ".space 1024", ""() + br label %tail + +tail: + ret i32 1 +} + +define i32 @jx(i1 %a) { +; XTENSA-LABEL: jx: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: .cfi_def_cfa_offset 16 +; XTENSA-NEXT: movi a8, 1 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: bnez a8, .LBB1_1 +; XTENSA-NEXT: # %bb.4: +; XTENSA-NEXT: l32r a8, .LCPI1_0 +; XTENSA-NEXT: jx a8 +; XTENSA-NEXT: .LBB1_1: # %iftrue +; XTENSA-NEXT: #APP +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: .space 1048576 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: j .LBB1_3 +; XTENSA-NEXT: .LBB1_2: # %jmp +; XTENSA-NEXT: #APP +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: .LBB1_3: # %tail +; XTENSA-NEXT: movi a2, 1 +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + br i1 %a, label %iftrue, label %jmp + +jmp: + call void asm sideeffect "", ""() + br label %tail + +iftrue: + call void asm sideeffect "", ""() + br label %space + +space: + call void asm sideeffect ".space 1048576", ""() + br label %tail + +tail: + ret i32 1 +} + +define void @relax_spill() { +; XTENSA-LABEL: relax_spill: +; XTENSA: addi a8, a1, -32 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: .cfi_def_cfa_offset 32 +; XTENSA-NEXT: s32i a12, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a13, a1, 12 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a14, a1, 8 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 4 # 4-byte Folded Spill +; XTENSA-NEXT: .cfi_offset a12, -4 +; XTENSA-NEXT: .cfi_offset a13, -8 +; XTENSA-NEXT: .cfi_offset a14, -12 +; XTENSA-NEXT: .cfi_offset a15, -16 +; XTENSA-NEXT: #APP +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a2, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a3, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a4, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a5, a3, 1 +; XTENSA-NEXT: 
#NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a6, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a7, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a8, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a9, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a10, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a11, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a12, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a13, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a14, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a15, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: beq a5, a6, .LBB2_1 +; XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: s32i a12, a1, 0 +; XTENSA-NEXT: l32r a12, .LCPI2_0 +; XTENSA-NEXT: jx a12 +; XTENSA-NEXT: .LBB2_1: # %iftrue +; XTENSA-NEXT: #APP +; XTENSA-NEXT: .space 536870912 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: j .LBB2_2 +; XTENSA-NEXT: .LBB2_4: # %iffalse +; XTENSA-NEXT: l32i a12, a1, 0 +; XTENSA-NEXT: .LBB2_2: # %iffalse +; XTENSA-NEXT: #APP +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a2 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a3 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a4 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a5 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a6 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a7 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a8 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a9 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a10 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a11 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a12 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a13 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a14 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: #APP +; XTENSA-NEXT: # reg use a15 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: l32i a15, a1, 4 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a14, a1, 8 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a13, a1, 12 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a12, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 32 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + call void asm sideeffect "", ""() + %a2 = call i32 asm sideeffect "addi a2, a3, 1", "={a2}"() + %a3 = call i32 asm sideeffect "addi a3, a3, 1", "={a3}"() + %a4 = call i32 asm sideeffect "addi a4, a3, 1", "={a4}"() + %a5 = call i32 asm sideeffect "addi a5, a3, 1", "={a5}"() + %a6 = call i32 asm sideeffect "addi a6, a3, 1", "={a6}"() + %a7 = call i32 asm sideeffect "addi a7, a3, 1", "={a7}"() + %a8 = call i32 asm sideeffect "addi a8, a3, 1", "={a8}"() + %a9 = call i32 asm sideeffect "addi a9, a3, 1", "={a9}"() + %a10 = call i32 asm sideeffect "addi a10, a3, 1", "={a10}"() + %a11 = call i32 asm sideeffect "addi a11, a3, 1", "={a11}"() + %a12 = call i32 asm sideeffect "addi a12, a3, 1", "={a12}"() + %a13 = call i32 asm sideeffect "addi a13, a3, 1", "={a13}"() + %a14 = call i32 asm sideeffect "addi a14, a3, 1", "={a14}"() + %a15 = call i32 asm sideeffect "addi a15, a3, 1", "={a15}"() + + %cmp = icmp eq i32 %a5, %a6 + br i1 %cmp, label %iftrue, label %iffalse + +iftrue: + call void 
asm sideeffect ".space 536870912", ""() + br label %iffalse + +iffalse: + call void asm sideeffect "", ""() + call void asm sideeffect "# reg use $0", "{a2}"(i32 %a2) + call void asm sideeffect "# reg use $0", "{a3}"(i32 %a3) + call void asm sideeffect "# reg use $0", "{a4}"(i32 %a4) + call void asm sideeffect "# reg use $0", "{a5}"(i32 %a5) + call void asm sideeffect "# reg use $0", "{a6}"(i32 %a6) + call void asm sideeffect "# reg use $0", "{a7}"(i32 %a7) + call void asm sideeffect "# reg use $0", "{a8}"(i32 %a8) + call void asm sideeffect "# reg use $0", "{a9}"(i32 %a9) + call void asm sideeffect "# reg use $0", "{a10}"(i32 %a10) + call void asm sideeffect "# reg use $0", "{a11}"(i32 %a11) + call void asm sideeffect "# reg use $0", "{a12}"(i32 %a12) + call void asm sideeffect "# reg use $0", "{a13}"(i32 %a13) + call void asm sideeffect "# reg use $0", "{a14}"(i32 %a14) + call void asm sideeffect "# reg use $0", "{a15}"(i32 %a15) + ret void +} diff --git a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll index bad57d58b28a67..60303235386256 100644 --- a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll @@ -8,8 +8,8 @@ declare i32 @llvm.ctpop.i32(i32) define i32 @test_cttz_i32(i32 %a) nounwind { ; XTENSA-LABEL: test_cttz_i32: -; XTENSA: beqz a2, .LBB0_1 -; XTENSA-NEXT: # %bb.2: # %cond.false +; XTENSA: beqz a2, .LBB0_2 +; XTENSA-NEXT: # %bb.1: # %cond.false ; XTENSA-NEXT: movi a8, -1 ; XTENSA-NEXT: xor a8, a2, a8 ; XTENSA-NEXT: addi a9, a2, -1 @@ -33,7 +33,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind { ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: extui a2, a8, 24, 8 ; XTENSA-NEXT: ret -; XTENSA-NEXT: .LBB0_1: +; XTENSA-NEXT: .LBB0_2: ; XTENSA-NEXT: movi a2, 32 ; XTENSA-NEXT: ret %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false) @@ -71,8 +71,8 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind { define i32 @test_ctlz_i32(i32 %a) nounwind { ; XTENSA-LABEL: test_ctlz_i32: -; XTENSA: beqz a2, .LBB2_1 -; XTENSA-NEXT: # %bb.2: # %cond.false +; XTENSA: beqz a2, .LBB2_2 +; XTENSA-NEXT: # %bb.1: # %cond.false ; XTENSA-NEXT: srli a8, a2, 1 ; XTENSA-NEXT: or a8, a2, a8 ; XTENSA-NEXT: srli a9, a8, 2 @@ -104,7 +104,7 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: extui a2, a8, 24, 8 ; XTENSA-NEXT: ret -; XTENSA-NEXT: .LBB2_1: +; XTENSA-NEXT: .LBB2_2: ; XTENSA-NEXT: movi a2, 32 ; XTENSA-NEXT: ret %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false) diff --git a/llvm/test/MC/Disassembler/AMDGPU/vinterp-fake16.txt b/llvm/test/MC/Disassembler/AMDGPU/vinterp.txt similarity index 60% rename from llvm/test/MC/Disassembler/AMDGPU/vinterp-fake16.txt rename to llvm/test/MC/Disassembler/AMDGPU/vinterp.txt index 239f1d8b3058da..0e19f39764e7f8 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/vinterp-fake16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/vinterp.txt @@ -1,252 +1,255 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefix=CHECK %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefix=CHECK %s +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefixes=CHECK %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble %s | FileCheck 
-strict-whitespace -check-prefixes=CHECK %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefixes=CHECK %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefixes=CHECK %s -# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0 # Check that unused bits in the encoding are ignored. -# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x80,0xcd,0x01,0x05,0x0e,0x1c +# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p10_f32 v1, v10, v20, v30 wait_exp:0{{$}} 0x01,0x00,0x00,0xcd,0x0a,0x29,0x7a,0x04 +# CHECK: v_interp_p10_f32 v1, v10, v20, v30 wait_exp:0 -# CHECK: v_interp_p10_f32 v2, v11, v21, v31 wait_exp:0{{$}} 0x02,0x00,0x00,0xcd,0x0b,0x2b,0x7e,0x04 +# CHECK: v_interp_p10_f32 v2, v11, v21, v31 wait_exp:0 -# CHECK: v_interp_p10_f32 v3, v12, v22, v32 wait_exp:0{{$}} 0x03,0x00,0x00,0xcd,0x0c,0x2d,0x82,0x04 +# CHECK: v_interp_p10_f32 v3, v12, v22, v32 wait_exp:0 -# CHECK: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:0{{$}} 0x00,0x80,0x00,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:0 -# CHECK: v_interp_p10_f32 v0, -v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x24 +# CHECK: v_interp_p10_f32 v0, -v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p10_f32 v0, v1, -v2, v3 wait_exp:0{{$}} 0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x44 +# CHECK: v_interp_p10_f32 v0, v1, -v2, v3 wait_exp:0 -# CHECK: v_interp_p10_f32 v0, v1, v2, -v3 wait_exp:0{{$}} 0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x84 +# CHECK: v_interp_p10_f32 v0, v1, v2, -v3 wait_exp:0 -# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:1{{$}} 0x00,0x01,0x00,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:1 -# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:7{{$}} 0x00,0x07,0x00,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:7 -# CHECK: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:7{{$}} 0x00,0x87,0x00,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:7 -# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p2_f32 v1, v10, v20, v30 wait_exp:0{{$}} 0x01,0x00,0x01,0xcd,0x0a,0x29,0x7a,0x04 +# CHECK: v_interp_p2_f32 v1, v10, v20, v30 wait_exp:0 -# CHECK: v_interp_p2_f32 v2, v11, v21, v31 wait_exp:0{{$}} 0x02,0x00,0x01,0xcd,0x0b,0x2b,0x7e,0x04 +# CHECK: v_interp_p2_f32 v2, v11, v21, v31 wait_exp:0 -# CHECK: v_interp_p2_f32 v3, v12, v22, v32 wait_exp:0{{$}} 0x03,0x00,0x01,0xcd,0x0c,0x2d,0x82,0x04 +# CHECK: v_interp_p2_f32 v3, v12, v22, v32 wait_exp:0 -# CHECK: v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:0{{$}} 0x00,0x80,0x01,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:0 -# CHECK: v_interp_p2_f32 v0, -v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x24 +# CHECK: v_interp_p2_f32 v0, -v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p2_f32 v0, v1, -v2, v3 wait_exp:0{{$}} 0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x44 +# CHECK: v_interp_p2_f32 v0, v1, -v2, v3 wait_exp:0 -# CHECK: v_interp_p2_f32 v0, v1, v2, -v3 wait_exp:0{{$}} 0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x84 +# CHECK: v_interp_p2_f32 v0, v1, v2, -v3 wait_exp:0 -# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:1{{$}} 
0x00,0x01,0x01,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:1 -# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:7{{$}} 0x00,0x07,0x01,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:7 -# CHECK: v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:7{{$}} 0x00,0x87,0x01,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:7 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, -v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x24 +# CHECK: v_interp_p10_f16_f32 v0, -v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, -v2, v3 wait_exp:0{{$}} 0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x44 +# CHECK: v_interp_p10_f16_f32 v0, v1, -v2, v3 wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, -v3 wait_exp:0{{$}} 0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x84 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, -v3 wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp wait_exp:0{{$}} 0x00,0x80,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:1{{$}} 0x00,0x01,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:1 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:7{{$}} 0x00,0x07,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:7 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0{{$}} 0x00,0x08,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0{{$}} 0x00,0x10,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0{{$}} 0x00,0x20,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0{{$}} 0x00,0x40,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0{{$}} 0x00,0x78,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0x4d,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x02,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p10_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x02,0xcd,0x01,0x05,0x0e,0xe4 +# CHECK: v_interp_p10_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, -v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x24 +# CHECK: v_interp_p2_f16_f32 v0, -v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, -v2, v3 wait_exp:0{{$}} 0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x44 +# CHECK: v_interp_p2_f16_f32 
v0, v1, -v2, v3 wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, -v3 wait_exp:0{{$}} 0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x84 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, -v3 wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 clamp wait_exp:0{{$}} 0x00,0x80,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 clamp wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:1{{$}} 0x00,0x01,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:1 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:7{{$}} 0x00,0x07,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:7 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0{{$}} 0x00,0x08,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0{{$}} 0x00,0x10,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0{{$}} 0x00,0x20,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0{{$}} 0x00,0x40,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0{{$}} 0x00,0x78,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0x4d,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p2_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0xe4 +# CHECK: v_interp_p2_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, -v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x24 +# CHECK: v_interp_p10_rtz_f16_f32 v0, -v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, -v2, v3 wait_exp:0{{$}} 0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x44 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, -v2, v3 wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, -v3 wait_exp:0{{$}} 0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x84 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, -v3 wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp wait_exp:0{{$}} 0x00,0x80,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1{{$}} 0x00,0x01,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7{{$}} 0x00,0x07,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0{{$}} 0x00,0x08,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: 
v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0{{$}} 0x00,0x10,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0{{$}} 0x00,0x20,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0{{$}} 0x00,0x40,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0{{$}} 0x00,0x78,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0x4d,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x04,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p10_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x04,0xcd,0x01,0x05,0x0e,0xe4 +# CHECK: v_interp_p10_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, -v1, v2, v3 wait_exp:0{{$}} 0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x24 +# CHECK: v_interp_p2_rtz_f16_f32 v0, -v1, v2, v3 wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, -v2, v3 wait_exp:0{{$}} 0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x44 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, -v2, v3 wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, -v3 wait_exp:0{{$}} 0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x84 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, -v3 wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp wait_exp:0{{$}} 0x00,0x80,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1{{$}} 0x00,0x01,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7{{$}} 0x00,0x07,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0{{$}} 0x00,0x08,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0{{$}} 0x00,0x10,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0{{$}} 0x00,0x20,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0{{$}} 0x00,0x40,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] 
wait_exp:0{{$}} 0x00,0x78,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] wait_exp:0 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0x4d,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0x04 +# CHECK: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 -# CHECK: v_interp_p2_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} 0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0xe4 +# CHECK: v_interp_p2_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/repeated-address-store.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/repeated-address-store.ll new file mode 100644 index 00000000000000..48928d2dfd4738 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/repeated-address-store.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux -mattr=+v < %s | FileCheck %s + +define void @test(ptr %dest) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[DEST:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[INC3:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 3 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[DEST]], align 4 +; CHECK-NEXT: store i32 2, ptr [[DEST]], align 2 +; CHECK-NEXT: store i32 1, ptr [[INC3]], align 2 +; CHECK-NEXT: ret void +; +entry: + %inc3 = getelementptr inbounds i32, ptr %dest, i64 3 + store i32 1, ptr %inc3, align 2 + + store i32 1, ptr %dest, align 4 + %inc1 = getelementptr inbounds i32, ptr %dest, i64 1 + store i32 1, ptr %inc1, align 2 + %inc2 = getelementptr inbounds i32, ptr %dest, i64 2 + store i32 1, ptr %inc2, align 2 + store i32 2, ptr %dest, align 2 + store i32 1, ptr %inc3, align 2 + ret void +} diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll index b312688b7932dc..3d00ddf89aaa3b 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll @@ -94,43 +94,3 @@ entry: %23 = fcmp ogt <8 x float> zeroinitializer, %19 ret void } - -define void @test3(float %0) { -; CHECK-LABEL: @test3( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]] -; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP1]], <2 x float> zeroinitializer, i64 2) -; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr null, align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2) -; CHECK-NEXT: [[TMP8:%.*]] = call 
<4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1> -; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> [[TMP2]] -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; -entry: - br label %for.body.lr.ph - -for.body.lr.ph: - br i1 false, label %for.cond.cleanup, label %for.body - -for.cond.cleanup: ; preds = %for.body, %for.body.lr.ph - %1 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %5, %for.body ] - %2 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %6, %for.body ] - ret void - -for.body: - %3 = load <2 x float>, ptr null, align 4 - %4 = fcmp olt <2 x float> zeroinitializer, %3 - %5 = select <2 x i1> <i1 true, i1 true>, <2 x float> %3, <2 x float> zeroinitializer - %6 = select <2 x i1> %4, <2 x float> %3, <2 x float> zeroinitializer - br label %for.cond.cleanup -} diff --git a/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt b/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt index 1f249b0049d062..80fac2353be416 100644 --- a/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt @@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_unittest(InstrumentationTests + MemProfUseTest.cpp PGOInstrumentationTest.cpp ) diff --git a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp new file mode 100644 index 00000000000000..a510a57099aba4 --- /dev/null +++ b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp @@ -0,0 +1,104 @@ +//===- MemProfUseTest.cpp - MemProf use tests -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/ProfileData/MemProf.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Transforms/Instrumentation/MemProfiler.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { +using namespace llvm; +using namespace llvm::memprof; +using testing::FieldsAre; +using testing::Pair; +using testing::SizeIs; + +TEST(MemProf, ExtractDirectCallsFromIR) { + // The following IR is generated from: + // + // void f1(); + // void f2(); + // void f3(); + // + // void foo() { + // f1(); + // f2(); f3(); + // } + StringRef IR = R"IR( +define dso_local void @_Z3foov() !dbg !10 { +entry: + call void @_Z2f1v(), !dbg !13 + call void @_Z2f2v(), !dbg !14 + call void @_Z2f3v(), !dbg !15 + ret void, !dbg !16 +} + +declare !dbg !17 void @_Z2f1v() + +declare !dbg !18 void @_Z2f2v() + +declare !dbg !19 void @_Z2f3v() + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "foobar.cc", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"MemProfProfileFilename", !"memprof.profraw"} +!6 = !{i32 8, !"PIC Level", i32 2} +!7 = !{i32 7, !"PIE Level", i32 2} +!8 = !{i32 7, !"uwtable", i32 2} +!9 = !{!"clang"} +!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 5, type: !11, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!11 = !DISubroutineType(types: !12) +!12 = !{} +!13 = !DILocation(line: 6, column: 3, scope: !10) +!14 = !DILocation(line: 7, column: 3, scope: !10) +!15 = !DILocation(line: 7, column: 9, scope: !10) +!16 = !DILocation(line: 8, column: 1, scope: !10) +!17 = !DISubprogram(name: "f1", linkageName: "_Z2f1v", scope: !1, file: !1, line: 1, type: !11, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) +!18 = !DISubprogram(name: "f2", linkageName: "_Z2f2v", scope: !1, file: !1, line: 2, type: !11, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) +!19 = !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !11, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) +)IR"; + + LLVMContext Ctx; + SMDiagnostic Err; + std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx); + ASSERT_TRUE(M); + + auto Calls = extractCallsFromIR(*M); + + // Expect exactly one caller. + ASSERT_THAT(Calls, SizeIs(1)); + + auto It = Calls.begin(); + ASSERT_NE(It, Calls.end()); + + const auto &[CallerGUID, CallSites] = *It; + EXPECT_EQ(CallerGUID, IndexedMemProfRecord::getGUID("_Z3foov")); + ASSERT_THAT(CallSites, SizeIs(3)); + + // Verify that call sites show up in the ascending order of their source + // locations.
+ EXPECT_THAT(CallSites[0], + Pair(FieldsAre(1U, 3U), IndexedMemProfRecord::getGUID("_Z2f1v"))); + EXPECT_THAT(CallSites[1], + Pair(FieldsAre(2U, 3U), IndexedMemProfRecord::getGUID("_Z2f2v"))); + EXPECT_THAT(CallSites[2], + Pair(FieldsAre(2U, 9U), IndexedMemProfRecord::getGUID("_Z2f3v"))); +} +} // namespace diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn index 57b86f53254f57..58f76d3def3db3 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn @@ -10,6 +10,9 @@ group("lib") { if (current_os == "linux" || current_os == "android") { deps += [ "//compiler-rt/lib/ubsan_minimal" ] } + if (current_os == "android" && current_cpu == "arm64") { + deps += [ "//compiler-rt/lib/hwasan" ] + } if (current_os != "baremetal") { deps += [ "//compiler-rt/lib/asan", diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn index e39d8114d1f473..0f1d3d2a50e34c 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn @@ -11,7 +11,7 @@ gen_version_script("version_script") { extra = "hwasan.syms.extra" output = "$target_gen_dir/hwasan.vers" libs = [ - ":hwasan", + ":hwasan_static", ":hwasan_cxx", ] lib_names = [ @@ -88,7 +88,7 @@ source_set("cxx_sources") { sources = [ "hwasan_new_delete.cpp" ] } -static_library("hwasan") { +static_library("hwasan_static") { output_dir = crt_current_out_dir output_name = "clang_rt.$hwasan_name$crt_current_target_suffix" complete_static_lib = true @@ -140,3 +140,13 @@ static_library("hwasan_preinit") { configs += [ "//llvm/utils/gn/build:crt_code" ] sources = [ "hwasan_preinit.cpp" ] } + +group("hwasan") { + deps = [ + ":hwasan_preinit", + ":hwasan_shared", + ":hwasan_static", + ":hwasan_cxx", + ":version_script", + ] +} diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 8a9aff9c05d185..dc62280d12c666 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -561,6 +561,7 @@ if (current_toolchain == default_toolchain) { "__iterator/segmented_iterator.h", "__iterator/size.h", "__iterator/sortable.h", + "__iterator/static_bounded_iter.h", "__iterator/unreachable_sentinel.h", "__iterator/wrap_iter.h", "__locale", diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 7aea804ef8c309..0dbcb63ee7cfc5 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -67,6 +67,7 @@ static_library("Analysis") { "InstructionPrecedenceTracking.cpp", "InstructionSimplify.cpp", "InteractiveModelRunner.cpp", + "LastRunTrackingAnalysis.cpp", "LazyBlockFrequencyInfo.cpp", "LazyBranchProbabilityInfo.cpp", "LazyCallGraph.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn index e297aa9da4e50d..28efe0db6a82c9 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn @@ -88,7 +88,6 @@ static_library("Scalar") { "SpeculativeExecution.cpp", "StraightLineStrengthReduce.cpp", "StructurizeCFG.cpp", - "TLSVariableHoist.cpp", "TailRecursionElimination.cpp", "WarnMissedTransforms.cpp", ] diff --git 
a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn index 38ff30f3fab7d1..2e4fde266c77b8 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn @@ -33,6 +33,7 @@ unittest("AnalysisTests") { "IRSimilarityIdentifierTest.cpp", "IVDescriptorsTest.cpp", "InlineCostTest.cpp", + "LastRunTrackingAnalysisTest.cpp", "LazyCallGraphTest.cpp", "LoadsTest.cpp", "LoopInfoTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn index c9c59acda22aca..bab8616b214a95 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn @@ -10,5 +10,8 @@ unittest("InstrumentationTests") { "//llvm/lib/Testing/Support", "//llvm/lib/Transforms/Instrumentation", ] - sources = [ "PGOInstrumentationTest.cpp" ] + sources = [ + "MemProfUseTest.cpp", + "PGOInstrumentationTest.cpp", + ] } diff --git a/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp b/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp index 8682294c8a6972..59c9759d35393f 100644 --- a/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp +++ b/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp @@ -40,6 +40,11 @@ void arith::ConstantOp::inferResultRanges(ArrayRef<ConstantIntRanges> argRanges, setResultRange(getResult(), ConstantIntRanges::constant(value)); return; } + if (auto splatAttr = llvm::dyn_cast_or_null<SplatElementsAttr>(getValue())) { + setResultRange(getResult(), ConstantIntRanges::constant( + splatAttr.getSplatValue<APInt>())); + return; + } if (auto arrayCstAttr = llvm::dyn_cast_or_null<DenseIntElementsAttr>(getValue())) { std::optional<ConstantIntRanges> result; diff --git a/offload/cmake/caches/Offload.cmake b/offload/cmake/caches/Offload.cmake new file mode 100644 index 00000000000000..57363e99b10afc --- /dev/null +++ b/offload/cmake/caches/Offload.cmake @@ -0,0 +1,12 @@ +set(LLVM_ENABLE_PROJECTS "clang;clang-tools-extra;compiler-rt;lld" CACHE STRING "") +set(LLVM_ENABLE_RUNTIMES "libunwind;libcxx;libcxxabi;openmp;offload" CACHE STRING "") +set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "") + +set(CLANG_DEFAULT_CXX_STDLIB "libc++" CACHE STRING "") +set(CLANG_DEFAULT_LINKER "lld" CACHE STRING "") + +set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "") +set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "") +set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "") +set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;libcxx;libcxxabi" CACHE STRING "") +set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;libcxx;libcxxabi" CACHE STRING "") diff --git a/openmp/docs/SupportAndFAQ.rst b/openmp/docs/SupportAndFAQ.rst index 0ae6ad32a208f6..a1f1a1759c2fdd 100644 --- a/openmp/docs/SupportAndFAQ.rst +++ b/openmp/docs/SupportAndFAQ.rst @@ -51,7 +51,23 @@ All patches go through the regular `LLVM review process Q: How to build an OpenMP GPU offload capable compiler? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To build an *effective* OpenMP offload capable compiler, only one extra CMake + +The easiest way to create an offload capable compiler is to use the provided +CMake cache file.
This will enable the projects and runtimes necessary for +offloading as well as some extra options. + +.. code-block:: sh + + $> cd llvm-project # The llvm-project checkout + $> mkdir build + $> cd build + $> cmake ../llvm -G Ninja \ + -C ../offload/cmake/caches/Offload.cmake \ # The preset cache file + -DCMAKE_BUILD_TYPE=<Debug|Release> \ # Select build type + -DCMAKE_INSTALL_PREFIX=<PATH> \ # Where the libraries will live + $> ninja install + +To manually build an *effective* OpenMP offload capable compiler, only one extra CMake option, ``LLVM_ENABLE_RUNTIMES="openmp;offload"``, is needed when building LLVM (Generic information about building LLVM is available `here <https://llvm.org/docs/GettingStarted.html>`__.). Make sure all backends that