diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 320623cfa15af1..2880bfd03be789 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -170,9 +170,6 @@ class DataAggregator : public DataReader { std::string BuildIDBinaryName; /// Memory map info for a single file as recorded in perf.data - /// When a binary has multiple text segments, the Size is computed as the - /// difference of the last address of these segments from the BaseAddress. - /// The base addresses of all text segments must be the same. struct MMapInfo { uint64_t BaseAddress{0}; /// Base address of the mapped binary. uint64_t MMapAddress{0}; /// Address of the executable segment. @@ -496,11 +493,6 @@ class DataAggregator : public DataReader { /// and return a file name matching a given \p FileBuildID. std::optional getFileNameForBuildID(StringRef FileBuildID); - /// Get a constant reference to the parsed binary mmap entries. 
- const std::unordered_map &getBinaryMMapInfo() { - return BinaryMMapInfo; - } - friend class YAMLProfileWriter; }; } // namespace bolt diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 2b02086e3e0c99..697cac9fbcaa08 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -95,12 +95,6 @@ cl::opt ReadPreAggregated( "pa", cl::desc("skip perf and read data from a pre-aggregated file format"), cl::cat(AggregatorCategory)); -cl::opt - ReadPerfEvents("perf-script-events", - cl::desc("skip perf event collection by supplying a " - "perf-script output in a textual format"), - cl::ReallyHidden, cl::init(""), cl::cat(AggregatorCategory)); - static cl::opt TimeAggregator("time-aggr", cl::desc("time BOLT aggregator"), @@ -173,9 +167,8 @@ void DataAggregator::findPerfExecutable() { void DataAggregator::start() { outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; - // Don't launch perf for pre-aggregated files or when perf input is specified - // by the user. 
- if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty()) + // Don't launch perf for pre-aggregated files + if (opts::ReadPreAggregated) return; findPerfExecutable(); @@ -471,13 +464,6 @@ void DataAggregator::filterBinaryMMapInfo() { int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process, PerfProcessErrorCallbackTy Callback) { - if (!opts::ReadPerfEvents.empty()) { - outs() << "PERF2BOLT: using pre-processed perf events for '" << Name - << "' (perf-script-events)\n"; - ParsingBuf = opts::ReadPerfEvents; - return 0; - } - std::string Error; outs() << "PERF2BOLT: waiting for perf " << Name << " collection to finish...\n"; @@ -2070,6 +2056,15 @@ std::error_code DataAggregator::parseMMapEvents() { if (FileMMapInfo.first == "(deleted)") continue; + // Consider only the first mapping of the file for any given PID + auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first); + bool PIDExists = llvm::any_of(make_range(Range), [&](const auto &MI) { + return MI.second.PID == FileMMapInfo.second.PID; + }); + + if (PIDExists) + continue; + GlobalMMapInfo.insert(FileMMapInfo); } @@ -2121,22 +2116,12 @@ std::error_code DataAggregator::parseMMapEvents() { << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset) << ". Ignoring profile data for this mapping\n"; continue; + } else { + MMapInfo.BaseAddress = *BaseAddress; } - MMapInfo.BaseAddress = *BaseAddress; } - // Try to add MMapInfo to the map and update its size. Large binaries may - // span to multiple text segments, so the mapping is inserted only on the - // first occurrence. - if (!BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)).second) - assert(MMapInfo.BaseAddress == BinaryMMapInfo[MMapInfo.PID].BaseAddress && - "Base address on multiple segment mappings should match"); - - // Update mapping size. 
- const uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size; - const uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress; - if (Size > BinaryMMapInfo[MMapInfo.PID].Size) - BinaryMMapInfo[MMapInfo.PID].Size = Size; + BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); } if (BinaryMMapInfo.empty()) { diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt index 208cf6ced73585..bad7108dad0b7b 100644 --- a/bolt/unittests/Core/CMakeLists.txt +++ b/bolt/unittests/Core/CMakeLists.txt @@ -8,7 +8,6 @@ set(LLVM_LINK_COMPONENTS add_bolt_unittest(CoreTests BinaryContext.cpp MCPlusBuilder.cpp - MemoryMaps.cpp DynoStats.cpp DISABLE_LLVM_LINK_LLVM_DYLIB @@ -18,8 +17,6 @@ target_link_libraries(CoreTests PRIVATE LLVMBOLTCore LLVMBOLTRewrite - LLVMBOLTProfile - LLVMTestingSupport ) foreach (tgt ${BOLT_TARGETS_TO_BUILD}) diff --git a/bolt/unittests/Core/MemoryMaps.cpp b/bolt/unittests/Core/MemoryMaps.cpp deleted file mode 100644 index 9b5769d051cb6f..00000000000000 --- a/bolt/unittests/Core/MemoryMaps.cpp +++ /dev/null @@ -1,142 +0,0 @@ -//===- bolt/unittest/Core/MemoryMaps.cpp ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "bolt/Core/BinaryContext.h" -#include "bolt/Profile/DataAggregator.h" -#include "llvm/BinaryFormat/ELF.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Testing/Support/Error.h" -#include "gtest/gtest.h" - -using namespace llvm; -using namespace llvm::object; -using namespace llvm::ELF; -using namespace bolt; - -namespace opts { -extern cl::opt ReadPerfEvents; -} // namespace opts - -namespace { - -/// Perform checks on memory map events normally captured in perf. Tests use -/// the 'opts::ReadPerfEvents' flag to emulate these events, passing a custom -/// 'perf script' output to DataAggregator. -struct MemoryMapsTester : public testing::TestWithParam { - void SetUp() override { - initalizeLLVM(); - prepareElf(); - initializeBOLT(); - } - -protected: - void initalizeLLVM() { - llvm::InitializeAllTargetInfos(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmParsers(); - llvm::InitializeAllDisassemblers(); - llvm::InitializeAllTargets(); - llvm::InitializeAllAsmPrinters(); - } - - void prepareElf() { - memcpy(ElfBuf, "\177ELF", 4); - ELF64LE::Ehdr *EHdr = reinterpret_cast(ElfBuf); - EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64; - EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB; - EHdr->e_machine = GetParam() == Triple::aarch64 ? 
EM_AARCH64 : EM_X86_64; - MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF"); - ObjFile = cantFail(ObjectFile::createObjectFile(Source)); - } - - void initializeBOLT() { - Relocation::Arch = ObjFile->makeTriple().getArch(); - BC = cantFail(BinaryContext::createBinaryContext( - ObjFile->makeTriple(), ObjFile->getFileName(), nullptr, true, - DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()})); - ASSERT_FALSE(!BC); - } - - char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {}; - std::unique_ptr ObjFile; - std::unique_ptr BC; -}; -} // namespace - -#ifdef X86_AVAILABLE - -INSTANTIATE_TEST_SUITE_P(X86, MemoryMapsTester, - ::testing::Values(Triple::x86_64)); - -#endif - -#ifdef AARCH64_AVAILABLE - -INSTANTIATE_TEST_SUITE_P(AArch64, MemoryMapsTester, - ::testing::Values(Triple::aarch64)); - -#endif - -/// Check that the correct mmap size is computed when we have multiple text -/// segment mappings. -TEST_P(MemoryMapsTester, ParseMultipleSegments) { - const int Pid = 1234; - StringRef Filename = "BINARY"; - opts::ReadPerfEvents = formatv( - "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: " - "[0xabc0000000(0x1000000) @ 0x11c0000 103:01 1573523 0]: r-xp {1}\n" - "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: " - "[0xabc2000000(0x8000000) @ 0x31d0000 103:01 1573523 0]: r-xp {1}\n", - Pid, Filename); - - BC->SegmentMapInfo[0x11da000] = - SegmentInfo{0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000, true}; - BC->SegmentMapInfo[0x31d0000] = - SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0000, 0x3000000, 0x200000, true}; - - DataAggregator DA(""); - BC->setFilename(Filename); - Error Err = DA.preprocessProfile(*BC); - - // Ignore errors from perf2bolt when parsing memory events later on. - ASSERT_THAT_ERROR(std::move(Err), Succeeded()); - - auto &BinaryMMapInfo = DA.getBinaryMMapInfo(); - auto El = BinaryMMapInfo.find(Pid); - // Check that memory mapping is present and has the expected size. 
- ASSERT_NE(El, BinaryMMapInfo.end()); - ASSERT_EQ(El->second.Size, static_cast(0xb1d0000)); -} - -/// Check that DataAggregator aborts when pre-processing an input binary -/// with multiple text segments that have different base addresses. -TEST_P(MemoryMapsTester, MultipleSegmentsMismatchedBaseAddress) { - const int Pid = 1234; - StringRef Filename = "BINARY"; - opts::ReadPerfEvents = formatv( - "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: " - "[0xabc0000000(0x1000000) @ 0x11c0000 103:01 1573523 0]: r-xp {1}\n" - "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: " - "[0xabc2000000(0x8000000) @ 0x31d0000 103:01 1573523 0]: r-xp {1}\n", - Pid, Filename); - - BC->SegmentMapInfo[0x11da000] = - SegmentInfo{0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000, true}; - // Using '0x31d0fff' FileOffset which triggers a different base address - // for this second text segment. - BC->SegmentMapInfo[0x31d0000] = - SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0fff, 0x3000000, 0x200000, true}; - - DataAggregator DA(""); - BC->setFilename(Filename); - ASSERT_DEATH( - { Error Err = DA.preprocessProfile(*BC); }, - "Base address on multiple segment mappings should match"); -} diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index fec2c20206bc4d..f8507156aa4198 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -73,6 +73,8 @@ Hover Code completion ^^^^^^^^^^^^^^^ +- Added completion for C++20 keywords. + Code actions ^^^^^^^^^^^^ diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 3c9078bcdf8118..c053a5ab3c528c 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -647,6 +647,9 @@ elementwise to the input. Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±infinity +The integer elementwise intrinsics, including ``__builtin_elementwise_popcount``, +can be called in a ``constexpr`` context. 
+ ============================================== ====================================================================== ========================================= Name Operation Supported element types ============================================== ====================================================================== ========================================= diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 86aaf7e6d979f8..c09ae84a661ef4 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -380,6 +380,7 @@ Non-comprehensive list of changes in this release - ``__builtin_reduce_mul`` function can now be used in constant expressions. - ``__builtin_reduce_and`` function can now be used in constant expressions. - ``__builtin_reduce_or`` and ``__builtin_reduce_xor`` functions can now be used in constant expressions. +- ``__builtin_elementwise_popcount`` function can now be used in constant expressions. New Compiler Flags ------------------ @@ -617,6 +618,8 @@ Improvements to Clang's diagnostics - For an rvalue reference bound to a temporary struct with an integer member, Clang will detect constant integer overflow in the initializer for the integer member (#GH46755). +- Fixed a false negative ``-Wunused-private-field`` diagnostic when a defaulted comparison operator is defined out of class (#GH116961). + Improvements to Clang's time-trace ---------------------------------- @@ -760,6 +763,8 @@ Bug Fixes to AST Handling sometimes incorrectly return null even if a comment was present. (#GH108145) - Clang now correctly parses the argument of the ``relates``, ``related``, ``relatesalso``, and ``relatedalso`` comment commands. +- Clang now uses the location of the begin of the member expression for ``CallExpr`` + involving deduced ``this``. 
(#GH116928) Miscellaneous Bug Fixes ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 14009826f2c550..b055cbd769bb50 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1916,7 +1916,7 @@ public: } auto getArgIdents() const { return ArgIdents; } auto getArgLocs() const { return ArgLocs; } - void setParamIdx(size_t Idx, int Val) { + void setParamIdx(size_t Idx, int Val) { assert(Idx < params_Size); params_[Idx] = Val; } @@ -4621,6 +4621,13 @@ def HLSLNumThreads: InheritableAttr { let Documentation = [NumThreadsDocs]; } +def HLSLSV_GroupID: HLSLAnnotationAttr { + let Spellings = [HLSLAnnotation<"SV_GroupID">]; + let Subjects = SubjectList<[ParmVar, Field]>; + let LangOpts = [HLSL]; + let Documentation = [HLSLSV_GroupIDDocs]; +} + def HLSLSV_GroupIndex: HLSLAnnotationAttr { let Spellings = [HLSLAnnotation<"SV_GroupIndex">]; let Subjects = SubjectList<[ParmVar, GlobalVar]>; @@ -4637,7 +4644,7 @@ def HLSLResourceBinding: InheritableAttr { let AdditionalMembers = [{ public: enum class RegisterType : unsigned { SRV, UAV, CBuffer, Sampler, C, I }; - + private: RegisterType RegType; unsigned SlotNumber; @@ -4707,7 +4714,7 @@ def HLSLResource : InheritableAttr { let Spellings = []; let Subjects = SubjectList<[Struct]>; let LangOpts = [HLSL]; - let Args = [ + let Args = [ EnumArgument< "ResourceKind", "llvm::hlsl::ResourceKind", /*is_string=*/0, @@ -4732,7 +4739,7 @@ def HLSLResource : InheritableAttr { def HLSLROV : TypeAttr { let Spellings = [CXX11<"hlsl", "is_rov">]; - let LangOpts = [HLSL]; + let LangOpts = [HLSL]; let Documentation = [InternalOnly]; } @@ -4757,7 +4764,7 @@ def HLSLContainedType : TypeAttr { def HLSLRawBuffer : TypeAttr { let Spellings = [CXX11<"hlsl", "raw_buffer">]; - let LangOpts = [HLSL]; + let LangOpts = [HLSL]; let Documentation = [InternalOnly]; } diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 
6fb2eb3eb3e663..aafd4449e47004 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3943,7 +3943,7 @@ In this case, the capturing entity ``X`` could capture a dangling reference to t temporary object. .. code-block:: c++ - + void addToSet(std::string_view a [[clang::lifetime_capture_by(s)]], std::set& s) { s.insert(a); } @@ -3957,8 +3957,8 @@ temporary object. The capturing entity ``X`` can be one of the following: -- Another (named) function parameter. - +- Another (named) function parameter. + .. code-block:: c++ void addToSet(std::string_view a [[clang::lifetime_capture_by(s)]], std::set& s) { @@ -3966,7 +3966,7 @@ The capturing entity ``X`` can be one of the following: } - ``this`` (in case of member functions). - + .. code-block:: c++ class S { @@ -3977,7 +3977,7 @@ The capturing entity ``X`` can be one of the following: }; - `global`, `unknown`. - + .. code-block:: c++ std::set s; @@ -4000,7 +4000,7 @@ function by writing the attribute after the function type: The attribute supports specifying more than one capturing entities: .. code-block:: c++ - + void addToSets(std::string_view a [[clang::lifetime_capture_by(s1, s2)]], std::set& s1, std::set& s2) { @@ -4014,7 +4014,7 @@ statement-local and only detects use of a temporary as an argument to the annotated parameter. .. code-block:: c++ - + void addToSet(std::string_view a [[clang::lifetime_capture_by(s)]], std::set& s); void use() { std::set s; @@ -7174,8 +7174,8 @@ the field it is attached to, and it may also lead to emission of automatic fix-i hints which would help the user replace the use of unsafe functions(/fields) with safe alternatives, though the attribute can be used even when the fix can't be automated. -* Attribute attached to functions: The attribute does not suppress - ``-Wunsafe-buffer-usage`` inside the function to which it is attached. 
+* Attribute attached to functions: The attribute does not suppress + ``-Wunsafe-buffer-usage`` inside the function to which it is attached. These warnings still need to be addressed. The attribute is warranted even if the only way a function can overflow @@ -7238,10 +7238,10 @@ alternatives, though the attribute can be used even when the fix can't be automa and then use the attribute on the original ``baz()`` to help the users update their code to use the new function. -* Attribute attached to fields: The attribute should only be attached to - struct fields, if the fields can not be updated to a safe type with bounds - check, such as std::span. In other words, the buffers prone to unsafe accesses - should always be updated to use safe containers/views and attaching the attribute +* Attribute attached to fields: The attribute should only be attached to + struct fields, if the fields can not be updated to a safe type with bounds + check, such as std::span. In other words, the buffers prone to unsafe accesses + should always be updated to use safe containers/views and attaching the attribute must be last resort when such an update is infeasible. The attribute can be placed on individual fields or a set of them as shown below. @@ -7259,7 +7259,7 @@ alternatives, though the attribute can be used even when the fix can't be automa size_t sz; }; - Here, every read/write to the fields ptr1, ptr2, buf and sz will trigger a warning + Here, every read/write to the fields ptr1, ptr2, buf and sz will trigger a warning that the field has been explcitly marked as unsafe due to unsafe-buffer operations. }]; @@ -7814,10 +7814,10 @@ def HLSLLoopHintDocs : Documentation { let Content = [{ The ``[loop]`` directive allows loop optimization hints to be specified for the subsequent loop. The directive allows unrolling to -be disabled and is not compatible with [unroll(x)]. +be disabled and is not compatible with [unroll(x)]. 
Specifying the parameter, ``[loop]``, directs the -unroller to not unroll the loop. +unroller to not unroll the loop. .. code-block:: hlsl @@ -7934,6 +7934,16 @@ randomized. }]; } +def HLSLSV_GroupIDDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``SV_GroupID`` semantic, when applied to an input parameter, specifies which +thread group a shader is executing in. This attribute is only supported in compute shaders. + +The full documentation is available here: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sv-groupid + }]; +} + def HLSLSV_GroupIndexDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index eaff744924805e..db5cd73fba8ad1 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1354,7 +1354,7 @@ def ElementwiseLog10 : Builtin { def ElementwisePopcount : Builtin { let Spellings = ["__builtin_elementwise_popcount"]; - let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; let Prototype = "void(...)"; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index d492fae4145b92..c6b7cd637b9ece 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2286,7 +2286,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tblq">; def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">; // EXTQ - def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheck0_15>]>; + def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheckLaneIndex, 0>]>; // PMOV // Move to Pred diff --git 
a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 119351847bc372..1194c4ab1d0467 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5529,6 +5529,10 @@ def mld_seq_sa : Flag<["-"], "mld-seq-sa">, Group, HelpText<"Do not generate load-load barrier instructions (dbar 0x700)">; def mno_ld_seq_sa : Flag<["-"], "mno-ld-seq-sa">, Group, HelpText<"Generate load-load barrier instructions (dbar 0x700)">; +def mdiv32 : Flag<["-"], "mdiv32">, Group, + HelpText<"Use div.w[u] and mod.w[u] instructions with input not sign-extended.">; +def mno_div32 : Flag<["-"], "mno-div32">, Group, + HelpText<"Do not use div.w[u] and mod.w[u] instructions with input not sign-extended.">; def mannotate_tablejump : Flag<["-"], "mannotate-tablejump">, Group, HelpText<"Enable annotate table jump instruction to correlate it with the jump table.">; def mno_annotate_tablejump : Flag<["-"], "mno-annotate-tablejump">, Group, diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 06c541dec08cc8..ee685d95c96154 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -119,6 +119,7 @@ class SemaHLSL : public SemaBase { void handleNumThreadsAttr(Decl *D, const ParsedAttr &AL); void handleWaveSizeAttr(Decl *D, const ParsedAttr &AL); void handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL); + void handleSV_GroupIDAttr(Decl *D, const ParsedAttr &AL); void handlePackOffsetAttr(Decl *D, const ParsedAttr &AL); void handleShaderAttr(Decl *D, const ParsedAttr &AL); void handleResourceBindingAttr(Decl *D, const ParsedAttr &AL); @@ -136,6 +137,9 @@ class SemaHLSL : public SemaBase { bool CheckCompatibleParameterABI(FunctionDecl *New, FunctionDecl *Old); + // Diagnose whether the input ID is uint/unit2/uint3 type. 
+ bool diagnoseInputIDType(QualType T, const ParsedAttr &AL); + ExprResult ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg); QualType getInoutParameterType(QualType Ty); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index c6d003073966f3..bb5ab67328fbc6 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11005,6 +11005,7 @@ namespace { bool VisitUnaryImag(const UnaryOperator *E); bool VisitBinaryOperator(const BinaryOperator *E); bool VisitUnaryOperator(const UnaryOperator *E); + bool VisitCallExpr(const CallExpr *E); bool VisitConvertVectorExpr(const ConvertVectorExpr *E); bool VisitShuffleVectorExpr(const ShuffleVectorExpr *E); @@ -11302,6 +11303,35 @@ static bool handleVectorElementCast(EvalInfo &Info, const FPOptions FPO, return false; } +bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { + if (!IsConstantEvaluatedBuiltinCall(E)) + return ExprEvaluatorBaseTy::VisitCallExpr(E); + + switch (E->getBuiltinCallee()) { + default: + return false; + case Builtin::BI__builtin_elementwise_popcount: { + APValue Source; + if (!EvaluateAsRValue(Info, E->getArg(0), Source)) + return false; + + QualType DestEltTy = E->getType()->castAs()->getElementType(); + unsigned SourceLen = Source.getVectorLength(); + SmallVector ResultElements; + ResultElements.reserve(SourceLen); + + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + APSInt Elt = Source.getVectorElt(EltNum).getInt(); + ResultElements.push_back( + APValue(APSInt(APInt(Info.Ctx.getIntWidth(DestEltTy), Elt.popcount()), + DestEltTy->isUnsignedIntegerOrEnumerationType()))); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + } +} + bool VectorExprEvaluator::VisitConvertVectorExpr(const ConvertVectorExpr *E) { APValue Source; QualType SourceVecType = E->getSrcExpr()->getType(); @@ -13118,6 +13148,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case Builtin::BI__builtin_popcountl: case 
Builtin::BI__builtin_popcountll: case Builtin::BI__builtin_popcountg: + case Builtin::BI__builtin_elementwise_popcount: case Builtin::BI__popcnt16: // Microsoft variants of popcount case Builtin::BI__popcnt: case Builtin::BI__popcnt64: { diff --git a/clang/lib/Analysis/FlowSensitive/Arena.cpp b/clang/lib/Analysis/FlowSensitive/Arena.cpp index 81137e8088e330..7542a137c735e0 100644 --- a/clang/lib/Analysis/FlowSensitive/Arena.cpp +++ b/clang/lib/Analysis/FlowSensitive/Arena.cpp @@ -23,8 +23,8 @@ canonicalFormulaPair(const Formula &LHS, const Formula &RHS) { } template -const Formula &cached(llvm::DenseMap &Cache, Key K, - ComputeFunc &&Compute) { +static const Formula &cached(llvm::DenseMap &Cache, Key K, + ComputeFunc &&Compute) { auto [It, Inserted] = Cache.try_emplace(std::forward(K)); if (Inserted) It->second = Compute(); diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index c5c6e900b79766..693313b322af1b 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -64,7 +64,8 @@ static llvm::DenseMap intersectDeclToLoc( // expression must map to the same location / value. This is the case if we are // performing a join for control flow within a full-expression (which is the // only case when this function should be used). -template MapT joinExprMaps(const MapT &Map1, const MapT &Map2) { +template +static MapT joinExprMaps(const MapT &Map1, const MapT &Map2) { MapT Result = Map1; for (const auto &Entry : Map2) { @@ -204,10 +205,11 @@ static WidenResult widenDistinctValues(QualType Type, Value &Prev, // Returns whether the values in `Map1` and `Map2` compare equal for those // keys that `Map1` and `Map2` have in common. 
template -bool compareKeyToValueMaps(const llvm::MapVector &Map1, - const llvm::MapVector &Map2, - const Environment &Env1, const Environment &Env2, - Environment::ValueModel &Model) { +static bool compareKeyToValueMaps(const llvm::MapVector &Map1, + const llvm::MapVector &Map2, + const Environment &Env1, + const Environment &Env2, + Environment::ValueModel &Model) { for (auto &Entry : Map1) { Key K = Entry.first; assert(K != nullptr); @@ -260,7 +262,7 @@ joinLocToVal(const llvm::MapVector &LocToVal, // Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either // `const StorageLocation *` or `const Expr *`. template -llvm::MapVector +static llvm::MapVector widenKeyToValueMap(const llvm::MapVector &CurMap, const llvm::MapVector &PrevMap, Environment &CurEnv, const Environment &PrevEnv, diff --git a/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp index 77d817dafe8378..02fd73754f01be 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp @@ -16,8 +16,9 @@ namespace dataflow { /// Determines whether `D` is one of the methods used to implement Chromium's /// `CHECK` macros. Populates `CheckDecls`, if empty. -bool isCheckLikeMethod(llvm::SmallDenseSet &CheckDecls, - const CXXMethodDecl &D) { +static bool +isCheckLikeMethod(llvm::SmallDenseSet &CheckDecls, + const CXXMethodDecl &D) { // All of the methods of interest are static, so avoid any lookup for // non-static methods (the common case). if (!D.isStatic()) diff --git a/clang/lib/Analysis/IntervalPartition.cpp b/clang/lib/Analysis/IntervalPartition.cpp index 5f06606ec132e9..41199f358c5b97 100644 --- a/clang/lib/Analysis/IntervalPartition.cpp +++ b/clang/lib/Analysis/IntervalPartition.cpp @@ -36,8 +36,8 @@ static unsigned getID(const CFGIntervalNode &I) { return I.ID; } // `Node` must be one of `CFGBlock` or `CFGIntervalNode`. 
template -BuildResult buildInterval(llvm::BitVector &Partitioned, - const Node *Header) { +static BuildResult buildInterval(llvm::BitVector &Partitioned, + const Node *Header) { assert(Header != nullptr); BuildResult Interval; Interval.Nodes.push_back(Header); @@ -102,10 +102,10 @@ BuildResult buildInterval(llvm::BitVector &Partitioned, } template -void fillIntervalNode(CFGIntervalGraph &Graph, - std::vector &Index, - std::queue &Successors, - llvm::BitVector &Partitioned, const Node *Header) { +static void fillIntervalNode(CFGIntervalGraph &Graph, + std::vector &Index, + std::queue &Successors, + llvm::BitVector &Partitioned, const Node *Header) { BuildResult Result = buildInterval(Partitioned, Header); for (const auto *S : Result.Successors) Successors.push(S); @@ -138,8 +138,8 @@ void fillIntervalNode(CFGIntervalGraph &Graph, } template -CFGIntervalGraph partitionIntoIntervalsImpl(unsigned NumBlockIDs, - const Node *EntryBlock) { +static CFGIntervalGraph partitionIntoIntervalsImpl(unsigned NumBlockIDs, + const Node *EntryBlock) { assert(EntryBlock != nullptr); CFGIntervalGraph Graph; // `Index` maps all of the nodes of the input graph to the interval to which diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 5f36ffa926b269..321097e16a45f7 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -2326,7 +2326,8 @@ static StringRef getEndOfLine() { } // Returns the text indicating that the user needs to provide input there: -std::string getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") { +static std::string +getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") { std::string s = std::string("<# "); s += HintTextToUser; s += " #>"; diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp index 3f2d7317532aaf..9ed695358b1ca8 100644 --- a/clang/lib/Basic/Targets/LoongArch.cpp +++ 
b/clang/lib/Basic/Targets/LoongArch.cpp @@ -205,7 +205,8 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, // TODO: As more features of the V1.1 ISA are supported, a unified "v1.1" // arch feature set will be used to include all sub-features belonging to // the V1.1 ISA version. - if (HasFeatureFrecipe && HasFeatureLAM_BH && HasFeatureLD_SEQ_SA) + if (HasFeatureFrecipe && HasFeatureLAM_BH && HasFeatureLD_SEQ_SA && + HasFeatureDiv32) Builder.defineMacro("__loongarch_arch", Twine('"') + "la64v1.1" + Twine('"')); else @@ -242,6 +243,9 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, if (HasFeatureLD_SEQ_SA) Builder.defineMacro("__loongarch_ld_seq_sa", Twine(1)); + if (HasFeatureDiv32) + Builder.defineMacro("__loongarch_div32", Twine(1)); + StringRef ABI = getABI(); if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") Builder.defineMacro("__loongarch_lp64"); @@ -322,6 +326,8 @@ bool LoongArchTargetInfo::handleTargetFeatures( HasFeatureLAM_BH = true; else if (Feature == "+ld-seq-sa") HasFeatureLD_SEQ_SA = true; + else if (Feature == "+div32") + HasFeatureDiv32 = true; } return true; } diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index e5eae7a8fcf677..3002a0bbc4491f 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -32,6 +32,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { bool HasFeatureFrecipe; bool HasFeatureLAM_BH; bool HasFeatureLD_SEQ_SA; + bool HasFeatureDiv32; public: LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) @@ -43,6 +44,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { HasFeatureFrecipe = false; HasFeatureLAM_BH = false; HasFeatureLD_SEQ_SA = false; + HasFeatureDiv32 = false; LongDoubleWidth = 128; LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 
38d3c521f1aeb8..ec40326e20a418 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5125,9 +5125,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Some architectures (such as x86-64) have the ABI changed based on // attribute-target/features. Give them a chance to diagnose. - CGM.getTargetCodeGenInfo().checkFunctionCallABI( - CGM, Loc, dyn_cast_or_null(CurCodeDecl), - dyn_cast_or_null(TargetDecl), CallArgs, RetTy); + const FunctionDecl *CallerDecl = dyn_cast_or_null(CurCodeDecl); + const FunctionDecl *CalleeDecl = dyn_cast_or_null(TargetDecl); + CGM.getTargetCodeGenInfo().checkFunctionCallABI(CGM, Loc, CallerDecl, + CalleeDecl, CallArgs, RetTy); // 1. Set up the arguments. @@ -5702,7 +5703,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline); // Add call-site always_inline attribute if exists. - if (InAlwaysInlineAttributedStmt) + // Note: This corresponds to the [[clang::always_inline]] statement attribute. + if (InAlwaysInlineAttributedStmt && + !CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI( + CallerDecl, CalleeDecl)) Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); @@ -5718,7 +5722,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // FIXME: should this really take priority over __try, below? 
if (CurCodeDecl && CurCodeDecl->hasAttr() && !InNoInlineAttributedStmt && - !(TargetDecl && TargetDecl->hasAttr())) { + !(TargetDecl && TargetDecl->hasAttr()) && + !CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI( + CallerDecl, CalleeDecl)) { Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); } diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 7ba0d615018181..2c293523fca8ca 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -389,6 +389,10 @@ llvm::Value *CGHLSLRuntime::emitInputSemantic(IRBuilder<> &B, CGM.getIntrinsic(getThreadIdIntrinsic()); return buildVectorInput(B, ThreadIDIntrinsic, Ty); } + if (D.hasAttr()) { + llvm::Function *GroupIDIntrinsic = CGM.getIntrinsic(Intrinsic::dx_group_id); + return buildVectorInput(B, GroupIDIntrinsic, Ty); + } assert(false && "Unhandled parameter attribute"); return nullptr; } diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 373f8b8a80fdb1..ab3142bdea684e 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -98,6 +98,24 @@ class TargetCodeGenInfo { const CallArgList &Args, QualType ReturnType) const {} + /// Returns true if inlining the function call would produce incorrect code + /// for the current target and should be ignored (even with the always_inline + /// or flatten attributes). + /// + /// Note: This probably should be handled in LLVM. However, the LLVM + /// `alwaysinline` attribute currently means the inliner will ignore + /// mismatched attributes (which sometimes can generate invalid code). So, + /// this hook allows targets to avoid adding the LLVM `alwaysinline` attribute + /// based on C/C++ attributes or other target-specific reasons. 
+ /// + /// See previous discussion here: + /// https://discourse.llvm.org/t/rfc-avoid-inlining-alwaysinline-functions-when-they-cannot-be-inlined/79528 + virtual bool + wouldInliningViolateFunctionCallABI(const FunctionDecl *Caller, + const FunctionDecl *Callee) const { + return false; + } + /// Determines the size of struct _Unwind_Exception on this platform, /// in 8-bit units. The Itanium ABI defines this as: /// struct _Unwind_Exception { diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 9320c6ef06efab..be33e26f047841 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -177,6 +177,9 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { const FunctionDecl *Callee, const CallArgList &Args, QualType ReturnType) const override; + bool wouldInliningViolateFunctionCallABI( + const FunctionDecl *Caller, const FunctionDecl *Callee) const override; + private: // Diagnose calls between functions with incompatible Streaming SVE // attributes. @@ -1143,12 +1146,22 @@ void AArch64TargetCodeGenInfo::checkFunctionABI( } } -void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming( - CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller, - const FunctionDecl *Callee) const { - if (!Caller || !Callee || !Callee->hasAttr()) - return; +enum class ArmSMEInlinability : uint8_t { + Ok = 0, + ErrorCalleeRequiresNewZA = 1 << 0, + WarnIncompatibleStreamingModes = 1 << 1, + ErrorIncompatibleStreamingModes = 1 << 2, + + IncompatibleStreamingModes = + WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/ErrorIncompatibleStreamingModes), +}; + +/// Determines if there are any Arm SME ABI issues with inlining \p Callee into +/// \p Caller. Returns the issue (if any) in the ArmSMEInlinability bit enum. 
+static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller, + const FunctionDecl *Callee) { bool CallerIsStreaming = IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true); bool CalleeIsStreaming = @@ -1156,17 +1169,44 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming( bool CallerIsStreamingCompatible = isStreamingCompatible(Caller); bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee); + ArmSMEInlinability Inlinability = ArmSMEInlinability::Ok; + if (!CalleeIsStreamingCompatible && - (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) - CGM.getDiags().Report( - CallLoc, CalleeIsStreaming - ? diag::err_function_always_inline_attribute_mismatch - : diag::warn_function_always_inline_attribute_mismatch) - << Caller->getDeclName() << Callee->getDeclName() << "streaming"; + (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) { + if (CalleeIsStreaming) + Inlinability |= ArmSMEInlinability::ErrorIncompatibleStreamingModes; + else + Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes; + } if (auto *NewAttr = Callee->getAttr()) if (NewAttr->isNewZA()) - CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za) - << Callee->getDeclName(); + Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA; + + return Inlinability; +} + +void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming( + CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller, + const FunctionDecl *Callee) const { + if (!Caller || !Callee || !Callee->hasAttr()) + return; + + ArmSMEInlinability Inlinability = GetArmSMEInlinability(Caller, Callee); + + if ((Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) != + ArmSMEInlinability::Ok) + CGM.getDiags().Report( + CallLoc, + (Inlinability & ArmSMEInlinability::ErrorIncompatibleStreamingModes) == + ArmSMEInlinability::ErrorIncompatibleStreamingModes + ? 
diag::err_function_always_inline_attribute_mismatch + : diag::warn_function_always_inline_attribute_mismatch) + << Caller->getDeclName() << Callee->getDeclName() << "streaming"; + + if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZA) == + ArmSMEInlinability::ErrorCalleeRequiresNewZA) + CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za) + << Callee->getDeclName(); } // If the target does not have floating-point registers, but we are using a @@ -1200,6 +1240,12 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM, checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, ReturnType); } +bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI( + const FunctionDecl *Caller, const FunctionDecl *Callee) const { + return Caller && Callee && + GetArmSMEInlinability(Caller, Callee) != ArmSMEInlinability::Ok; +} + void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index, raw_ostream &Out) const { diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp index 67b71a3ec623e4..5be57e866d85e8 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -283,6 +283,15 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, else Features.push_back("-ld-seq-sa"); } + + // Select div32 feature determined by -m[no-]div32. 
+ if (const Arg *A = + Args.getLastArg(options::OPT_mdiv32, options::OPT_mno_div32)) { + if (A->getOption().matches(options::OPT_mdiv32)) + Features.push_back("+div32"); + else + Features.push_back("-div32"); + } } std::string loongarch::postProcessTargetCPUString(const std::string &CPU, diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 010ac9c1a3e3a9..f30603feb65c5d 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -460,6 +460,15 @@ Decl *Parser::ParseExportDeclaration() { assert(Tok.is(tok::kw_export)); SourceLocation ExportLoc = ConsumeToken(); + if (Tok.is(tok::code_completion)) { + cutOffParsing(); + Actions.CodeCompletion().CodeCompleteOrdinaryName( + getCurScope(), PP.isIncrementalProcessingEnabled() + ? SemaCodeCompletion::PCC_TopLevelOrExpression + : SemaCodeCompletion::PCC_Namespace); + return nullptr; + } + ParseScope ExportScope(this, Scope::DeclScope); Decl *ExportDecl = Actions.ActOnStartExportDecl( getCurScope(), ExportLoc, diff --git a/clang/lib/Parse/ParseHLSL.cpp b/clang/lib/Parse/ParseHLSL.cpp index 46a37e94353533..4de342b63ed802 100644 --- a/clang/lib/Parse/ParseHLSL.cpp +++ b/clang/lib/Parse/ParseHLSL.cpp @@ -280,6 +280,7 @@ void Parser::ParseHLSLAnnotations(ParsedAttributes &Attrs, case ParsedAttr::UnknownAttribute: Diag(Loc, diag::err_unknown_hlsl_semantic) << II; return; + case ParsedAttr::AT_HLSLSV_GroupID: case ParsedAttr::AT_HLSLSV_GroupIndex: case ParsedAttr::AT_HLSLSV_DispatchThreadID: break; diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 12da3a2cbca314..60ea1383b2a6ee 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -1836,6 +1836,9 @@ static void AddTypeSpecifierResults(const LangOptions &LangOpts, Builder.AddChunk(CodeCompletionString::CK_RightParen); Results.AddResult(Result(Builder.TakeString())); } + + if (LangOpts.Char8 || LangOpts.CPlusPlus20) + 
Results.AddResult(Result("char8_t", CCP_Type)); } else Results.AddResult(Result("__auto_type", CCP_Type)); @@ -1888,6 +1891,9 @@ AddStorageSpecifiers(SemaCodeCompletion::ParserCompletionContext CCC, Results.AddResult(Result("constexpr")); Results.AddResult(Result("thread_local")); } + + if (LangOpts.CPlusPlus20) + Results.AddResult(Result("constinit")); } static void @@ -1911,6 +1917,9 @@ AddFunctionSpecifiers(SemaCodeCompletion::ParserCompletionContext CCC, case SemaCodeCompletion::PCC_Template: if (LangOpts.CPlusPlus || LangOpts.C99) Results.AddResult(Result("inline")); + + if (LangOpts.CPlusPlus20) + Results.AddResult(Result("consteval")); break; case SemaCodeCompletion::PCC_ObjCInstanceVariableList: @@ -2186,6 +2195,69 @@ AddOrdinaryNameResults(SemaCodeCompletion::ParserCompletionContext CCC, } else { Results.AddResult(Result("template", CodeCompletionResult::RK_Keyword)); } + + if (SemaRef.getLangOpts().CPlusPlus20 && + SemaRef.getLangOpts().CPlusPlusModules) { + clang::Module *CurrentModule = SemaRef.getCurrentModule(); + if (SemaRef.CurContext->isTranslationUnit()) { + /// Global module fragment can only be declared in the beginning of + /// the file. CurrentModule should be null in this case. + if (!CurrentModule) { + // module; + Builder.AddTypedTextChunk("module"); + Builder.AddChunk(CodeCompletionString::CK_SemiColon); + Builder.AddChunk(CodeCompletionString::CK_VerticalSpace); + Results.AddResult(Result(Builder.TakeString())); + } + + /// Named module should be declared in the beginning of the file, + /// or after the global module fragment. 
+ if (!CurrentModule || + CurrentModule->Kind == Module::ExplicitGlobalModuleFragment || + CurrentModule->Kind == Module::ImplicitGlobalModuleFragment) { + // export module; + // module name; + Builder.AddTypedTextChunk("module"); + Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + Builder.AddPlaceholderChunk("name"); + Builder.AddChunk(CodeCompletionString::CK_SemiColon); + Builder.AddChunk(CodeCompletionString::CK_VerticalSpace); + Results.AddResult(Result(Builder.TakeString())); + } + + /// Import can occur in non module file or after the named module + /// declaration. + if (!CurrentModule || + CurrentModule->Kind == Module::ModuleInterfaceUnit || + CurrentModule->Kind == Module::ModulePartitionInterface) { + // import name; + Builder.AddTypedTextChunk("import"); + Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + Builder.AddPlaceholderChunk("name"); + Builder.AddChunk(CodeCompletionString::CK_SemiColon); + Builder.AddChunk(CodeCompletionString::CK_VerticalSpace); + Results.AddResult(Result(Builder.TakeString())); + } + + if (CurrentModule && + (CurrentModule->Kind == Module::ModuleInterfaceUnit || + CurrentModule->Kind == Module::ModulePartitionInterface)) { + // module: private; + Builder.AddTypedTextChunk("module"); + Builder.AddChunk(CodeCompletionString::CK_Colon); + Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + Builder.AddTypedTextChunk("private"); + Builder.AddChunk(CodeCompletionString::CK_SemiColon); + Builder.AddChunk(CodeCompletionString::CK_VerticalSpace); + Results.AddResult(Result(Builder.TakeString())); + } + } + + // export + if (!CurrentModule || + CurrentModule->Kind != Module::ModuleKind::PrivateModuleFragment) + Results.AddResult(Result("export", CodeCompletionResult::RK_Keyword)); + } } if (SemaRef.getLangOpts().ObjC) @@ -2253,6 +2325,11 @@ AddOrdinaryNameResults(SemaCodeCompletion::ParserCompletionContext CCC, [[fallthrough]]; case SemaCodeCompletion::PCC_Template: + if 
(SemaRef.getLangOpts().CPlusPlus20 && + CCC == SemaCodeCompletion::PCC_Template) + Results.AddResult(Result("concept", CCP_Keyword)); + [[fallthrough]]; + case SemaCodeCompletion::PCC_MemberTemplate: if (SemaRef.getLangOpts().CPlusPlus && Results.includeCodePatterns()) { // template < parameters > @@ -2265,6 +2342,11 @@ AddOrdinaryNameResults(SemaCodeCompletion::ParserCompletionContext CCC, Results.AddResult(Result("template", CodeCompletionResult::RK_Keyword)); } + if (SemaRef.getLangOpts().CPlusPlus20 && + (CCC == SemaCodeCompletion::PCC_Template || + CCC == SemaCodeCompletion::PCC_MemberTemplate)) + Results.AddResult(Result("requires", CCP_Keyword)); + AddStorageSpecifiers(CCC, SemaRef.getLangOpts(), Results); AddFunctionSpecifiers(CCC, SemaRef.getLangOpts(), Results); break; @@ -2486,6 +2568,14 @@ AddOrdinaryNameResults(SemaCodeCompletion::ParserCompletionContext CCC, Builder.AddPlaceholderChunk("expression"); Builder.AddChunk(CodeCompletionString::CK_SemiColon); Results.AddResult(Result(Builder.TakeString())); + // "co_return expression ;" for coroutines(C++20). + if (SemaRef.getLangOpts().CPlusPlus20) { + Builder.AddTypedTextChunk("co_return"); + Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + Builder.AddPlaceholderChunk("expression"); + Builder.AddChunk(CodeCompletionString::CK_SemiColon); + Results.AddResult(Result(Builder.TakeString())); + } // When boolean, also add 'return true;' and 'return false;'. 
if (ReturnType->isBooleanType()) { Builder.AddTypedTextChunk("return true"); @@ -2706,6 +2796,44 @@ AddOrdinaryNameResults(SemaCodeCompletion::ParserCompletionContext CCC, Builder.AddChunk(CodeCompletionString::CK_RightParen); Results.AddResult(Result(Builder.TakeString())); } + + if (SemaRef.getLangOpts().CPlusPlus20) { + // co_await expression + Builder.AddTypedTextChunk("co_await"); + Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + Builder.AddPlaceholderChunk("expression"); + Results.AddResult(Result(Builder.TakeString())); + + // co_yield expression + Builder.AddTypedTextChunk("co_yield"); + Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + Builder.AddPlaceholderChunk("expression"); + Results.AddResult(Result(Builder.TakeString())); + + // requires (parameters) { requirements } + Builder.AddResultTypeChunk("bool"); + Builder.AddTypedTextChunk("requires"); + Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + Builder.AddChunk(CodeCompletionString::CK_LeftParen); + Builder.AddPlaceholderChunk("parameters"); + Builder.AddChunk(CodeCompletionString::CK_RightParen); + Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + Builder.AddChunk(CodeCompletionString::CK_LeftBrace); + Builder.AddChunk(CodeCompletionString::CK_VerticalSpace); + Builder.AddPlaceholderChunk("requirements"); + Builder.AddChunk(CodeCompletionString::CK_VerticalSpace); + Builder.AddChunk(CodeCompletionString::CK_RightBrace); + Results.AddResult(Result(Builder.TakeString())); + + if (SemaRef.CurContext->isRequiresExprBody()) { + // requires expression ; + Builder.AddTypedTextChunk("requires"); + Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + Builder.AddPlaceholderChunk("expression"); + Builder.AddChunk(CodeCompletionString::CK_SemiColon); + Results.AddResult(Result(Builder.TakeString())); + } + } } if (SemaRef.getLangOpts().ObjC) { diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 
146d9c86e0715a..53cc8cb6afd7dc 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7103,6 +7103,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_HLSLWaveSize: S.HLSL().handleWaveSizeAttr(D, AL); break; + case ParsedAttr::AT_HLSLSV_GroupID: + S.HLSL().handleSV_GroupIDAttr(D, AL); + break; case ParsedAttr::AT_HLSLSV_GroupIndex: handleSimpleAttribute(S, D, AL); break; diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index 434768b99d631e..85d5dfcb3db6de 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -1874,8 +1874,16 @@ Sema::BuildFieldReferenceExpr(Expr *BaseExpr, bool IsArrow, Context.getAttributedType(attr::NoDeref, MemberType, MemberType); } - auto *CurMethod = dyn_cast(CurContext); - if (!(CurMethod && CurMethod->isDefaulted())) + auto isDefaultedSpecialMember = [this](const DeclContext *Ctx) { + auto *Method = dyn_cast(CurContext); + if (!Method || !Method->isDefaulted()) + return false; + + return getDefaultedFunctionKind(Method).isSpecialMember(); + }; + + // Implicit special members should not mark fields as used. 
+ if (!isDefaultedSpecialMember(CurContext)) UnusedPrivateFields.remove(Field); ExprResult Base = PerformObjectMemberConversion(BaseExpr, SS.getScopeRep(), diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 8109c3a2cc0f1b..8b2f24a8e4be0a 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -434,6 +434,7 @@ void SemaHLSL::CheckSemanticAnnotation( switch (AnnotationAttr->getKind()) { case attr::HLSLSV_DispatchThreadID: case attr::HLSLSV_GroupIndex: + case attr::HLSLSV_GroupID: if (ST == llvm::Triple::Compute) return; DiagnoseAttrStageMismatch(AnnotationAttr, ST, {llvm::Triple::Compute}); @@ -764,26 +765,36 @@ void SemaHLSL::handleWaveSizeAttr(Decl *D, const ParsedAttr &AL) { D->addAttr(NewAttr); } -static bool isLegalTypeForHLSLSV_DispatchThreadID(QualType T) { - if (!T->hasUnsignedIntegerRepresentation()) +bool SemaHLSL::diagnoseInputIDType(QualType T, const ParsedAttr &AL) { + const auto *VT = T->getAs(); + + if (!T->hasUnsignedIntegerRepresentation() || + (VT && VT->getNumElements() > 3)) { + Diag(AL.getLoc(), diag::err_hlsl_attr_invalid_type) + << AL << "uint/uint2/uint3"; return false; - if (const auto *VT = T->getAs()) - return VT->getNumElements() <= 3; + } + return true; } void SemaHLSL::handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL) { auto *VD = cast(D); - if (!isLegalTypeForHLSLSV_DispatchThreadID(VD->getType())) { - Diag(AL.getLoc(), diag::err_hlsl_attr_invalid_type) - << AL << "uint/uint2/uint3"; + if (!diagnoseInputIDType(VD->getType(), AL)) return; - } D->addAttr(::new (getASTContext()) HLSLSV_DispatchThreadIDAttr(getASTContext(), AL)); } +void SemaHLSL::handleSV_GroupIDAttr(Decl *D, const ParsedAttr &AL) { + auto *VD = cast(D); + if (!diagnoseInputIDType(VD->getType(), AL)) + return; + + D->addAttr(::new (getASTContext()) HLSLSV_GroupIDAttr(getASTContext(), AL)); +} + void SemaHLSL::handlePackOffsetAttr(Decl *D, const ParsedAttr &AL) { if (!isa(D) || !isa(D->getDeclContext())) { 
Diag(AL.getLoc(), diag::err_hlsl_attr_invalid_ast_node) diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 76be9a2abf5e46..d146edeabab741 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -859,10 +859,11 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitNoCreateClause( OpenACCClause *SemaOpenACCClauseVisitor::VisitPresentClause( SemaOpenACC::OpenACCParsedClause &Clause) { - // Restrictions only properly implemented on 'compute' constructs, and - // 'compute' constructs are the only construct that can do anything with - // this yet, so skip/treat as unimplemented in this case. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind())) + // Restrictions only properly implemented on 'compute'/'combined constructs, + // and 'compute'/'combined' constructs are the only construct that can do + // anything with this yet, so skip/treat as unimplemented in this case. + if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) && + !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind())) return isNotImplemented(); // ActOnVar ensured that everything is a valid variable reference, so there // really isn't anything to do here. GCC does some duplicate-finding, though diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index e4bf9aa521224b..4c9e37bd286dee 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -15565,7 +15565,7 @@ ExprResult Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE, // Build the actual expression node. 
ExprResult FnExpr = CreateFunctionRefExpr(*this, Method, FoundDecl, MemExpr, - HadMultipleCandidates, MemExpr->getExprLoc()); + HadMultipleCandidates, MemExpr->getBeginLoc()); if (FnExpr.isInvalid()) return ExprError(); diff --git a/clang/test/AST/ast-dump-cxx2b-deducing-this.cpp b/clang/test/AST/ast-dump-cxx2b-deducing-this.cpp new file mode 100644 index 00000000000000..04cff07376885a --- /dev/null +++ b/clang/test/AST/ast-dump-cxx2b-deducing-this.cpp @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -std=c++2b -ast-dump %s | FileCheck -strict-whitespace %s + +namespace GH116928 { +struct S { + int f(this S&); +}; + +int main() { + S s; + int x = s.f(); + // CHECK: CallExpr 0x{{[^ ]*}} 'int + // CHECK-NEXT: |-ImplicitCastExpr 0x{{[^ ]*}} 'int (*)(S &)' + // CHECK-NEXT: | `-DeclRefExpr 0x{{[^ ]*}} 'int (S &)' lvalue CXXMethod 0x{{[^ ]*}} 'f' 'int (S &)' +} +} diff --git a/clang/test/AST/ast-print-openacc-combined-construct.cpp b/clang/test/AST/ast-print-openacc-combined-construct.cpp index 6885806584f3d1..e04c39ac9bc5be 100644 --- a/clang/test/AST/ast-print-openacc-combined-construct.cpp +++ b/clang/test/AST/ast-print-openacc-combined-construct.cpp @@ -122,4 +122,13 @@ void foo() { #pragma acc kernels loop async for(int i = 0;i<5;++i); +// CHECK: #pragma acc parallel loop present(i, array[1], array, array[1:2]) +#pragma acc parallel loop present(i, array[1], array, array[1:2]) + for(int i = 0;i<5;++i); +// CHECK: #pragma acc serial loop present(i, array[1], array, array[1:2]) +#pragma acc serial loop present(i, array[1], array, array[1:2]) + for(int i = 0;i<5;++i); +// CHECK: #pragma acc kernels loop present(i, array[1], array, array[1:2]) +#pragma acc kernels loop present(i, array[1], array, array[1:2]) + for(int i = 0;i<5;++i); } diff --git a/clang/test/CodeCompletion/keywords-cxx20.cpp b/clang/test/CodeCompletion/keywords-cxx20.cpp new file mode 100644 index 00000000000000..612c3c0045e394 --- /dev/null +++ 
b/clang/test/CodeCompletion/keywords-cxx20.cpp @@ -0,0 +1,57 @@ +module; + +export module M; + +export const char8_t x = 1; + +template requires true +const int y = requires { typename T::type; requires T::value; }; + +class co_test {}; + +int f(){ co_test test; return 1; } + +module: private; + +// RUN: %clang_cc1 -std=c++20 -code-completion-at=%s:1:3 %s | FileCheck --check-prefix=CHECK-MODULE1 %s +// CHECK-MODULE1: module; +// CHECK-MODULE1: module <#name#>; + +// RUN: %clang_cc1 -std=c++20 -code-completion-at=%s:3:11 %s | FileCheck --check-prefix=CHECK-MODULE2 %s +// CHECK-MODULE2: module <#name#>; + +// RUN: %clang_cc1 -std=c++20 -code-completion-at=%s:14:3 %s | FileCheck --check-prefix=CHECK-MODULE3 %s +// CHECK-MODULE3: module: private; + +// RUN: %clang_cc1 -std=c++20 -code-completion-at=%s:3:3 %s | FileCheck --check-prefix=CHECK-EXPORT %s +// CHECK-EXPORT: export + +// RUN: %clang_cc1 -std=c++20 -code-completion-at=%s:5:11 %s | FileCheck --check-prefix=CHECK-CONST %s +// CHECK-CONST: const +// CHECK-CONST: consteval +// CHECK-CONST: constexpr +// CHECK-CONST: constinit + +// RUN: %clang_cc1 -std=c++20 -code-completion-at=%s:5:19 %s | FileCheck --check-prefix=CHECK-CHAR %s +// CHECK-CHAR: char8_t + +// RUN: %clang_cc1 -std=c++20 -code-completion-at=%s:8:3 %s | FileCheck --check-prefix=CHECK-CONSTRAINT %s +// CHECK-CONSTRAINT: concept +// CHECK-CONSTRAINT: const +// CHECK-CONSTRAINT: consteval +// CHECK-CONSTRAINT: constexpr +// CHECK-CONSTRAINT: constinit + +// RUN: %clang_cc1 -std=c++20 -code-completion-at=%s:7:27 %s | FileCheck --check-prefix=CHECK-REQUIRES2 %s +// CHECK-REQUIRES2: requires + +// RUN: %clang_cc1 -std=c++20 -code-completion-at=%s:8:20 %s | FileCheck -check-prefix=CHECK-REQUIRE %s +// CHECK-REQUIRE: [#bool#]requires (<#parameters#>) { +// CHECK-REQUIRE: <#requirements#> +// CHECK-REQUIRE: } + +// RUN: %clang_cc1 -std=c++20 -code-completion-at=%s:12:13 %s | FileCheck --check-prefix=CHECK-COROUTINE %s +// CHECK-COROUTINE: co_await 
<#expression#> +// CHECK-COROUTINE: co_return <#expression#>; +// CHECK-COROUTINE: co_yield <#expression#> + diff --git a/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c b/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c new file mode 100644 index 00000000000000..ce6f203631fc5c --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -DUSE_FLATTEN -o - | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s + +// REQUIRES: aarch64-registered-target + +extern void was_inlined(void); + +#if defined(USE_FLATTEN) + #define FN_ATTR __attribute__((flatten)) + #define STMT_ATTR +#elif defined(USE_ALWAYS_INLINE_STMT) + #define FN_ATTR + #define STMT_ATTR [[clang::always_inline]] +#else + #error Expected USE_FLATTEN or USE_ALWAYS_INLINE_STMT to be defined. 
+#endif + +void fn(void) { was_inlined(); } +void fn_streaming_compatible(void) __arm_streaming_compatible { was_inlined(); } +void fn_streaming(void) __arm_streaming { was_inlined(); } +__arm_locally_streaming void fn_locally_streaming(void) { was_inlined(); } +__arm_new("za") void fn_streaming_new_za(void) __arm_streaming { was_inlined(); } + +FN_ATTR +void caller(void) { + STMT_ATTR fn(); + STMT_ATTR fn_streaming_compatible(); + STMT_ATTR fn_streaming(); + STMT_ATTR fn_locally_streaming(); + STMT_ATTR fn_streaming_new_za(); +} +// CHECK-LABEL: void @caller() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @fn_streaming +// CHECK-NEXT: call void @fn_locally_streaming +// CHECK-NEXT: call void @fn_streaming_new_za + +FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible { + STMT_ATTR fn(); + STMT_ATTR fn_streaming_compatible(); + STMT_ATTR fn_streaming(); + STMT_ATTR fn_locally_streaming(); + STMT_ATTR fn_streaming_new_za(); +} +// CHECK-LABEL: void @caller_streaming_compatible() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @fn +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @fn_streaming +// CHECK-NEXT: call void @fn_locally_streaming +// CHECK-NEXT: call void @fn_streaming_new_za + +FN_ATTR void caller_streaming(void) __arm_streaming { + STMT_ATTR fn(); + STMT_ATTR fn_streaming_compatible(); + STMT_ATTR fn_streaming(); + STMT_ATTR fn_locally_streaming(); + STMT_ATTR fn_streaming_new_za(); +} +// CHECK-LABEL: void @caller_streaming() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @fn +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @fn_streaming_new_za + +FN_ATTR __arm_locally_streaming +void caller_locally_streaming(void) { + STMT_ATTR fn(); + STMT_ATTR fn_streaming_compatible(); + STMT_ATTR fn_streaming(); + STMT_ATTR fn_locally_streaming(); + 
STMT_ATTR fn_streaming_new_za(); +} +// CHECK-LABEL: void @caller_locally_streaming() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @fn +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @fn_streaming_new_za diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c index 5fbfa881500ba1..06eec1e00900cc 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c @@ -103,111 +103,111 @@ svuint32_t test_svextq_u32(svuint32_t zn, svuint32_t zm) { // CHECK-LABEL: define dso_local @test_svextq_s32 // CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv4i32( [[ZN]], [[ZM]], i32 6) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv4i32( [[ZN]], [[ZM]], i32 3) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: define dso_local @_Z15test_svextq_s32u11__SVInt32_tS_ // CPP-CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv4i32( [[ZN]], [[ZM]], i32 6) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv4i32( [[ZN]], [[ZM]], i32 3) // CPP-CHECK-NEXT: ret [[TMP0]] // svint32_t test_svextq_s32(svint32_t zn, svint32_t zm) { - return SVE_ACLE_FUNC(svextq, _s32,,)(zn, zm, 6); + return SVE_ACLE_FUNC(svextq, _s32,,)(zn, zm, 3); } // CHECK-LABEL: define dso_local @test_svextq_u64 // CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv2i64( [[ZN]], [[ZM]], i32 3) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv2i64( [[ZN]], [[ZM]], i32 1) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: define 
dso_local @_Z15test_svextq_u64u12__SVUint64_tS_ // CPP-CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv2i64( [[ZN]], [[ZM]], i32 3) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv2i64( [[ZN]], [[ZM]], i32 1) // CPP-CHECK-NEXT: ret [[TMP0]] // svuint64_t test_svextq_u64(svuint64_t zn, svuint64_t zm) { - return SVE_ACLE_FUNC(svextq, _u64,,)(zn, zm, 3); + return SVE_ACLE_FUNC(svextq, _u64,,)(zn, zm, 1); } // CHECK-LABEL: define dso_local @test_svextq_s64 // CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv2i64( [[ZN]], [[ZM]], i32 7) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv2i64( [[ZN]], [[ZM]], i32 0) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: define dso_local @_Z15test_svextq_s64u11__SVInt64_tS_ // CPP-CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv2i64( [[ZN]], [[ZM]], i32 7) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv2i64( [[ZN]], [[ZM]], i32 0) // CPP-CHECK-NEXT: ret [[TMP0]] // svint64_t test_svextq_s64(svint64_t zn, svint64_t zm) { - return SVE_ACLE_FUNC(svextq, _s64,,)(zn, zm, 7); + return SVE_ACLE_FUNC(svextq, _s64,,)(zn, zm, 0); } // CHECK-LABEL: define dso_local @test_svextq_f16 // CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv8f16( [[ZN]], [[ZM]], i32 8) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv8f16( [[ZN]], [[ZM]], i32 7) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: define dso_local @_Z15test_svextq_f16u13__SVFloat16_tS_ // CPP-CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.extq.nxv8f16( [[ZN]], [[ZM]], i32 8) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv8f16( [[ZN]], [[ZM]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // svfloat16_t test_svextq_f16(svfloat16_t zn, svfloat16_t zm) { - return SVE_ACLE_FUNC(svextq, _f16,,)(zn, zm, 8); + return SVE_ACLE_FUNC(svextq, _f16,,)(zn, zm, 7); } // CHECK-LABEL: define dso_local @test_svextq_f32 // CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv4f32( [[ZN]], [[ZM]], i32 9) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv4f32( [[ZN]], [[ZM]], i32 2) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: define dso_local @_Z15test_svextq_f32u13__SVFloat32_tS_ // CPP-CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv4f32( [[ZN]], [[ZM]], i32 9) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv4f32( [[ZN]], [[ZM]], i32 2) // CPP-CHECK-NEXT: ret [[TMP0]] // svfloat32_t test_svextq_f32(svfloat32_t zn, svfloat32_t zm) { - return SVE_ACLE_FUNC(svextq, _f32,,)(zn, zm, 9); + return SVE_ACLE_FUNC(svextq, _f32,,)(zn, zm, 2); } // CHECK-LABEL: define dso_local @test_svextq_f64 // CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv2f64( [[ZN]], [[ZM]], i32 10) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv2f64( [[ZN]], [[ZM]], i32 0) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: define dso_local @_Z15test_svextq_f64u13__SVFloat64_tS_ // CPP-CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv2f64( [[ZN]], [[ZM]], i32 10) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv2f64( [[ZN]], [[ZM]], i32 0) // CPP-CHECK-NEXT: ret 
[[TMP0]] // svfloat64_t test_svextq_f64(svfloat64_t zn, svfloat64_t zm) { - return SVE_ACLE_FUNC(svextq, _f64,,)(zn, zm, 10); + return SVE_ACLE_FUNC(svextq, _f64,,)(zn, zm, 0); } // CHECK-LABEL: define dso_local @test_svextq_bf16 // CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv8bf16( [[ZN]], [[ZM]], i32 11) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv8bf16( [[ZN]], [[ZM]], i32 6) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: define dso_local @_Z16test_svextq_bf16u14__SVBfloat16_tS_ // CPP-CHECK-SAME: ( [[ZN:%.*]], [[ZM:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv8bf16( [[ZN]], [[ZM]], i32 11) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.extq.nxv8bf16( [[ZN]], [[ZM]], i32 6) // CPP-CHECK-NEXT: ret [[TMP0]] // svbfloat16_t test_svextq_bf16(svbfloat16_t zn, svbfloat16_t zm) { - return SVE_ACLE_FUNC(svextq, _bf16,,)(zn, zm, 11); + return SVE_ACLE_FUNC(svextq, _bf16,,)(zn, zm, 6); } diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index 748b5e4add7c76..f1f34432ca0ea1 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -666,11 +666,9 @@ void test_builtin_elementwise_log2(float f1, float f2, double d1, double d2, vf2 = __builtin_elementwise_log2(vf1); } -void test_builtin_elementwise_popcount(si8 vi1, si8 vi2, - long long int i1, long long int i2, short si, - _BitInt(31) bi1, _BitInt(31) bi2) { - - +void test_builtin_elementwise_popcount(si8 vi1, si8 vi2, long long int i1, + long long int i2, short si, + _BitInt(31) bi1, _BitInt(31) bi2) { // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8 // CHECK-NEXT: call i64 @llvm.ctpop.i64(i64 [[I1]]) i2 = __builtin_elementwise_popcount(i1); @@ -693,7 +691,7 @@ void 
test_builtin_elementwise_popcount(si8 vi1, si8 vi2, // CHECK-NEXT: call i32 @llvm.ctpop.i32(i32 [[IA1]]) b = __builtin_elementwise_popcount(int_as_one); - // CHECK: call i32 @llvm.ctpop.i32(i32 -10) + // CHECK: store i32 30, ptr @b, align 4 b = __builtin_elementwise_popcount(-10); // CHECK: [[SI:%.+]] = load i16, ptr %si.addr, align 2 diff --git a/clang/test/CodeGenHLSL/semantics/SV_GroupID.hlsl b/clang/test/CodeGenHLSL/semantics/SV_GroupID.hlsl new file mode 100644 index 00000000000000..5e09f0fe06d4e6 --- /dev/null +++ b/clang/test/CodeGenHLSL/semantics/SV_GroupID.hlsl @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s + +// Make sure SV_GroupID translated into dx.group.id. + +// CHECK: define void @foo() +// CHECK: %[[#ID:]] = call i32 @llvm.dx.group.id(i32 0) +// CHECK: call void @{{.*}}foo{{.*}}(i32 %[[#ID]]) +[shader("compute")] +[numthreads(8,8,1)] +void foo(uint Idx : SV_GroupID) {} + +// CHECK: define void @bar() +// CHECK: %[[#ID_X:]] = call i32 @llvm.dx.group.id(i32 0) +// CHECK: %[[#ID_X_:]] = insertelement <2 x i32> poison, i32 %[[#ID_X]], i64 0 +// CHECK: %[[#ID_Y:]] = call i32 @llvm.dx.group.id(i32 1) +// CHECK: %[[#ID_XY:]] = insertelement <2 x i32> %[[#ID_X_]], i32 %[[#ID_Y]], i64 1 +// CHECK: call void @{{.*}}bar{{.*}}(<2 x i32> %[[#ID_XY]]) +[shader("compute")] +[numthreads(8,8,1)] +void bar(uint2 Idx : SV_GroupID) {} + +// CHECK: define void @test() +// CHECK: %[[#ID_X:]] = call i32 @llvm.dx.group.id(i32 0) +// CHECK: %[[#ID_X_:]] = insertelement <3 x i32> poison, i32 %[[#ID_X]], i64 0 +// CHECK: %[[#ID_Y:]] = call i32 @llvm.dx.group.id(i32 1) +// CHECK: %[[#ID_XY:]] = insertelement <3 x i32> %[[#ID_X_]], i32 %[[#ID_Y]], i64 1 +// CHECK: %[[#ID_Z:]] = call i32 @llvm.dx.group.id(i32 2) +// CHECK: %[[#ID_XYZ:]] = insertelement <3 x i32> %[[#ID_XY]], i32 %[[#ID_Z]], i64 2 +// CHECK: call void @{{.*}}test{{.*}}(<3 x i32> %[[#ID_XYZ]]) 
+[shader("compute")] +[numthreads(8,8,1)] +void test(uint3 Idx : SV_GroupID) {} diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c index c7091336f3bc80..981ae5c5c7dc1c 100644 --- a/clang/test/Driver/loongarch-march.c +++ b/clang/test/Driver/loongarch-march.c @@ -39,21 +39,21 @@ // CC1-LA64V1P1: "-target-cpu" "loongarch64" // CC1-LA64V1P1-NOT: "-target-feature" -// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+ld-seq-sa" +// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+ld-seq-sa" "-target-feature" "+div32" // CC1-LA64V1P1-NOT: "-target-feature" // CC1-LA64V1P1: "-target-abi" "lp64d" // CC1-LA664: "-target-cpu" "la664" // CC1-LA664-NOT: "-target-feature" -// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+ld-seq-sa" +// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+ld-seq-sa" "-target-feature" "+div32" // CC1-LA664-NOT: "-target-feature" // CC1-LA664: "-target-abi" "lp64d" // IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual" // IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual" // IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual" -// IR-LA64V1P1: attributes #[[#]] 
={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lam-bh,+ld-seq-sa,+lsx,+ual" -// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+f,+frecipe,+lam-bh,+lasx,+ld-seq-sa,+lsx,+ual" +// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+div32,+frecipe,+lam-bh,+ld-seq-sa,+lsx,+ual" +// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+div32,+f,+frecipe,+lam-bh,+lasx,+ld-seq-sa,+lsx,+ual" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-mdiv32.c b/clang/test/Driver/loongarch-mdiv32.c new file mode 100644 index 00000000000000..cf774b3818c55d --- /dev/null +++ b/clang/test/Driver/loongarch-mdiv32.c @@ -0,0 +1,30 @@ +/// Test -m[no]div32 options. + +// RUN: %clang --target=loongarch64 -mdiv32 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-DIV32 +// RUN: %clang --target=loongarch64 -mno-div32 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-DIV32 +// RUN: %clang --target=loongarch64 -mno-div32 -mdiv32 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-DIV32 +// RUN: %clang --target=loongarch64 -mdiv32 -mno-div32 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-DIV32 + +// RUN: %clang --target=loongarch64 -mdiv32 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-DIV32 +// RUN: %clang --target=loongarch64 -mno-div32 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-DIV32 +// RUN: %clang --target=loongarch64 -mno-div32 -mdiv32 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-DIV32 +// RUN: %clang --target=loongarch64 -mdiv32 -mno-div32 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-DIV32 + + +// CC1-DIV32: "-target-feature" "+div32" +// CC1-NO-DIV32: "-target-feature" "-div32" + +// IR-DIV32: attributes #[[#]] 
={{.*}}"target-features"="{{(.*,)?}}+div32{{(,.*)?}}" +// IR-NO-DIV32: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-div32{{(,.*)?}}" + +int foo(void) { + return 42; +} diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c index 0e3320f01b328c..9045073cbb7893 100644 --- a/clang/test/Preprocessor/init-loongarch.c +++ b/clang/test/Preprocessor/init-loongarch.c @@ -798,7 +798,7 @@ // LA64-FPU0-LP64S-NOT: #define __loongarch_single_float // LA64-FPU0-LP64S: #define __loongarch_soft_float 1 -/// Check __loongarch_arch{_tune/_frecipe/_lam_bh/_ld_seq_sa}. +/// Check __loongarch_arch{_tune/_frecipe/_lam_bh/_ld_seq_sa/_div32}. // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s @@ -823,11 +823,11 @@ // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA,DIV32 -DARCH=la64v1.1 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -frecipe | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH,LD-SEQ-SA,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lsx | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA -DARCH=loongarch64 
-DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA,DIV32 -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +frecipe | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \ @@ -835,7 +835,7 @@ // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +lam-bh | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lam-bh | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LD-SEQ-SA,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lam-bh | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +lam-bh | \ @@ -843,23 +843,32 @@ // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +ld-seq-sa | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -ld-seq-sa | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH 
-DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +ld-seq-sa | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LD-SEQ-SA -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +ld-seq-sa | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s -// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe -Xclang -target-feature -Xclang +lam-bh -Xclang -target-feature -Xclang +ld-seq-sa | \ +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +div32 | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -div32| \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +div32 | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,DIV32 -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +div32 | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe -Xclang -target-feature -Xclang +lam-bh -Xclang 
-target-feature -Xclang +ld-seq-sa -Xclang -target-feature -Xclang +div32 | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE -DARCH=la64v1.1 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA -DARCH=la664 -DTUNE=la664 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA,DIV32 -DARCH=la664 -DTUNE=la664 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la664 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA -DARCH=la664 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA,DIV32 -DARCH=la664 -DTUNE=loongarch64 %s // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" +// DIV32: #define __loongarch_div32 1 // FRECIPE: #define __loongarch_frecipe 1 // LAM-BH: #define __loongarch_lam_bh 1 // LD-SEQ-SA: #define __loongarch_ld_seq_sa 1 diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp index f7047c99e884e9..ac7586e202b96c 100644 --- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp +++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp @@ -138,9 +138,47 @@ void test_svbfmul_lane(svbfloat16_t zn, svbfloat16_t zm, uint64_t idx){ } __attribute__((target("+sve2p1"))) -void test_svextq_lane(svint16_t zn_i16, svint16_t zm_i16, svfloat16_t zn_f16, svfloat16_t zm_f16){ - svextq_s16(zn_i16, zm_i16, 
-1); // expected-error {{argument value -1 is outside the valid range [0, 15]}} - svextq_f16(zn_f16, zm_f16, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +void test_svextq_8b_offset(svint8_t s8, svuint8_t u8){ + svextq_s8(s8, s8, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}} + svextq_u8(u8, u8, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}} + + svextq_s8(s8, s8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + svextq_u8(u8, u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +} + +__attribute__((target("+sve2p1"))) +void test_svextq_16b_offset(svint16_t s16, svuint16_t u16, svfloat16_t f16, svbfloat16_t bf16){ + svextq_s16(s16, s16, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} + svextq_u16(u16, u16, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} + svextq_f16(f16, f16, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} + svextq_bf16(bf16, bf16, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} + + svextq_s16(s16, s16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + svextq_u16(u16, u16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + svextq_f16(f16, f16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + svextq_bf16(bf16, bf16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +} + +__attribute__((target("+sve2p1"))) +void test_svextq_32b_offset(svint32_t s32, svuint32_t u32, svfloat32_t f32){ + svextq_s32(s32, s32, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} + svextq_u32(u32, u32, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} + svextq_f32(f32, f32, -1); // expected-error {{argument value -1 is 
outside the valid range [0, 3]}} + + svextq_s32(s32, s32, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svextq_u32(u32, u32, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svextq_f32(f32, f32, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +} + +__attribute__((target("+sve2p1"))) +void test_svextq_64b_offset(svint64_t s64, svuint64_t u64, svfloat64_t f64){ + svextq_s64(s64, s64, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} + svextq_u64(u64, u64, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} + svextq_f64(f64, f64, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} + + svextq_s64(s64, s64, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svextq_u64(u64, u64, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svextq_f64(f64, f64, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} } __attribute__((target("+sve2p1"))) diff --git a/clang/test/Sema/constant_builtins_vector.cpp b/clang/test/Sema/constant_builtins_vector.cpp index e84d09b24672b4..772a682141ce41 100644 --- a/clang/test/Sema/constant_builtins_vector.cpp +++ b/clang/test/Sema/constant_builtins_vector.cpp @@ -797,3 +797,23 @@ static_assert(__builtin_reduce_xor((vector4int){(int)0x11111111, (int)0x22222222 static_assert(__builtin_reduce_xor((vector4long){(long long)0x1111111111111111L, (long long)0x2222222222222222L, (long long)0x4444444444444444L, (long long)0x8888888888888888L}) == (long long)0xFFFFFFFFFFFFFFFFL); static_assert(__builtin_reduce_xor((vector4uint){0x11111111U, 0x22222222U, 0x44444444U, 0x88888888U}) == 0xFFFFFFFFU); static_assert(__builtin_reduce_xor((vector4ulong){0x1111111111111111UL, 0x2222222222222222UL, 0x4444444444444444UL, 0x8888888888888888UL}) == 0xFFFFFFFFFFFFFFFFUL); + +static_assert(__builtin_bit_cast(unsigned, 
__builtin_elementwise_popcount((vector4char){1, 2, 3, 4})) == (LITTLE_END ? 0x01020101 : 0x01010201)); +static_assert(__builtin_bit_cast(unsigned long long, __builtin_elementwise_popcount((vector4short){0, 0x0F0F, ~0, ~0x0F0F})) == (LITTLE_END ? 0x0008001000080000 : 0x0000000800100008)); +static_assert(__builtin_reduce_add(__builtin_elementwise_popcount((vector4int){1, 2, 3, 4})) == 5); +static_assert(__builtin_reduce_add(__builtin_elementwise_popcount((vector4int){0, 0xF0F0, ~0, ~0xF0F0})) == 16 * sizeof(int)); +static_assert(__builtin_reduce_add(__builtin_elementwise_popcount((vector4long){1L, 2L, 3L, 4L})) == 5L); +static_assert(__builtin_reduce_add(__builtin_elementwise_popcount((vector4long){0L, 0xF0F0L, ~0L, ~0xF0F0L})) == 16 * sizeof(long long)); +static_assert(__builtin_reduce_add(__builtin_elementwise_popcount((vector4uint){1U, 2U, 3U, 4U})) == 5U); +static_assert(__builtin_reduce_add(__builtin_elementwise_popcount((vector4uint){0U, 0xF0F0U, ~0U, ~0xF0F0U})) == 16 * sizeof(int)); +static_assert(__builtin_reduce_add(__builtin_elementwise_popcount((vector4ulong){1UL, 2UL, 3UL, 4UL})) == 5UL); +static_assert(__builtin_reduce_add(__builtin_elementwise_popcount((vector4ulong){0ULL, 0xF0F0ULL, ~0ULL, ~0xF0F0ULL})) == 16 * sizeof(unsigned long long)); +static_assert(__builtin_elementwise_popcount(0) == 0); +static_assert(__builtin_elementwise_popcount(0xF0F0) == 8); +static_assert(__builtin_elementwise_popcount(~0) == 8 * sizeof(int)); +static_assert(__builtin_elementwise_popcount(0U) == 0); +static_assert(__builtin_elementwise_popcount(0xF0F0U) == 8); +static_assert(__builtin_elementwise_popcount(~0U) == 8 * sizeof(int)); +static_assert(__builtin_elementwise_popcount(0L) == 0); +static_assert(__builtin_elementwise_popcount(0xF0F0L) == 8); +static_assert(__builtin_elementwise_popcount(~0LL) == 8 * sizeof(long long)); diff --git a/clang/test/SemaCXX/warn-unused-private-field.cpp b/clang/test/SemaCXX/warn-unused-private-field.cpp index 
1128eacc309d9f..bf104b1a76a656 100644 --- a/clang/test/SemaCXX/warn-unused-private-field.cpp +++ b/clang/test/SemaCXX/warn-unused-private-field.cpp @@ -20,6 +20,26 @@ class SpaceShipDefaultCompare { int operator<=>(const SpaceShipDefaultCompare &) const = default; }; +class EqDefaultCompareOutOfClass { + int used; // no warning, the compiler generated AST for the comparison operator + // references the fields of the class, and this should be considered + // a use. + // This test case is needed because clang does not emit the body + // of the defaulted operator when it is defined in-class until it + // finds a call to it. `-Wunused-private-field` is suppressed in + // a different way in that case. + bool operator==(const EqDefaultCompareOutOfClass &) const; +}; + +bool EqDefaultCompareOutOfClass::operator==(const EqDefaultCompareOutOfClass &) const = default; + +class FriendEqDefaultCompareOutOfClass { + int used; // no warning, same reasoning just tested via a friend declaration. + friend bool operator==(const FriendEqDefaultCompareOutOfClass &, const FriendEqDefaultCompareOutOfClass &); +}; + +bool operator==(const FriendEqDefaultCompareOutOfClass &, const FriendEqDefaultCompareOutOfClass &) = default; + #endif class NotFullyDefined { diff --git a/clang/test/SemaHLSL/Semantics/entry_parameter.hlsl b/clang/test/SemaHLSL/Semantics/entry_parameter.hlsl index 8484259f84692b..13c07038d2e4a4 100644 --- a/clang/test/SemaHLSL/Semantics/entry_parameter.hlsl +++ b/clang/test/SemaHLSL/Semantics/entry_parameter.hlsl @@ -2,12 +2,15 @@ // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-mesh -hlsl-entry CSMain -x hlsl -finclude-default-header -verify -o - %s [numthreads(8,8,1)] -// expected-error@+2 {{attribute 'SV_GroupIndex' is unsupported in 'mesh' shaders, requires compute}} -// expected-error@+1 {{attribute 'SV_DispatchThreadID' is unsupported in 'mesh' shaders, requires compute}} -void CSMain(int GI : SV_GroupIndex, uint ID : SV_DispatchThreadID) { -// CHECK: FunctionDecl 
0x{{[0-9a-fA-F]+}} <{{.*}}> line:[[@LINE-1]]:6 CSMain 'void (int, uint)' +// expected-error@+3 {{attribute 'SV_GroupIndex' is unsupported in 'mesh' shaders, requires compute}} +// expected-error@+2 {{attribute 'SV_DispatchThreadID' is unsupported in 'mesh' shaders, requires compute}} +// expected-error@+1 {{attribute 'SV_GroupID' is unsupported in 'mesh' shaders, requires compute}} +void CSMain(int GI : SV_GroupIndex, uint ID : SV_DispatchThreadID, uint GID : SV_GroupID) { +// CHECK: FunctionDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> line:[[@LINE-1]]:6 CSMain 'void (int, uint, uint)' // CHECK-NEXT: ParmVarDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:17 GI 'int' // CHECK-NEXT: HLSLSV_GroupIndexAttr // CHECK-NEXT: ParmVarDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:42 ID 'uint' // CHECK-NEXT: HLSLSV_DispatchThreadIDAttr +// CHECK-NEXT: ParmVarDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:73 GID 'uint' +// CHECK-NEXT: HLSLSV_GroupIDAttr } diff --git a/clang/test/SemaHLSL/Semantics/invalid_entry_parameter.hlsl b/clang/test/SemaHLSL/Semantics/invalid_entry_parameter.hlsl index bc3cf8bc51daf4..4e1f88aa2294b5 100644 --- a/clang/test/SemaHLSL/Semantics/invalid_entry_parameter.hlsl +++ b/clang/test/SemaHLSL/Semantics/invalid_entry_parameter.hlsl @@ -27,3 +27,25 @@ struct ST2 { static uint X : SV_DispatchThreadID; uint s : SV_DispatchThreadID; }; + +[numthreads(8,8,1)] +// expected-error@+1 {{attribute 'SV_GroupID' only applies to a field or parameter of type 'uint/uint2/uint3'}} +void CSMain_GID(float ID : SV_GroupID) { +} + +[numthreads(8,8,1)] +// expected-error@+1 {{attribute 'SV_GroupID' only applies to a field or parameter of type 'uint/uint2/uint3'}} +void CSMain2_GID(ST GID : SV_GroupID) { + +} + +void foo_GID() { +// expected-warning@+1 {{'SV_GroupID' attribute only applies to parameters and non-static data members}} + uint GIS : SV_GroupID; +} + +struct ST2_GID { +// expected-warning@+1 {{'SV_GroupID' attribute only applies to parameters and non-static data members}} + static uint GID : SV_GroupID; 
+ uint s_gid : SV_GroupID; +}; diff --git a/clang/test/SemaHLSL/Semantics/valid_entry_parameter.hlsl b/clang/test/SemaHLSL/Semantics/valid_entry_parameter.hlsl index 8e79fc4d85ec91..10a5e5dabac87b 100644 --- a/clang/test/SemaHLSL/Semantics/valid_entry_parameter.hlsl +++ b/clang/test/SemaHLSL/Semantics/valid_entry_parameter.hlsl @@ -24,3 +24,28 @@ void CSMain3(uint3 : SV_DispatchThreadID) { // CHECK-NEXT: ParmVarDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:20 'uint3' // CHECK-NEXT: HLSLSV_DispatchThreadIDAttr } + +[numthreads(8,8,1)] +void CSMain_GID(uint ID : SV_GroupID) { +// CHECK: FunctionDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> line:[[@LINE-1]]:6 CSMain_GID 'void (uint)' +// CHECK-NEXT: ParmVarDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:22 ID 'uint' +// CHECK-NEXT: HLSLSV_GroupIDAttr +} +[numthreads(8,8,1)] +void CSMain1_GID(uint2 ID : SV_GroupID) { +// CHECK: FunctionDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> line:[[@LINE-1]]:6 CSMain1_GID 'void (uint2)' +// CHECK-NEXT: ParmVarDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:24 ID 'uint2' +// CHECK-NEXT: HLSLSV_GroupIDAttr +} +[numthreads(8,8,1)] +void CSMain2_GID(uint3 ID : SV_GroupID) { +// CHECK: FunctionDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> line:[[@LINE-1]]:6 CSMain2_GID 'void (uint3)' +// CHECK-NEXT: ParmVarDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:24 ID 'uint3' +// CHECK-NEXT: HLSLSV_GroupIDAttr +} +[numthreads(8,8,1)] +void CSMain3_GID(uint3 : SV_GroupID) { +// CHECK: FunctionDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> line:[[@LINE-1]]:6 CSMain3_GID 'void (uint3)' +// CHECK-NEXT: ParmVarDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:24 'uint3' +// CHECK-NEXT: HLSLSV_GroupIDAttr +} diff --git a/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c b/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c index a770020764d356..69f93a6c605156 100644 --- a/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c +++ b/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c @@ -111,8 +111,6 @@ void uses() { // 
expected-warning@+1{{OpenACC clause 'no_create' not yet implemented}} #pragma acc parallel loop auto no_create(Var) for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'present' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'present' not yet implemented}} #pragma acc parallel loop auto present(Var) for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop auto private(Var) @@ -274,8 +272,6 @@ void uses() { // expected-warning@+1{{OpenACC clause 'no_create' not yet implemented}} #pragma acc parallel loop no_create(Var) auto for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'present' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'present' not yet implemented}} #pragma acc parallel loop present(Var) auto for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop private(Var) auto @@ -438,8 +434,6 @@ void uses() { // expected-warning@+1{{OpenACC clause 'no_create' not yet implemented}} #pragma acc parallel loop independent no_create(Var) for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'present' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'present' not yet implemented}} #pragma acc parallel loop independent present(Var) for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop independent private(Var) @@ -601,8 +595,6 @@ void uses() { // expected-warning@+1{{OpenACC clause 'no_create' not yet implemented}} #pragma acc parallel loop no_create(Var) independent for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'present' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'present' not yet implemented}} #pragma acc parallel loop present(Var) independent for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop private(Var) independent @@ -771,8 +763,6 @@ void uses() { // expected-warning@+1{{OpenACC clause 'no_create' 
not yet implemented}} #pragma acc parallel loop seq no_create(Var) for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'present' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'present' not yet implemented}} #pragma acc parallel loop seq present(Var) for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop seq private(Var) @@ -940,8 +930,6 @@ void uses() { // expected-warning@+1{{OpenACC clause 'no_create' not yet implemented}} #pragma acc parallel loop no_create(Var) seq for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'present' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'present' not yet implemented}} #pragma acc parallel loop present(Var) seq for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop private(Var) seq diff --git a/clang/test/SemaOpenACC/combined-construct-default-clause.c b/clang/test/SemaOpenACC/combined-construct-default-clause.c index 646942ec700133..c420c8ebeb31d6 100644 --- a/clang/test/SemaOpenACC/combined-construct-default-clause.c +++ b/clang/test/SemaOpenACC/combined-construct-default-clause.c @@ -11,8 +11,7 @@ void SingleOnly() { int i; - // expected-warning@+4{{OpenACC clause 'copy' not yet implemented}} - // expected-warning@+3{{OpenACC clause 'present' not yet implemented}} + // expected-warning@+3{{OpenACC clause 'copy' not yet implemented}} // expected-error@+2{{OpenACC 'default' clause cannot appear more than once on a 'kernels loop' directive}} // expected-note@+1{{previous clause is here}} #pragma acc kernels loop self default(present) present(i) default(none) copy(i) diff --git a/clang/test/SemaOpenACC/combined-construct-present-ast.cpp b/clang/test/SemaOpenACC/combined-construct-present-ast.cpp new file mode 100644 index 00000000000000..028831c5f9899c --- /dev/null +++ b/clang/test/SemaOpenACC/combined-construct-present-ast.cpp @@ -0,0 +1,78 @@ +// RUN: %clang_cc1 %s -fopenacc -ast-dump | 
FileCheck %s + +// Test this with PCH. +// RUN: %clang_cc1 %s -fopenacc -emit-pch -o %t %s +// RUN: %clang_cc1 %s -fopenacc -include-pch %t -ast-dump-all | FileCheck %s + +#ifndef PCH_HELPER +#define PCH_HELPER + +int Global; +short GlobalArray[5]; + +void NormalUses(float *PointerParam) { + // CHECK: FunctionDecl{{.*}}NormalUses + // CHECK: ParmVarDecl + // CHECK-NEXT: CompoundStmt + +#pragma acc parallel loop present(GlobalArray, PointerParam[Global]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: present clause + // CHECK-NEXT: DeclRefExpr{{.*}}'short[5]' lvalue Var{{.*}}'GlobalArray' 'short[5]' + // CHECK-NEXT: ArraySubscriptExpr{{.*}}'float' lvalue + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'float *' + // CHECK-NEXT: DeclRefExpr{{.*}}'float *' lvalue ParmVar{{.*}}'PointerParam' 'float *' + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'int' + // CHECK-NEXT: DeclRefExpr{{.*}}'int' lvalue Var{{.*}}'Global' 'int' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt +} + +template +void TemplUses(T t) { + // CHECK-NEXT: FunctionTemplateDecl + // CHECK-NEXT: NonTypeTemplateParmDecl {{.*}}referenced 'auto &' depth 0 index 0 NTTP + // CHECK-NEXT: TemplateTypeParmDecl{{.*}}typename depth 0 index 1 T + // CHECK-NEXT: FunctionDecl{{.*}} TemplUses 'void (T)' + // CHECK-NEXT: ParmVarDecl{{.*}} referenced t 'T' + // CHECK-NEXT: CompoundStmt + +#pragma acc serial loop seq present(NTTP, t) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} serial loop + // CHECK-NEXT: seq clause + // CHECK-NEXT: present clause + // CHECK-NEXT: DeclRefExpr{{.*}}'auto' lvalue NonTypeTemplateParm{{.*}} 'NTTP' 'auto &' + // CHECK-NEXT: DeclRefExpr{{.*}}'T' lvalue ParmVar{{.*}} 't' 'T' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + + + // Check the instantiated versions of the above. 
+ // CHECK-NEXT: FunctionDecl{{.*}} used TemplUses 'void (int)' implicit_instantiation + // CHECK-NEXT: TemplateArgument decl + // CHECK-NEXT: Var{{.*}} 'CEVar' 'const unsigned int' + // CHECK-NEXT: TemplateArgument type 'int' + // CHECK-NEXT: BuiltinType{{.*}} 'int' + // CHECK-NEXT: ParmVarDecl{{.*}} used t 'int' + // CHECK-NEXT: CompoundStmt + +// #pragma acc parallel seq present(NTTP, t) + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} serial loop + // CHECK-NEXT: seq clause + // CHECK-NEXT: present clause + // CHECK-NEXT: SubstNonTypeTemplateParmExpr{{.*}}'const unsigned int' lvalue + // CHECK-NEXT: NonTypeTemplateParmDecl{{.*}} referenced 'auto &' depth 0 index 0 NTTP + // CHECK-NEXT: DeclRefExpr{{.*}}'const unsigned int' lvalue Var{{.*}} 'CEVar' 'const unsigned int' + // CHECK-NEXT: DeclRefExpr{{.*}}'int' lvalue ParmVar{{.*}} 't' 'int' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +} + +void Inst() { + static constexpr unsigned CEVar = 1; + TemplUses(5); +} +#endif diff --git a/clang/test/SemaOpenACC/combined-construct-present-clause.c b/clang/test/SemaOpenACC/combined-construct-present-clause.c new file mode 100644 index 00000000000000..acdaaa33929233 --- /dev/null +++ b/clang/test/SemaOpenACC/combined-construct-present-clause.c @@ -0,0 +1,58 @@ +// RUN: %clang_cc1 %s -fopenacc -verify + +typedef struct IsComplete { + struct S { int A; } CompositeMember; + int ScalarMember; + float ArrayMember[5]; + void *PointerMember; +} Complete; +void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete CompositeParam) { + int LocalInt; + short *LocalPointer; + float LocalArray[5]; + Complete LocalComposite; + // Check Appertainment: +#pragma acc parallel loop present(LocalInt) + for(int i = 5; i < 10;++i); +#pragma acc serial loop present(LocalInt) + for(int i = 5; i < 10;++i); +#pragma acc kernels loop present(LocalInt) + for(int i = 5; i < 10;++i); + + // Valid cases: +#pragma acc parallel loop present(LocalInt, LocalPointer, LocalArray) + for(int i 
= 5; i < 10;++i); +#pragma acc parallel loop present(LocalArray[2:1]) + for(int i = 5; i < 10;++i); + +#pragma acc parallel loop present(LocalComposite.ScalarMember, LocalComposite.ScalarMember) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}} +#pragma acc parallel loop present(1 + IntParam) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}} +#pragma acc parallel loop present(+IntParam) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{OpenACC sub-array length is unspecified and cannot be inferred because the subscripted value is not an array}} +#pragma acc parallel loop present(PointerParam[2:]) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}} +#pragma acc parallel loop present(ArrayParam[2:5]) + for(int i = 5; i < 10;++i); + + // expected-error@+2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}} + // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}} +#pragma acc parallel loop present((float*)ArrayParam[2:5]) + for(int i = 5; i < 10;++i); + // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}} +#pragma acc parallel loop present((float)ArrayParam[2]) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{OpenACC 'present' clause is not valid on 'loop' directive}} +#pragma acc loop present(LocalInt) + for(int i = 5; i < 10;++i); +} diff --git a/clang/test/SemaOpenACC/combined-construct-present-clause.cpp 
b/clang/test/SemaOpenACC/combined-construct-present-clause.cpp new file mode 100644 index 00000000000000..814acf25c01c02 --- /dev/null +++ b/clang/test/SemaOpenACC/combined-construct-present-clause.cpp @@ -0,0 +1,112 @@ +// RUN: %clang_cc1 %s -fopenacc -verify + +enum SomeE{}; +typedef struct IsComplete { + struct S { int A; } CompositeMember; + int ScalarMember; + float ArrayMember[5]; + SomeE EnumMember; + char *PointerMember; +} Complete; + +void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete CompositeParam, int &IntParamRef) { + int LocalInt; + char *LocalPointer; + float LocalArray[5]; + // Check Appertainment: +#pragma acc parallel loop present(LocalInt) + for(unsigned I = 0; I < 5; ++I); +#pragma acc serial loop present(LocalInt) + for(unsigned I = 0; I < 5; ++I); +#pragma acc kernels loop present(LocalInt) + for(unsigned I = 0; I < 5; ++I); + + // Valid cases: +#pragma acc parallel loop present(LocalInt, LocalPointer, LocalArray) + for(unsigned I = 0; I < 5; ++I); +#pragma acc parallel loop present(LocalArray[2:1]) + for(unsigned I = 0; I < 5; ++I); + + Complete LocalComposite2; +#pragma acc parallel loop present(LocalComposite2.ScalarMember, LocalComposite2.ScalarMember) + for(unsigned I = 0; I < 5; ++I); + + // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}} +#pragma acc parallel loop present(1 + IntParam) + for(unsigned I = 0; I < 5; ++I); + + // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}} +#pragma acc parallel loop present(+IntParam) + for(unsigned I = 0; I < 5; ++I); + + // expected-error@+1{{OpenACC sub-array length is unspecified and cannot be inferred because the subscripted value is not an array}} +#pragma acc parallel loop present(PointerParam[2:]) + for(unsigned I = 0; I < 5; ++I); + + // 
expected-error@+1{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}} +#pragma acc parallel loop present(ArrayParam[2:5]) + for(unsigned I = 0; I < 5; ++I); + + // expected-error@+2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}} + // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}} +#pragma acc parallel loop present((float*)ArrayParam[2:5]) + for(unsigned I = 0; I < 5; ++I); + // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}} +#pragma acc parallel loop present((float)ArrayParam[2]) + for(unsigned I = 0; I < 5; ++I); +} + +template +void TemplUses(T t, T (&arrayT)[Int], V TemplComp) { + // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}} +#pragma acc parallel loop present(+t) + for(unsigned I = 0; I < 5; ++I); + + // NTTP's are only valid if it is a reference to something. 
+ // expected-error@+2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}} + // expected-note@#TEMPL_USES_INST{{in instantiation of}} +#pragma acc parallel loop present(Int) + for(unsigned I = 0; I < 5; ++I); + + // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}} +#pragma acc parallel loop present(t, Int) + for(unsigned I = 0; I < 5; ++I); + +#pragma acc parallel loop present(arrayT) + for(unsigned I = 0; I < 5; ++I); + +#pragma acc parallel loop present(TemplComp) + for(unsigned I = 0; I < 5; ++I); + +#pragma acc parallel loop present(TemplComp.PointerMember[5]) + for(unsigned I = 0; I < 5; ++I); + int *Pointer; +#pragma acc parallel loop present(Pointer[:Int]) + for(unsigned I = 0; I < 5; ++I); +#pragma acc parallel loop present(Pointer[:t]) + for(unsigned I = 0; I < 5; ++I); + // expected-error@+1{{OpenACC sub-array length is unspecified and cannot be inferred because the subscripted value is not an array}} +#pragma acc parallel loop present(Pointer[1:]) + for(unsigned I = 0; I < 5; ++I); +} + +template +void NTTP() { + // NTTP's are only valid if it is a reference to something. 
+ // expected-error@+2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}} + // expected-note@#NTTP_INST{{in instantiation of}} +#pragma acc parallel loop present(Int) + for(unsigned I = 0; I < 5; ++I); + +#pragma acc parallel loop present(NTTP_REF) + for(unsigned I = 0; I < 5; ++I); +} + +void Inst() { + static constexpr int NTTP_REFed = 1; + int i; + int Arr[5]; + Complete C; + TemplUses(i, Arr, C); // #TEMPL_USES_INST + NTTP<5, NTTP_REFed>(); // #NTTP_INST +} diff --git a/clang/test/SemaOpenACC/compute-construct-varlist-ast.cpp b/clang/test/SemaOpenACC/compute-construct-varlist-ast.cpp index e057678d924957..1bfd4e8af64818 100644 --- a/clang/test/SemaOpenACC/compute-construct-varlist-ast.cpp +++ b/clang/test/SemaOpenACC/compute-construct-varlist-ast.cpp @@ -1,5 +1,12 @@ // RUN: %clang_cc1 %s -fopenacc -Wno-openacc-deprecated-clause-alias -ast-dump | FileCheck %s +// Test this with PCH. +// RUN: %clang_cc1 %s -fopenacc -emit-pch -Wno-openacc-deprecated-clause-alias -o %t %s +// RUN: %clang_cc1 %s -fopenacc -include-pch %t -Wno-openacc-deprecated-clause-alias -ast-dump-all | FileCheck %s + +#ifndef PCH_HELPER +#define PCH_HELPER + int Global; short GlobalArray[5]; @@ -435,7 +442,7 @@ void TemplUses(T t, U u, T*PointerParam) { // CHECK-NEXT: TemplateArgument type 'int' // CHECK-NEXT: BuiltinType{{.*}} 'int' // CHECK-NEXT: TemplateArgument type 'int[1]' - // CHECK-NEXT: ConstantArrayType{{.*}} 'int[1]' 1 + // CHECK-NEXT: ConstantArrayType{{.*}} 'int[1]'{{.*}} 1 // CHECK-NEXT: BuiltinType{{.*}} 'int' // CHECK-NEXT: ParmVarDecl{{.*}} used t 'int' // CHECK-NEXT: ParmVarDecl{{.*}} used u 'int *' @@ -979,3 +986,4 @@ void Inst() { STempl stempl; stempl.bar(); } +#endif diff --git a/libcxx/test/std/containers/sequences/vector/addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/addressof.compile.pass.cpp new file mode 100644 index 00000000000000..120b7b289af93e --- 
/dev/null +++ b/libcxx/test/std/containers/sequences/vector/addressof.compile.pass.cpp @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 && !stdlib=libc++ + +// + +// Validate various member functions of std::vector with an ADL-hijacking operator& + +#include +#include + +#include "operator_hijacker.h" +#include "test_iterators.h" + +using Vector = std::vector; + +void test( + Vector v, Vector::const_iterator it, cpp17_input_iterator other_it, operator_hijacker val) { + // emplace / insert + v.emplace(it); + v.insert(it, it, it); + v.insert(it, other_it, other_it); + v.insert(it, operator_hijacker()); + v.insert(it, 1, val); + v.insert(it, val); + + // erase + v.erase(it); + v.erase(it, it); + + // assignment + v = static_cast(v); + v = std::move(v); + + // construction + { Vector v2(std::move(v)); } + { Vector v2(std::move(v), std::allocator()); } + + // swap + v.swap(v); +} diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_copy.addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/assign_copy.addressof.compile.pass.cpp deleted file mode 100644 index ceecdfda3fa304..00000000000000 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_copy.addressof.compile.pass.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// vector& operator=(const vector& c); - -// Validate whether the container can be copy-assigned with an ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::vector vo; - std::vector v; - v = vo; -} diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_move.addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/assign_move.addressof.compile.pass.cpp deleted file mode 100644 index 2008c8d048f4b4..00000000000000 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_move.addressof.compile.pass.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// vector& operator=(vector&& c); - -// Validate whether the container can be copy-assigned with an ADL-hijacking operator& - -#include -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::vector vo; - std::vector v; - v = std::move(vo); -} diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/move.addressof.compile.pass.cpp deleted file mode 100644 index 521b8705d49202..00000000000000 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/move.addressof.compile.pass.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 && !stdlib=libc++ - -// - -// vector(vector&& c); - -// Validate whether the container can be copy-assigned with an ADL-hijacking operator& - -#include -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - { - std::vector vo; - std::vector v(std::move(vo)); - } - { - std::vector vo; - std::vector v(std::move(vo), std::allocator()); - } -} diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/emplace.addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/emplace.addressof.compile.pass.cpp deleted file mode 100644 index 43e553e71e7414..00000000000000 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/emplace.addressof.compile.pass.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 && !stdlib=libc++ - -// - -// template iterator emplace(const_iterator pos, Args&&... 
args); - -// Validate whether the container can be copy-assigned with an ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::vector v; - v.emplace(v.end()); -} diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.addressof.compile.pass.cpp deleted file mode 100644 index 0fce3498fec7e8..00000000000000 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.addressof.compile.pass.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// iterator erase(const_iterator position); - -// Validate whether the container can be copy-assigned with an ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::vector v; - v.erase(v.end()); -} diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter_iter.addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter_iter.addressof.compile.pass.cpp deleted file mode 100644 index bc90fa783e98f5..00000000000000 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter_iter.addressof.compile.pass.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// iterator erase(const_iterator position); - -// Validate whether the container can be copy-assigned with an ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::vector v; - v.erase(v.begin(), v.end()); -} diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.addressof.compile.pass.cpp deleted file mode 100644 index f8311090b37e3c..00000000000000 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.addressof.compile.pass.cpp +++ /dev/null @@ -1,31 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// template -// iterator insert(const_iterator position, Iter first, Iter last); - -// Validate whether the container can be copy-assigned with an ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" -#include "test_iterators.h" - -void test(cpp17_input_iterator i) { - { - std::vector v; - v.insert(v.end(), i, i); - } - { - std::vector v; - v.insert(v.end(), v.begin(), v.end()); - } -} diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_rvalue.addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_rvalue.addressof.compile.pass.cpp deleted file mode 100644 index 11f24604eeac4d..00000000000000 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_rvalue.addressof.compile.pass.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 && !stdlib=libc++ - -// - -// iterator insert(const_iterator position, value_type&& x); - -// Validate whether the container can be copy-assigned with an ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::vector v; - v.insert(v.end(), operator_hijacker()); -} diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_size_value.addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_size_value.addressof.compile.pass.cpp deleted file mode 100644 index c02b92a4998e85..00000000000000 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_size_value.addressof.compile.pass.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// iterator insert(const_iterator position, size_type n, const value_type& x); - -// Validate whether the container can be copy-assigned with an ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::vector v; - operator_hijacker val; - v.insert(v.end(), 1, val); -} diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_value.addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_value.addressof.compile.pass.cpp deleted file mode 100644 index fbf1a4f50b974f..00000000000000 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_value.addressof.compile.pass.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// iterator insert(const_iterator position, const value_type& x); - -// Validate whether the container can be copy-assigned with an ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::vector v; - operator_hijacker val; - v.insert(v.end(), val); -} diff --git a/libcxx/test/std/containers/sequences/vector/vector.special/swap.addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.special/swap.addressof.compile.pass.cpp deleted file mode 100644 index 4e908d9ff6eaca..00000000000000 --- a/libcxx/test/std/containers/sequences/vector/vector.special/swap.addressof.compile.pass.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// template -// void swap(vector& x, vector& y); - -// Validate whether the container can be copy-assigned with an ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::vector vo; - std::vector v; - v.swap(vo); -} diff --git a/lldb/source/Core/PluginManager.cpp b/lldb/source/Core/PluginManager.cpp index a5219025495a91..80c9465f9af721 100644 --- a/lldb/source/Core/PluginManager.cpp +++ b/lldb/source/Core/PluginManager.cpp @@ -206,10 +206,9 @@ template class PluginInstances { if (!callback) return false; assert(!name.empty()); - Instance instance = - Instance(name, description, callback, std::forward(args)...); - m_instances.push_back(instance); - return false; + m_instances.emplace_back(name, description, callback, + std::forward(args)...); + return true; } bool UnregisterPlugin(typename Instance::CallbackType callback) { diff --git a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h index 4ebbaadebe9f90..6f8d41351a6bfb 100644 --- a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h +++ b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h @@ -83,10 +83,10 @@ struct ELFLinuxSigInfo { int32_t si_errno; int32_t si_code; // Copied from siginfo_t so we don't have to include signal.h on non 'Nix - // builds. - struct { - lldb::addr_t si_addr; /* faulting insn/memory ref. */ - short int si_addr_lsb; /* Valid LSB of the reported address. */ + // builds. Slight modifications to ensure no 32b vs 64b differences. + struct alignas(8) { + lldb::addr_t si_addr; /* faulting insn/memory ref. */ + int16_t si_addr_lsb; /* Valid LSB of the reported address. 
*/ union { /* used when si_code=SEGV_BNDERR */ struct { @@ -98,7 +98,8 @@ struct ELFLinuxSigInfo { } bounds; } sigfault; - enum { eUnspecified, eNT_SIGINFO } note_type; + enum SigInfoNoteType : uint8_t { eUnspecified, eNT_SIGINFO }; + SigInfoNoteType note_type; ELFLinuxSigInfo(); diff --git a/llvm/docs/GitHub.rst b/llvm/docs/GitHub.rst index d785d9da9a7f48..37995969b6df38 100644 --- a/llvm/docs/GitHub.rst +++ b/llvm/docs/GitHub.rst @@ -433,3 +433,12 @@ will be created with the specified commits. If a commit you want to backport does not apply cleanly, you may resolve the conflicts locally and then create a pull request against the release branch. Just make sure to add the release milestone to the pull request. + +Getting admin access to CI infrastructure +========================================= + +Any individual who is responsible for setting up and/or maintaining CI infrastructure for a LLVM project can +request to be granted the CI/CD role to the LLVM organization admins. The request can be made by creating +`a Github issue `_ and using the ``infrastructure`` label. +Applicants must include a justification for why the role is being requested. Applications are reviewed on a +case-by-case basis by the LLVM admins and the role can be revoked at any point as the LLVM admins see fit. diff --git a/llvm/include/llvm/IR/ConstantRangeList.h b/llvm/include/llvm/IR/ConstantRangeList.h index 44d1daebe49e4a..b12c913103df57 100644 --- a/llvm/include/llvm/IR/ConstantRangeList.h +++ b/llvm/include/llvm/IR/ConstantRangeList.h @@ -35,7 +35,7 @@ class [[nodiscard]] ConstantRangeList { ConstantRangeList(ArrayRef RangesRef) { assert(isOrderedRanges(RangesRef)); for (const ConstantRange &R : RangesRef) { - assert(R.getBitWidth() == getBitWidth()); + assert(empty() || R.getBitWidth() == getBitWidth()); Ranges.push_back(R); } } @@ -59,8 +59,9 @@ class [[nodiscard]] ConstantRangeList { /// Return true if this list contains no members. 
bool empty() const { return Ranges.empty(); } - /// Get the bit width of this ConstantRangeList. - uint32_t getBitWidth() const { return 64; } + /// Get the bit width of this ConstantRangeList. It is invalid to call this + /// with an empty range. + uint32_t getBitWidth() const { return Ranges.front().getBitWidth(); } /// Return the number of ranges in this ConstantRangeList. size_t size() const { return Ranges.size(); } diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 0b3e45b752fe9c..646891754e7241 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -1460,6 +1460,7 @@ class MDNode : public Metadata { static MDNode *getMostGenericTBAA(MDNode *A, MDNode *B); static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B); static MDNode *getMostGenericRange(MDNode *A, MDNode *B); + static MDNode *getMostGenericNoaliasAddrspace(MDNode *A, MDNode *B); static MDNode *getMostGenericAliasScope(MDNode *A, MDNode *B); static MDNode *getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B); /// Merge !prof metadata from two instructions. 
diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def index 324d5c18e6dea3..e3285f89ef9eab 100644 --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def @@ -13,6 +13,7 @@ LOONGARCH_FEATURE("+ual", FK_UAL) LOONGARCH_FEATURE("+frecipe", FK_FRECIPE) LOONGARCH_FEATURE("+lam-bh", FK_LAM_BH) LOONGARCH_FEATURE("+ld-seq-sa", FK_LD_SEQ_SA) +LOONGARCH_FEATURE("+div32", FK_DIV32) #undef LOONGARCH_FEATURE @@ -22,6 +23,6 @@ LOONGARCH_FEATURE("+ld-seq-sa", FK_LD_SEQ_SA) LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL) LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL) -LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE | FK_LAM_BH | FK_LD_SEQ_SA) +LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE | FK_LAM_BH | FK_LD_SEQ_SA | FK_DIV32) #undef LOONGARCH_ARCH diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h index 00957b84ab576c..5862becc92d774 100644 --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h @@ -57,6 +57,8 @@ enum FeatureKind : uint32_t { // Do not generate load-load barrier instructions (dbar 0x700). FK_LD_SEQ_SA = 1 << 12, + // Assume div.w[u] and mod.w[u] can handle inputs that are not sign-extended. 
+ FK_DIV32 = 1 << 13, }; struct FeatureInfo { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 45455340a09e92..5b5d75d4b8e32b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2345,14 +2345,45 @@ findPrologueEndLoc(const MachineFunction *MF) { // better off synthesising an early prologue_end. auto CurBlock = MF->begin(); auto CurInst = CurBlock->begin(); - while (true) { - // Skip empty blocks, in rare cases the entry can be empty. - if (CurInst == CurBlock->end()) { - ++CurBlock; - CurInst = CurBlock->begin(); - continue; + + // Find the initial instruction, we're guaranteed one by the caller, but not + // which block it's in. + while (CurBlock->empty()) + CurInst = (++CurBlock)->begin(); + assert(CurInst != CurBlock->end()); + + // Helper function for stepping through the initial sequence of + // unconditionally executed instructions. + auto getNextInst = [&CurBlock, &CurInst, MF]() -> bool { + // We've reached the end of the block. Did we just look at a terminator? + if (CurInst->isTerminator()) { + // Some kind of "real" control flow is occurring. At the very least + // we would have to start exploring the CFG, a good signal that the + // prologue is over. + return false; } + // If we've already fallen through into a loop, don't fall through + // further, use a backup-location. + if (CurBlock->pred_size() > 1) + return false; + + // Fall-through from entry to the next block. This is common at -O0 when + // there's no initialisation in the function. Bail if we're also at the + // end of the function, or the remaining blocks have no instructions. + // Skip empty blocks, in rare cases the entry can be empty, and + // other optimisations may add empty blocks that the control flow falls + // through. 
+ do { + ++CurBlock; + if (CurBlock == MF->end()) + return false; + } while (CurBlock->empty()); + CurInst = CurBlock->begin(); + return true; + }; + + while (true) { // Check whether this non-meta instruction a good position for prologue_end. if (!CurInst->isMetaInstruction()) { auto FoundInst = ExamineInst(*CurInst); @@ -2369,25 +2400,8 @@ findPrologueEndLoc(const MachineFunction *MF) { continue; } - // We've reached the end of the block. Did we just look at a terminator? - if (CurInst->isTerminator()) { - // Some kind of "real" control flow is occurring. At the very least - // we would have to start exploring the CFG, a good signal that the - // prologue is over. - break; - } - - // If we've already fallen through into a loop, don't fall through - // further, use a backup-location. - if (CurBlock->pred_size() > 1) - break; - - // Fall-through from entry to the next block. This is common at -O0 when - // there's no initialisation in the function. Bail if we're also at the - // end of the function. - if (++CurBlock == MF->end()) + if (!getNextInst()) break; - CurInst = CurBlock->begin(); } // We couldn't find any source-location, suggesting all meaningful information diff --git a/llvm/lib/IR/ConstantRangeList.cpp b/llvm/lib/IR/ConstantRangeList.cpp index 0856f79bb9191a..3ee8d6f22b7487 100644 --- a/llvm/lib/IR/ConstantRangeList.cpp +++ b/llvm/lib/IR/ConstantRangeList.cpp @@ -39,12 +39,14 @@ void ConstantRangeList::insert(const ConstantRange &NewRange) { return; assert(!NewRange.isFullSet() && "Do not support full set"); assert(NewRange.getLower().slt(NewRange.getUpper())); - assert(getBitWidth() == NewRange.getBitWidth()); // Handle common cases. 
if (empty() || Ranges.back().getUpper().slt(NewRange.getLower())) { Ranges.push_back(NewRange); return; } + + assert(getBitWidth() == NewRange.getBitWidth()); + if (NewRange.getUpper().slt(Ranges.front().getLower())) { Ranges.insert(Ranges.begin(), NewRange); return; @@ -142,14 +144,15 @@ void ConstantRangeList::subtract(const ConstantRange &SubRange) { ConstantRangeList ConstantRangeList::unionWith(const ConstantRangeList &CRL) const { - assert(getBitWidth() == CRL.getBitWidth() && - "ConstantRangeList bitwidths don't agree!"); // Handle common cases. if (empty()) return CRL; if (CRL.empty()) return *this; + assert(getBitWidth() == CRL.getBitWidth() && + "ConstantRangeList bitwidths don't agree!"); + ConstantRangeList Result; size_t i = 0, j = 0; // "PreviousRange" tracks the lowest unioned range that is being processed. @@ -192,15 +195,15 @@ ConstantRangeList::unionWith(const ConstantRangeList &CRL) const { ConstantRangeList ConstantRangeList::intersectWith(const ConstantRangeList &CRL) const { - assert(getBitWidth() == CRL.getBitWidth() && - "ConstantRangeList bitwidths don't agree!"); - // Handle common cases. 
if (empty()) return *this; if (CRL.empty()) return CRL; + assert(getBitWidth() == CRL.getBitWidth() && + "ConstantRangeList bitwidths don't agree!"); + ConstantRangeList Result; size_t i = 0, j = 0; while (i < size() && j < CRL.size()) { diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index 971897e740cfbc..c0d7737f741353 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/ConstantRangeList.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" @@ -1353,6 +1354,43 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) { return MDNode::get(A->getContext(), MDs); } +MDNode *MDNode::getMostGenericNoaliasAddrspace(MDNode *A, MDNode *B) { + if (!A || !B) + return nullptr; + + if (A == B) + return A; + + SmallVector RangeListA, RangeListB; + for (unsigned I = 0, E = A->getNumOperands() / 2; I != E; ++I) { + auto *LowA = mdconst::extract(A->getOperand(2 * I + 0)); + auto *HighA = mdconst::extract(A->getOperand(2 * I + 1)); + RangeListA.push_back(ConstantRange(LowA->getValue(), HighA->getValue())); + } + + for (unsigned I = 0, E = B->getNumOperands() / 2; I != E; ++I) { + auto *LowB = mdconst::extract(B->getOperand(2 * I + 0)); + auto *HighB = mdconst::extract(B->getOperand(2 * I + 1)); + RangeListB.push_back(ConstantRange(LowB->getValue(), HighB->getValue())); + } + + ConstantRangeList CRLA(RangeListA); + ConstantRangeList CRLB(RangeListB); + ConstantRangeList Result = CRLA.intersectWith(CRLB); + if (Result.empty()) + return nullptr; + + SmallVector MDs; + for (const ConstantRange &CR : Result) { + MDs.push_back(ConstantAsMetadata::get( + ConstantInt::get(A->getContext(), CR.getLower()))); + MDs.push_back(ConstantAsMetadata::get( + ConstantInt::get(A->getContext(), CR.getUpper()))); + } + + return MDNode::get(A->getContext(), MDs); +} + MDNode 
*MDNode::getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B) { if (!A || !B) return nullptr; diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 242aea5fbb0142..a8ba89f784c8cd 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -529,6 +529,18 @@ def UImmS8XForm : SDNodeXFormgetTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i64); }]>; +def UImmM2XForm : SDNodeXFormgetTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i32); +}]>; + +def UImmM4XForm : SDNodeXFormgetTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i32); +}]>; + +def UImmM8XForm : SDNodeXFormgetTargetConstant(N->getZExtValue() * 8, SDLoc(N), MVT::i32); +}]>; + // uimm5sN predicate - True if the immediate is a multiple of N in the range // [0 * N, 32 * N]. def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>; @@ -1098,6 +1110,13 @@ def timm32_0_1 : Operand, TImmLeaf, TImmLeaf { + let ParserMatchClass = Imm0_15Operand; +} + // timm32_1_1 - True if the 32-bit immediate is in the range [1,1] def timm32_1_1 : Operand, TImmLeaf, TImmLeaf, TImmLeaf { + let ParserMatchClass = Imm0_15Operand; +} + // timm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7] def timm32_0_7 : Operand, TImmLeaf, TImmLeaf, TImmLeaf { + let ParserMatchClass = Imm0_15Operand; +} + // timm32_1_7 predicate - True if the 32-bit immediate is in the range [1,7] def timm32_1_7 : Operand, TImmLeaf 0 && ((uint32_t)Imm) < 8; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 1ddb913f013f5e..4bdf327e0d3fc3 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -10459,15 +10459,14 @@ class sve2p1_extq multiclass sve2p1_extq { def NAME : sve2p1_extq; def : SVE_3_Op_Imm_Pat(NAME)>; - def : SVE_3_Op_Imm_Pat(NAME)>; - def : SVE_3_Op_Imm_Pat(NAME)>; - def : SVE_3_Op_Imm_Pat(NAME)>; 
- - def : SVE_3_Op_Imm_Pat(NAME)>; - def : SVE_3_Op_Imm_Pat(NAME)>; - def : SVE_3_Op_Imm_Pat(NAME)>; - def : SVE_3_Op_Imm_Pat(NAME -)>; + def : SVE_3_Op_Imm_Pat(NAME)>; + def : SVE_3_Op_Imm_Pat(NAME)>; + def : SVE_3_Op_Imm_Pat(NAME)>; + + def : SVE_3_Op_Imm_Pat(NAME)>; + def : SVE_3_Op_Imm_Pat(NAME)>; + def : SVE_3_Op_Imm_Pat(NAME)>; + def : SVE_3_Op_Imm_Pat(NAME)>; } // SVE move predicate from vector diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3f0845864336fe..2e0f95161935a9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -855,6 +855,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM}, {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16}, Custom); + } else { + // FIXME: For nnan fmaximum, emit the fmaximum3 instead of fmaxnum + if (Subtarget->hasMinimum3Maximum3F32()) + setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal); } setOperationAction(ISD::INTRINSIC_WO_CHAIN, diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 5d4d56e8b0ad22..2b207e008581b3 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1234,6 +1234,23 @@ def : IntClampPat; def : IntClampPat; def : IntClampPat; +//===----------------------------------------------------------------------===// +// Floating-point operation Patterns +//===----------------------------------------------------------------------===// + +// Implement fminimum(x, y) by using minimum3(x, y, y) +class MinimumMaximumByMinimum3Maximum3 : GCNPat< + (vt (node (VOP3Mods vt:$src0, i32:$src0_mods), (VOP3Mods vt:$src1, i32:$src1_mods))), + (inst $src0_mods, $src0, $src1_mods, $src1, $src1_mods, $src1) +>; + +// Prefer the real 2 operand form if legal +let SubtargetPredicate = HasMinimum3Maximum3F32, AddedComplexity = -1000 in { +def : 
MinimumMaximumByMinimum3Maximum3; +def : MinimumMaximumByMinimum3Maximum3; +} + //===----------------------------------------------------------------------===// // Target-specific instruction encodings. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td index 100bdba36c440c..463786e72bdf8d 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.td +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -123,6 +123,12 @@ def FeatureLD_SEQ_SA "Don't use load-load barrier (dbar 0x700).">; def HasLD_SEQ_SA : Predicate<"Subtarget->hasLD_SEQ_SA()">; +// Assume div.w[u] and mod.w[u] can handle inputs that are not sign-extended. +def FeatureDiv32 + : SubtargetFeature<"div32", "HasDiv32", "true", + "Assume div.w[u] and mod.w[u] can handle inputs that are not sign-extended">; +def HasDiv32 : Predicate<"Subtarget->hasDiv32()">; + def TunePreferWInst : SubtargetFeature<"prefer-w-inst", "PreferWInst", "true", "Prefer instructions with W suffix">; @@ -165,7 +171,8 @@ def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit, FeatureExtLVZ, FeatureExtLBT, FeatureFrecipe, - FeatureLAM_BH]>; + FeatureLAM_BH, + FeatureDiv32]>; //===----------------------------------------------------------------------===// // Define the LoongArch target. diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 1abb428175eea7..6c30cc0d6cfb3e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2833,7 +2833,10 @@ void LoongArchTargetLowering::ReplaceNodeResults( case ISD::UREM: assert(VT == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); - Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND)); + Results.push_back(customLegalizeToWOp(N, DAG, 2, + Subtarget.hasDiv32() && VT == MVT::i32 + ? 
ISD::ANY_EXTEND + : ISD::SIGN_EXTEND)); break; case ISD::SHL: case ISD::SRA: diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f4d3668726164b..5451035b500846 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -18928,7 +18928,7 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent, SDValue Op1 = N.getOperand(1); int16_t Imm = Op1->getAsZExtVal(); if (!Align || isAligned(*Align, Imm)) { - Disp = DAG.getTargetConstant(Imm, DL, N.getValueType()); + Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType()); Base = Op0; if (FrameIndexSDNode *FI = dyn_cast(Op0)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); @@ -18959,7 +18959,7 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent, // this as "d, 0". int16_t Imm; if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) { - Disp = DAG.getTargetConstant(Imm, DL, CNType); + Disp = DAG.getSignedTargetConstant(Imm, DL, CNType); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CNType); break; @@ -18992,14 +18992,14 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent, if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) && (isIntS34Immediate(N.getOperand(1), Imm34))) { // N is an Add/OR Node, and it's operand is a 34-bit signed immediate. - Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType()); + Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); else Base = N.getOperand(0); } else if (isIntS34Immediate(N, Imm34)) { // The address is a 34-bit signed immediate. 
- Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType()); + Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType()); Base = DAG.getRegister(PPC::ZERO8, N.getValueType()); } break; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td index bb412fa88b0f12..38f9b5ef1d80be 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver4.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td @@ -1652,7 +1652,7 @@ def : InstRW<[Zn4MOVS], (instregex def Zn4MOVSZ: SchedWriteRes<[Zn4FPFMisc12]> { let Latency = 4; - let ReleaseAtCycles = [4]; + let ReleaseAtCycles = [2]; let NumMicroOps = 1; } def : InstRW<[Zn4MOVSZ], (instregex @@ -1661,7 +1661,7 @@ def : InstRW<[Zn4MOVSZ], (instregex def Zn4MOVSrr: SchedWriteRes<[Zn4FPFMisc12]> { let Latency = 5; - let ReleaseAtCycles = [5]; + let ReleaseAtCycles = [2]; let NumMicroOps = 1; } def : InstRW<[Zn4MOVSrr], (instregex diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index ac82c2f58b16ca..2dc3d4517eed30 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -2018,12 +2018,12 @@ const StringMap sys::getHostCPUFeatures() { Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ Features["frecipe"] = cpucfg2 & (1U << 25); // CPUCFG.2.FRECIPE + Features["div32"] = cpucfg2 & (1U << 26); // CPUCFG.2.DIV32 Features["lam-bh"] = cpucfg2 & (1U << 27); // CPUCFG.2.LAM_BH Features["ld-seq-sa"] = cpucfg3 & (1U << 23); // CPUCFG.3.LD_SEQ_SA // TODO: Need to complete. 
- // Features["div32"] = cpucfg2 & (1U << 26); // CPUCFG.2.DIV32 // Features["lamcas"] = cpucfg2 & (1U << 28); // CPUCFG.2.LAMCAS // Features["llacq-screl"] = cpucfg2 & (1U << 29); // CPUCFG.2.LLACQ_SCREL // Features["scq"] = cpucfg2 & (1U << 30); // CPUCFG.2.SCQ diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp index 9b8407a73bea3f..8e7681d526cef5 100644 --- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp +++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp @@ -54,6 +54,7 @@ bool LoongArch::getArchFeatures(StringRef Arch, Features.push_back("+frecipe"); Features.push_back("+lam-bh"); Features.push_back("+ld-seq-sa"); + Features.push_back("+div32"); } return true; } diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 94bfe44a847a37..3ade3202728931 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2030,7 +2030,9 @@ bool llvm::promoteLoopAccessesToScalars( bool DereferenceableInPH = false; bool StoreIsGuanteedToExecute = false; + bool LoadIsGuaranteedToExecute = false; bool FoundLoadToPromote = false; + // Goes from Unknown to either Safe or Unsafe, but can't switch between them. enum { StoreSafe, @@ -2089,6 +2091,10 @@ bool llvm::promoteLoopAccessesToScalars( Align InstAlignment = Load->getAlign(); + if (!LoadIsGuaranteedToExecute) + LoadIsGuaranteedToExecute = + SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop); + // Note that proving a load safe to speculate requires proving // sufficient alignment at the target location. Proving it guaranteed // to execute does as well. 
Thus we can increase our guaranteed @@ -2233,8 +2239,9 @@ bool llvm::promoteLoopAccessesToScalars( SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment, - SawUnorderedAtomic, AATags, *SafetyInfo, - StoreSafety == StoreSafe); + SawUnorderedAtomic, + StoreIsGuanteedToExecute ? AATags : AAMDNodes(), + *SafetyInfo, StoreSafety == StoreSafe); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. @@ -2247,7 +2254,7 @@ bool llvm::promoteLoopAccessesToScalars( PreheaderLoad->setOrdering(AtomicOrdering::Unordered); PreheaderLoad->setAlignment(Alignment); PreheaderLoad->setDebugLoc(DebugLoc()); - if (AATags) + if (AATags && LoadIsGuaranteedToExecute) PreheaderLoad->setAAMetadata(AATags); MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB( diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index ed564ca82140af..d9a126fa99cf4d 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3753,6 +3753,11 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, if (DoesKMove) K->setMetadata(Kind, MDNode::getMergedProfMetadata(KMD, JMD, K, J)); break; + case LLVMContext::MD_noalias_addrspace: + if (DoesKMove) + K->setMetadata(Kind, + MDNode::getMostGenericNoaliasAddrspace(JMD, KMD)); + break; } } // Set !invariant.group from J if J has it. 
If both instructions have it @@ -3794,7 +3799,8 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, LLVMContext::MD_prof, LLVMContext::MD_nontemporal, LLVMContext::MD_noundef, - LLVMContext::MD_mmra}; + LLVMContext::MD_mmra, + LLVMContext::MD_noalias_addrspace}; combineMetadata(K, J, KnownIDs, KDominatesJ); } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 52d06285a570d4..a80cbb1d814b49 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9533,8 +9533,20 @@ void BoUpSLP::reorderGatherNode(TreeEntry &TE) { Cost += ::getShuffleCost(*TTI, TTI::SK_InsertSubvector, VecTy, {}, CostKind, Idx, getWidenedType(ScalarTy, Sz)); } - Cost += TTI->getScalarizationOverhead(VecTy, DemandedElts, /*Insert=*/true, - /*Extract=*/false, CostKind); + if (auto *FTy = dyn_cast(ScalarTy)) { + assert(SLPReVec && "Only supported by REVEC."); + // If ScalarTy is FixedVectorType, we should use CreateInsertVector instead + // of CreateInsertElement. 
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy); + for (unsigned I : seq(TE.Scalars.size())) + if (DemandedElts[I]) + Cost += + TTI->getShuffleCost(TTI::SK_InsertSubvector, VecTy, std::nullopt, + CostKind, I * ScalarTyNumElements, FTy); + } else { + Cost += TTI->getScalarizationOverhead(VecTy, DemandedElts, /*Insert=*/true, + /*Extract=*/false, CostKind); + } int Sz = TE.Scalars.size(); SmallVector ReorderMask(TE.ReorderIndices.begin(), TE.ReorderIndices.end()); diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll index a49aa7cfcf8a2d..bb4c67fca5dc8b 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll @@ -4,16 +4,16 @@ define @test_extq_i8 ( %zn, %zm) { ; CHECK-LABEL: test_extq_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: extq z0.b, z0.b, z1.b, #0 +; CHECK-NEXT: extq z0.b, z0.b, z1.b, #15 ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.extq.nxv16i8( %zn, %zm, i32 0) + %res = call @llvm.aarch64.sve.extq.nxv16i8( %zn, %zm, i32 15) ret %res } define @test_extq_i16 ( %zn, %zm) { ; CHECK-LABEL: test_extq_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: extq z0.b, z0.b, z1.b, #1 +; CHECK-NEXT: extq z0.b, z0.b, z1.b, #2 ; CHECK-NEXT: ret %res = call @llvm.aarch64.sve.extq.nxv8i16( %zn, %zm, i32 1) ret %res @@ -22,7 +22,7 @@ define @test_extq_i16 ( %zn, @test_extq_i32 ( %zn, %zm) { ; CHECK-LABEL: test_extq_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: extq z0.b, z0.b, z1.b, #2 +; CHECK-NEXT: extq z0.b, z0.b, z1.b, #8 ; CHECK-NEXT: ret %res = call @llvm.aarch64.sve.extq.nxv4i32( %zn, %zm, i32 2) ret %res @@ -31,45 +31,45 @@ define @test_extq_i32 ( %zn, @test_extq_i64 ( %zn, %zm) { ; CHECK-LABEL: test_extq_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: extq z0.b, z0.b, z1.b, #3 +; CHECK-NEXT: extq z0.b, z0.b, z1.b, #8 ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.extq.nxv2i64( %zn, %zm, i32 3) + %res = call @llvm.aarch64.sve.extq.nxv2i64( %zn, %zm, i32 1) 
ret %res } define @test_extq_f16( %zn, %zm) { ; CHECK-LABEL: test_extq_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: extq z0.b, z0.b, z1.b, #4 +; CHECK-NEXT: extq z0.b, z0.b, z1.b, #14 ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.extq.nxv8f16( %zn, %zm, i32 4) + %res = call @llvm.aarch64.sve.extq.nxv8f16( %zn, %zm, i32 7) ret %res } define @test_extq_f32( %zn, %zm) { ; CHECK-LABEL: test_extq_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: extq z0.b, z0.b, z1.b, #5 +; CHECK-NEXT: extq z0.b, z0.b, z1.b, #4 ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.extq.nxv4f32( %zn, %zm, i32 5) + %res = call @llvm.aarch64.sve.extq.nxv4f32( %zn, %zm, i32 1) ret %res } define @test_extq_f64( %zn, %zm) { ; CHECK-LABEL: test_extq_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: extq z0.b, z0.b, z1.b, #6 +; CHECK-NEXT: extq z0.b, z0.b, z1.b, #8 ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.extq.nxv2f64( %zn, %zm, i32 6) + %res = call @llvm.aarch64.sve.extq.nxv2f64( %zn, %zm, i32 1) ret %res } define @test_extq_bf16( %zn, %zm) { ; CHECK-LABEL: test_extq_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: extq z0.b, z0.b, z1.b, #15 +; CHECK-NEXT: extq z0.b, z0.b, z1.b, #6 ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.extq.nxv8bf16( %zn, %zm, i32 15) + %res = call @llvm.aarch64.sve.extq.nxv8bf16( %zn, %zm, i32 3) ret %res } diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index 9c2527ae4781bd..45b161d7959f4f 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -1,18 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn - -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX7LESS,GFX7LESS_ITERATIVE %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global - 
-amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX8,GFX8_ITERATIVE %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9_ITERATIVE %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064,GFX1064_ITERATIVE %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032,GFX1032_ITERATIVE %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164,GFX1164_ITERATIVE %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132,GFX1132_ITERATIVE %s -; RUN: llc -mtriple=amdgcn - -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX7LESS,GFX7LESS_DPP %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX8,GFX8_DPP %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9_DPP %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP 
-verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064,GFX1064_DPP %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032,GFX1032_DPP %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164,GFX1164_DPP %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132,GFX1132_DPP %s +; RUN: llc -mtriple=amdgcn - -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefixes=GFX7LESS,GFX7LESS_ITERATIVE %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefixes=GFX8,GFX8_ITERATIVE %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9_ITERATIVE %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064,GFX1064_ITERATIVE %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032,GFX1032_ITERATIVE %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164,GFX1164_ITERATIVE %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 
-mattr=+wavefrontsize32 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132,GFX1132_ITERATIVE %s +; RUN: llc -mtriple=amdgcn - -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefixes=GFX7LESS,GFX7LESS_DPP %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefixes=GFX8,GFX8_DPP %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9_DPP %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064,GFX1064_DPP %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032,GFX1032_DPP %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164,GFX1164_DPP %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132,GFX1132_DPP %s declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll index e4ffedd686ac93..fe32a342f6df2d 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -1,23 +1,23 @@ -; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefix=CI -check-prefix=GCN %s -; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs | FileCheck 
-check-prefix=VI-NOXNACK -check-prefix=GCN %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri | FileCheck -check-prefix=CI -check-prefix=GCN %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-xnack | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s -; RUN: llc < %s -mtriple=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s -; RUN: llc < %s -mtriple=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=carrizo -mattr=-xnack | FileCheck -check-prefixes=VI-NOXNACK,GCN %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=stoney -mattr=-xnack | FileCheck -check-prefixes=VI-NOXNACK,GCN %s -; RUN: llc < %s -mtriple=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s -; RUN: llc < %s -mtriple=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=carrizo -mattr=+xnack | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=stoney -mattr=+xnack | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck -check-prefixes=GCN %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s -; 
RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch | FileCheck -check-prefixes=GCN %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch | FileCheck -check-prefixes=GCN %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s ; GCN-LABEL: {{^}}no_vcc_no_flat: diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll 
b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index 8290942e46e6a1..97d642b991f705 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9-PAL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940 %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-PAL,GFX1010-PAL %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-PAL,GFX1030-PAL %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-PAL %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX12-PAL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | 
FileCheck --check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX10 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX11 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX9-PAL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -mattr=-promote-alloca < %s | FileCheck -check-prefixes=GFX940 %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX10-PAL,GFX1010-PAL %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX10-PAL,GFX1030-PAL %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX11-PAL %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX12-PAL %s define amdgpu_kernel void @zero_init_kernel() { ; GFX9-LABEL: zero_init_kernel: diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll index 6727ebd10b92d5..e674b57aae3efc 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN1 %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN2 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck 
-check-prefixes=GCN3 %s +; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefixes=GCN1 %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN2 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN3 %s define amdgpu_kernel void @atomic_add_i32_offset(ptr %out, i32 %in) { ; GCN1-LABEL: atomic_add_i32_offset: diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll index 9b11929de2c910..1311560715ddd7 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN1 %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN2 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN3 %s +; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefixes=GCN1 %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN2 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN3 %s ; --------------------------------------------------------------------- ; atomicrmw xchg diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll index f5433ca4da4cd4..b4d7ff8e7c526e 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN1 %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN2 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 
-verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN1 %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN2 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s define amdgpu_kernel void @atomic_add_i64_offset(ptr %out, i64 %in) { ; GCN1-LABEL: atomic_add_i64_offset: diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll index 2989f08ac56e7b..36bddb7ac2fd68 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN1 %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN2 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN3 %s +; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN1 %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN2 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN3 %s ; --------------------------------------------------------------------- ; atomicrmw xchg diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll index 604fc732e7e1cd..fe47461ebf9569 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GFX7 %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga 
-verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s ; --------------------------------------------------------------------- ; atomicrmw xchg diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum.ll b/llvm/test/CodeGen/AMDGPU/fmaximum.ll index 11b7c51ef2d516..fe8150b3c21c4f 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-SDAG %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL %s define amdgpu_ps float @test_fmaximum_f32_vv(float %a, float %b) { ; GCN-LABEL: test_fmaximum_f32_vv: diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll index 08122cd0d89eab..209ae86b4dedce 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll @@ -14,19 +14,26 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) ret float %max1 @@ -43,19 +50,26 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v2, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, 
v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v2, v0, v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %c, float %max0) ret float %max1 @@ -70,21 +84,30 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre ; GFX12-NEXT: v_readfirstlane_b32 s0, v0 ; GFX12-NEXT: ; return to shader part epilog ; -; GFX9-LABEL: s_fmaximum3_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: v_max_f32_e32 v1, s0, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_max_f32_e32 v1, s2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: ; return to shader part epilog +; GFX940-LABEL: s_fmaximum3_f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: v_mov_b32_e32 v0, s1 +; GFX940-NEXT: v_max_f32_e32 v1, s0, v0 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_max_f32_e32 v1, s2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_readfirstlane_b32 s0, v0 +; 
GFX940-NEXT: ; return to shader part epilog +; +; GFX950-LABEL: s_fmaximum3_f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s2, s2 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_readfirstlane_b32 s0, v0 +; GFX950-NEXT: ; return to shader part epilog %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) %cast = bitcast float %max1 to i32 @@ -103,19 +126,26 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, |v0|, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fabs0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, |v0|, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fabs0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, |v0|, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fabs0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, |v0|, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %max0 = call float 
@llvm.maximum.f32(float %a.fabs, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) @@ -133,19 +163,26 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, |v1|, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fabs1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, v0, |v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fabs1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, v0, |v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fabs1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v1|, |v1| +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %b.fabs = call float @llvm.fabs.f32(float %b) %max0 = call float @llvm.maximum.f32(float %a, float %b.fabs) %max1 = call float @llvm.maximum.f32(float %max0, float %c) @@ -163,19 +200,26 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fabs2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fabs2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fabs2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v2|, |v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %c.fabs = call float @llvm.fabs.f32(float %c) %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c.fabs) @@ -193,19 +237,26 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 
s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, |v0|, |v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v1| +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v2|, |v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) %c.fabs = call float @llvm.fabs.f32(float %c) @@ -225,19 +276,26 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, -v2 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, -v0, -v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, -v2 +; GFX940-NEXT: 
v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v1, -v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, -v2, -v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg float %a %b.fneg = fneg float %b %c.fneg = fneg float %c @@ -257,19 +315,26 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg_fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, -|v0|, -|v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, -|v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg_fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, -|v0|, -|v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, -|v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg_fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v1| +; GFX950-NEXT: v_maximum3_f32 v0, v0, -|v2|, -|v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call 
float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) %c.fabs = call float @llvm.fabs.f32(float %c) @@ -292,19 +357,26 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, -v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, -v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, -v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg float %a %max0 = call float @llvm.maximum.f32(float %a.fneg, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) @@ -322,19 +394,26 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, -v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: 
v_max_f32_e64 v3, v0, -v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, v0, -v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, -v1, -v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %b.fneg = fneg float %b %max0 = call float @llvm.maximum.f32(float %a, float %b.fneg) %max1 = call float @llvm.maximum.f32(float %max0, float %c) @@ -352,19 +431,26 @@ define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, -v2 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg2: +; GFX940: ; 
%bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, -v2 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, -v2, -v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %c.fneg = fneg float %c %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c.fneg) @@ -382,19 +468,27 @@ define float @v_fmaximum3_f32_const0(float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, 0x41000000, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_const0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v2, 0x41000000, v0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_const0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v2, 0x41000000, v0 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: 
v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_const0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: s_mov_b32 s0, 0x41000000 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float 8.0, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) ret float %max1 @@ -411,19 +505,27 @@ define float @v_fmaximum3_f32__const2(float %a, float %b) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 0x41000000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32__const2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v1, 0x41000000, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32__const2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v1, 0x41000000, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32__const2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_mov_b32 s0, 0x41000000 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call 
float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float 8.0) ret float %max1 @@ -440,19 +542,26 @@ define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, 4.0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_inlineimm0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_inlineimm0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v0 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_inlineimm0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float 4.0, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) ret float %max1 @@ -469,19 +578,26 @@ define float @v_fmaximum3_f32__inlineimm(float %a, float %b) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32__inlineimm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: 
v_max_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v1, 4.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32__inlineimm: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v1, 4.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32__inlineimm: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float 4.0) ret float %max1 @@ -500,19 +616,28 @@ define float @v_fmaximum3_f32_const1_const2(float %a) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, s0, 0x41800000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_const1_const2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v1, 0x41000000, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_max_f32_e32 v1, 0x41800000, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: 
v_fmaximum3_f32_const1_const2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v1, 0x41000000, v0 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_max_f32_e32 v1, 0x41800000, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_const1_const2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: s_mov_b32 s0, 0x41000000 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: s_mov_b32 s0, 0x41800000 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float 8.0) %max1 = call float @llvm.maximum.f32(float %max0, float 16.0) ret float %max1 @@ -530,27 +655,36 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float ; GFX12-NEXT: v_maximum3_f32 v1, v5, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v2f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v6, v1, v3 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v0, v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v2, v4, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v2, v5, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: 
v_fmaximum3_v2f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v6, v1, v3 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: v_max_f32_e32 v3, v0, v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v2, v4, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v2, v5, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v2f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v0, v4, v0, v0 +; GFX950-NEXT: v_maximum3_f32 v1, v5, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %c, <2 x float> %max0) ret <2 x float> %max1 @@ -568,27 +702,36 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2 ; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v2f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v6, v1, v3 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v0, v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e32 
v2, v0, v4 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v2, v1, v5 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v2f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v6, v1, v3 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: v_max_f32_e32 v3, v0, v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v2, v0, v4 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v2, v1, v5 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v2f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c) ret <2 x float> %max1 @@ -606,27 +749,36 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v5| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v2f32__fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: 
v_max_f32_e64 v6, |v1|, |v3| -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| -; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v2| -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v2, v0, |v4| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_max_f32_e64 v2, v1, |v5| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v2f32__fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v6, |v1|, |v3| +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| +; GFX940-NEXT: v_max_f32_e64 v3, |v0|, |v2| +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v2, v0, |v4| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_max_f32_e64 v2, v1, |v5| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v2f32__fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v3| +; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v2|, |v2| +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v4|, |v4| +; GFX950-NEXT: v_maximum3_f32 v1, v1, |v5|, |v5| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) %b.fabs = call <2 
x float> @llvm.fabs.v2f32(<2 x float> %b) %c.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %c) @@ -647,27 +799,36 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v1, -v1, -v3, -v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v2f32__fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v6, -v1, -v3 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 -; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v2, v0, -v4 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_max_f32_e64 v2, v1, -v5 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v2f32__fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v6, -v1, -v3 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 +; GFX940-NEXT: v_max_f32_e64 v3, -v0, -v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v2, v0, -v4 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_max_f32_e64 v2, v1, -v5 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v2f32__fneg_all: 
+; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v1, -v1, -v3, -v3 +; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v2, -v2 +; GFX950-NEXT: v_maximum3_f32 v0, v0, -v4, -v4 +; GFX950-NEXT: v_maximum3_f32 v1, v1, -v5, -v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg <2 x float> %a %b.fneg = fneg <2 x float> %b %c.fneg = fneg <2 x float> %c @@ -688,27 +849,36 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c ; GFX12-NEXT: v_maximum3_f32 v1, v1, 2.0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v2f32__inlineimm1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v1 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX9-NEXT: v_max_f32_e32 v4, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_max_f32_e32 v2, v1, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v4, 2.0, v1 +; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX940-NEXT: v_max_f32_e32 v4, 2.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX940-NEXT: v_max_f32_e32 v4, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: v_max_f32_e32 v2, v1, v3 
+; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v1, v1, 2.0, 2.0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, 2.0, 2.0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> ) %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c) ret <2 x float> %max1 @@ -726,27 +896,36 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b ; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v2f32__inlineimm2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v4, v1, v3 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v0, v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v4, v1, v3 +; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: v_maximum3_f32 v1, v1, 4.0, 4.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> ) ret <2 x float> %max1 @@ -765,35 +944,46 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float ; GFX12-NEXT: v_maximum3_f32 v2, v8, v2, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v3f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v9, v2, v5 -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_max_f32_e32 v5, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v4, v0, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v6, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 -; 
GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v7, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v8, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v3f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v9, v2, v5 +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: v_max_f32_e32 v5, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: v_max_f32_e32 v4, v0, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v6, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v7, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v8, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v3f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v0, v6, v0, v0 +; GFX950-NEXT: v_maximum3_f32 v1, v7, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v2, v8, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = 
call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %c, <3 x float> %max0) ret <3 x float> %max1 @@ -812,35 +1002,46 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3 ; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v3f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v9, v2, v5 -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_max_f32_e32 v5, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v4, v0, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v0, v6 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v1, v7 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v2, v8 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v3f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v9, v2, v5 +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: v_max_f32_e32 v5, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: v_max_f32_e32 v4, v0, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; 
GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v0, v6 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v1, v7 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v2, v8 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v3f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v6, v6 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v7, v7 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v8, v8 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c) ret <3 x float> %max1 @@ -859,35 +1060,46 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v8| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v3f32__fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v9, |v2|, |v5| -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| -; GFX9-NEXT: v_max_f32_e64 v5, |v1|, |v4| -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| -; GFX9-NEXT: v_max_f32_e64 v4, |v0|, |v3| -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, 
|v0|, |v3| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_max_f32_e64 v3, v0, |v6| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v3, v1, |v7| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v3, v2, |v8| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v3f32__fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v9, |v2|, |v5| +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| +; GFX940-NEXT: v_max_f32_e64 v5, |v1|, |v4| +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| +; GFX940-NEXT: v_max_f32_e64 v4, |v0|, |v3| +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_max_f32_e64 v3, v0, |v6| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v3, v1, |v7| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v3, v2, |v8| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v3f32__fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v5| +; GFX950-NEXT: v_maximum3_f32 v1, 
|v1|, |v4|, |v4| +; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v3|, |v3| +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v6|, |v6| +; GFX950-NEXT: v_maximum3_f32 v1, v1, |v7|, |v7| +; GFX950-NEXT: v_maximum3_f32 v2, v2, |v8|, |v8| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a) %b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b) %c.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %c) @@ -909,35 +1121,46 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v2, -v2, -v5, -v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v3f32__fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v9, -v2, -v5 -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 -; GFX9-NEXT: v_max_f32_e64 v5, -v1, -v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 -; GFX9-NEXT: v_max_f32_e64 v4, -v0, -v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_max_f32_e64 v3, v0, -v6 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v3, v1, -v7 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v3, v2, -v8 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v3f32__fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v9, -v2, -v5 +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, 
-v2, -v5 +; GFX940-NEXT: v_max_f32_e64 v5, -v1, -v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 +; GFX940-NEXT: v_max_f32_e64 v4, -v0, -v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_max_f32_e64 v3, v0, -v6 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v3, v1, -v7 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v3, v2, -v8 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v3f32__fneg_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v2, -v2, -v5, -v5 +; GFX950-NEXT: v_maximum3_f32 v1, -v1, -v4, -v4 +; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v3, -v3 +; GFX950-NEXT: v_maximum3_f32 v0, v0, -v6, -v6 +; GFX950-NEXT: v_maximum3_f32 v1, v1, -v7, -v7 +; GFX950-NEXT: v_maximum3_f32 v2, v2, -v8, -v8 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg <3 x float> %a %b.fneg = fneg <3 x float> %b %c.fneg = fneg <3 x float> %c @@ -959,35 +1182,46 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c ; GFX12-NEXT: v_maximum3_f32 v2, v2, 2.0, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v3f32__inlineimm1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v2 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX9-NEXT: 
v_max_f32_e32 v6, 2.0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX9-NEXT: v_max_f32_e32 v6, v0, v3 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v2, v5 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v2 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX940-NEXT: v_max_f32_e32 v6, v0, v3 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: v_max_f32_e32 v3, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v2, v5 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm1: +; 
GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v2, v2, 2.0, 2.0 +; GFX950-NEXT: v_maximum3_f32 v1, v1, 2.0, 2.0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, 2.0, 2.0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> ) %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c) ret <3 x float> %max1 @@ -1006,35 +1240,46 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b ; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v3f32__inlineimm2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v6, v2, v5 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_max_f32_e32 v5, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v4, v0, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc -; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX940-NEXT: v_max_f32_e32 v6, v2, v5 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: v_max_f32_e32 v5, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: v_max_f32_e32 v4, v0, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc +; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: v_maximum3_f32 v1, v1, 4.0, 4.0 +; GFX950-NEXT: v_maximum3_f32 v2, v2, 4.0, 4.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> ) ret <3 x float> %max1 @@ -3165,19 +3410,26 @@ define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c) ; GFX12-NEXT: v_maximum_f32 v1, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_no_fmaximum3_f32__multi_use: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; 
GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_no_fmaximum3_f32__multi_use: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_no_fmaximum3_f32__multi_use: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v1, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) %insert.0 = insertelement <2 x float> poison, float %max0, i32 0 @@ -3193,22 +3445,31 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float ; GFX12-NEXT: s_maximum_f32 s1, s0, s2 ; GFX12-NEXT: ; return to shader part epilog ; -; GFX9-LABEL: s_no_fmaximum3_f32__multi_use: -; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: v_max_f32_e32 v1, s0, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_max_f32_e32 v1, s2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX9-NEXT: s_nop 0 -; 
GFX9-NEXT: v_readfirstlane_b32 s1, v1 -; GFX9-NEXT: ; return to shader part epilog +; GFX940-LABEL: s_no_fmaximum3_f32__multi_use: +; GFX940: ; %bb.0: +; GFX940-NEXT: v_mov_b32_e32 v0, s1 +; GFX940-NEXT: v_max_f32_e32 v1, s0, v0 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_max_f32_e32 v1, s2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX940-NEXT: v_readfirstlane_b32 s0, v0 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_readfirstlane_b32 s1, v1 +; GFX940-NEXT: ; return to shader part epilog +; +; GFX950-LABEL: s_no_fmaximum3_f32__multi_use: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1 +; GFX950-NEXT: v_maximum3_f32 v1, v0, s2, s2 +; GFX950-NEXT: v_readfirstlane_b32 s0, v0 +; GFX950-NEXT: v_readfirstlane_b32 s1, v1 +; GFX950-NEXT: ; return to shader part epilog %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) %cast0 = bitcast float %max0 to i32 @@ -3372,6 +3633,3 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double %insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1 ret <2 x double> %insert.1 } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; GFX940: {{.*}} -; GFX950: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll index 38640a18b5aee6..09483194f08892 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_on: ; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}} diff --git a/llvm/test/CodeGen/AMDGPU/fminimum.ll b/llvm/test/CodeGen/AMDGPU/fminimum.ll index 3bd82eca8ce955..ba536aade8c49c 100644 --- a/llvm/test/CodeGen/AMDGPU/fminimum.ll +++ b/llvm/test/CodeGen/AMDGPU/fminimum.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-SDAG %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL %s define amdgpu_ps float @test_fminimum_f32_vv(float %a, float %b) { ; GCN-LABEL: test_fminimum_f32_vv: diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll index 43293512c8c21d..000f6c190b9773 100644 --- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll +++ 
b/llvm/test/CodeGen/AMDGPU/fminimum3.ll @@ -14,19 +14,26 @@ define float @v_fminimum3_f32(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) ret float %max1 @@ -43,19 +50,26 @@ define float @v_fminimum3_f32_commute(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v2, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 
1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v2, v0, v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %c, float %max0) ret float %max1 @@ -70,21 +84,30 @@ define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inre ; GFX12-NEXT: v_readfirstlane_b32 s0, v0 ; GFX12-NEXT: ; return to shader part epilog ; -; GFX9-LABEL: s_fminimum3_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: v_min_f32_e32 v1, s0, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_min_f32_e32 v1, s2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: ; return to shader part epilog +; GFX940-LABEL: s_fminimum3_f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: v_mov_b32_e32 v0, s1 +; GFX940-NEXT: v_min_f32_e32 v1, s0, v0 +; 
GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_min_f32_e32 v1, s2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_readfirstlane_b32 s0, v0 +; GFX940-NEXT: ; return to shader part epilog +; +; GFX950-LABEL: s_fminimum3_f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s1, s1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s2, s2 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_readfirstlane_b32 s0, v0 +; GFX950-NEXT: ; return to shader part epilog %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) %cast = bitcast float %max1 to i32 @@ -103,19 +126,26 @@ define float @v_fminimum3_f32_fabs0(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, |v0|, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fabs0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, |v0|, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fabs0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, |v0|, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; 
GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fabs0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, |v0|, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %max0 = call float @llvm.minimum.f32(float %a.fabs, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) @@ -133,19 +163,26 @@ define float @v_fminimum3_f32_fabs1(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, |v1|, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fabs1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, v0, |v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fabs1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, v0, |v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fabs1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, |v1|, |v1| +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %b.fabs = call float @llvm.fabs.f32(float %b) %max0 = call 
float @llvm.minimum.f32(float %a, float %b.fabs) %max1 = call float @llvm.minimum.f32(float %max0, float %c) @@ -163,19 +200,26 @@ define float @v_fminimum3_f32_fabs2(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fabs2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v1, v0, |v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fabs2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v1, v0, |v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fabs2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, |v2|, |v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %c.fabs = call float @llvm.fabs.f32(float %c) %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c.fabs) @@ -193,19 +237,26 @@ define float @v_fminimum3_f32_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, |v0|, |v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v1, v0, |v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, |v0|, |v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v1, v0, |v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v1| +; GFX950-NEXT: v_minimum3_f32 v0, v0, |v2|, |v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) %c.fabs = call float @llvm.fabs.f32(float %c) @@ -225,19 +276,26 @@ define float @v_fminimum3_f32_fneg_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, -v0, -v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, -v0, -v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v1, v0, -v2 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 
s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, -v0, -v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v1, v0, -v2 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fneg_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v1, -v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, -v2, -v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg float %a %b.fneg = fneg float %b %c.fneg = fneg float %c @@ -257,19 +315,26 @@ define float @v_fminimum3_f32_fneg_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fneg_fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, -|v0|, -|v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v1, v0, -|v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fneg_fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, -|v0|, -|v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v1, v0, -|v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| +; 
GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fneg_fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v1| +; GFX950-NEXT: v_minimum3_f32 v0, v0, -|v2|, -|v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) %c.fabs = call float @llvm.fabs.f32(float %c) @@ -292,19 +357,26 @@ define float @v_fminimum3_f32_fneg0(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, -v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fneg0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, -v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fneg0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, -v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fneg0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, -v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg float %a %max0 = call float 
@llvm.minimum.f32(float %a.fneg, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) @@ -322,19 +394,26 @@ define float @v_fminimum3_f32_fneg1(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, -v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fneg1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, v0, -v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fneg1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, v0, -v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fneg1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, -v1, -v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %b.fneg = fneg float %b %max0 = call float @llvm.minimum.f32(float %a, float %b.fneg) %max1 = call float @llvm.minimum.f32(float %max0, float %c) @@ -352,19 +431,26 @@ define float @v_fminimum3_f32_fneg2(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fneg2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v3, v0, 
v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v1, v0, -v2 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fneg2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v1, v0, -v2 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fneg2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, -v2, -v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %c.fneg = fneg float %c %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c.fneg) @@ -382,19 +468,27 @@ define float @v_fminimum3_f32_const0(float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, 0x41000000, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_const0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v2, 0x41000000, v0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_const0: +; GFX940: ; %bb.0: +; 
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v2, 0x41000000, v0 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_const0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: s_mov_b32 s0, 0x41000000 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float 8.0, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) ret float %max1 @@ -411,19 +505,27 @@ define float @v_fminimum3_f32__const2(float %a, float %b) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 0x41000000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32__const2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v1, 0x41000000, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32__const2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v1, 0x41000000, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; 
GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32__const2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_mov_b32 s0, 0x41000000 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float 8.0) ret float %max1 @@ -440,19 +542,26 @@ define float @v_fminimum3_f32_inlineimm0(float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, 4.0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_inlineimm0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_inlineimm0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v0 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_inlineimm0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float 4.0, 
float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) ret float %max1 @@ -469,19 +578,26 @@ define float @v_fminimum3_f32__inlineimm(float %a, float %b) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32__inlineimm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v1, 4.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32__inlineimm: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v1, 4.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32__inlineimm: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float 4.0) ret float %max1 @@ -500,19 +616,28 @@ define float @v_fminimum3_f32_const1_const2(float %a) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, s0, 0x41800000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_const1_const2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v1, 0x41000000, v0 -; 
GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_min_f32_e32 v1, 0x41800000, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_const1_const2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v1, 0x41000000, v0 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_min_f32_e32 v1, 0x41800000, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_const1_const2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: s_mov_b32 s0, 0x41000000 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: s_mov_b32 s0, 0x41800000 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float %a, float 8.0) %max1 = call float @llvm.minimum.f32(float %max0, float 16.0) ret float %max1 @@ -530,27 +655,36 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float ; GFX12-NEXT: v_minimum3_f32 v1, v5, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v2f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v6, v1, v3 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_min_f32_e32 v3, v0, v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; 
GFX9-NEXT: v_min_f32_e32 v2, v4, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v2, v5, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v2f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v6, v1, v3 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: v_min_f32_e32 v3, v0, v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v2, v4, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v2, v5, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v2f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v0, v4, v0, v0 +; GFX950-NEXT: v_minimum3_f32 v1, v5, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %c, <2 x float> %max0) ret <2 x float> %max1 @@ -568,27 +702,36 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2 ; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v2f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: 
v_min_f32_e32 v6, v1, v3 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_min_f32_e32 v3, v0, v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v2, v0, v4 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v2, v1, v5 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v2f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v6, v1, v3 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: v_min_f32_e32 v3, v0, v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v2, v0, v4 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v2, v1, v5 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v2f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x 
float> %c) ret <2 x float> %max1 @@ -606,27 +749,36 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v5| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v2f32__fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v6, |v1|, |v3| -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| -; GFX9-NEXT: v_min_f32_e64 v3, |v0|, |v2| -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v2, v0, |v4| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_min_f32_e64 v2, v1, |v5| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v2f32__fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v6, |v1|, |v3| +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| +; GFX940-NEXT: v_min_f32_e64 v3, |v0|, |v2| +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v2, v0, |v4| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_min_f32_e64 v2, v1, |v5| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v2f32__fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v3| +; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v2|, |v2| +; GFX950-NEXT: v_minimum3_f32 v0, v0, |v4|, |v4| +; GFX950-NEXT: v_minimum3_f32 v1, v1, |v5|, |v5| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) %b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b) %c.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %c) @@ -647,27 +799,36 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v1, -v1, -v3, -v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v2f32__fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v6, -v1, -v3 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 -; GFX9-NEXT: v_min_f32_e64 v3, -v0, -v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v2, v0, -v4 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_min_f32_e64 v2, v1, -v5 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v2f32__fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v6, -v1, -v3 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 +; GFX940-NEXT: v_min_f32_e64 v3, -v0, -v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v2, v0, -v4 +; 
GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_min_f32_e64 v2, v1, -v5 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v2f32__fneg_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v1, -v1, -v3, -v3 +; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v2, -v2 +; GFX950-NEXT: v_minimum3_f32 v0, v0, -v4, -v4 +; GFX950-NEXT: v_minimum3_f32 v1, v1, -v5, -v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg <2 x float> %a %b.fneg = fneg <2 x float> %b %c.fneg = fneg <2 x float> %c @@ -688,27 +849,36 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c ; GFX12-NEXT: v_minimum3_f32 v1, v1, 2.0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v2f32__inlineimm1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v1 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX9-NEXT: v_min_f32_e32 v4, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_min_f32_e32 v2, v1, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v2f32__inlineimm1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v4, 2.0, v1 +; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, 
v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX940-NEXT: v_min_f32_e32 v4, 2.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX940-NEXT: v_min_f32_e32 v4, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: v_min_f32_e32 v2, v1, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v2f32__inlineimm1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v1, v1, 2.0, 2.0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, 2.0, 2.0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> ) %max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> %c) ret <2 x float> %max1 @@ -726,27 +896,36 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b ; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v2f32__inlineimm2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v4, v1, v3 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_min_f32_e32 v3, v0, v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, 
v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v2f32__inlineimm2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v4, v1, v3 +; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: v_min_f32_e32 v3, v0, v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v2f32__inlineimm2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: v_minimum3_f32 v1, v1, 4.0, 4.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> ) ret <2 x float> %max1 @@ -765,35 +944,46 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float ; GFX12-NEXT: v_minimum3_f32 v2, v8, v2, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v3f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v9, v2, v5 -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_min_f32_e32 v5, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, 
v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_min_f32_e32 v4, v0, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v6, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v7, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v8, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v3f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v9, v2, v5 +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: v_min_f32_e32 v5, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: v_min_f32_e32 v4, v0, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v6, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v7, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v8, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v3f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v0, v6, v0, v0 +; GFX950-NEXT: v_minimum3_f32 v1, v7, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v2, v8, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %c, <3 x float> %max0) ret <3 x float> %max1 @@ -812,35 +1002,46 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3 ; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v3f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v9, v2, v5 -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_min_f32_e32 v5, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_min_f32_e32 v4, v0, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v0, v6 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v1, v7 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v2, v8 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v3f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v9, v2, v5 +; GFX940-NEXT: 
v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: v_min_f32_e32 v5, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: v_min_f32_e32 v4, v0, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v0, v6 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v1, v7 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v2, v8 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v3f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v6, v6 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v7, v7 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v8, v8 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> %c) ret <3 x float> %max1 @@ -859,35 +1060,46 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v8| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v3f32__fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v9, |v2|, |v5| -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; 
GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| -; GFX9-NEXT: v_min_f32_e64 v5, |v1|, |v4| -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| -; GFX9-NEXT: v_min_f32_e64 v4, |v0|, |v3| -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_min_f32_e64 v3, v0, |v6| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v3, v1, |v7| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v3, v2, |v8| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v3f32__fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v9, |v2|, |v5| +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| +; GFX940-NEXT: v_min_f32_e64 v5, |v1|, |v4| +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| +; GFX940-NEXT: v_min_f32_e64 v4, |v0|, |v3| +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_min_f32_e64 v3, v0, |v6| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v3, v1, |v7| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v3, v2, |v8| +; GFX940-NEXT: 
v_cmp_o_f32_e64 vcc, v2, |v8| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v3f32__fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v5| +; GFX950-NEXT: v_minimum3_f32 v1, |v1|, |v4|, |v4| +; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v3|, |v3| +; GFX950-NEXT: v_minimum3_f32 v0, v0, |v6|, |v6| +; GFX950-NEXT: v_minimum3_f32 v1, v1, |v7|, |v7| +; GFX950-NEXT: v_minimum3_f32 v2, v2, |v8|, |v8| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a) %b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b) %c.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %c) @@ -909,35 +1121,46 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v2, -v2, -v5, -v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v3f32__fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v9, -v2, -v5 -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 -; GFX9-NEXT: v_min_f32_e64 v5, -v1, -v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 -; GFX9-NEXT: v_min_f32_e64 v4, -v0, -v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_min_f32_e64 v3, v0, -v6 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v3, v1, -v7 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v3, v2, -v8 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 -; 
GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v3f32__fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v9, -v2, -v5 +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 +; GFX940-NEXT: v_min_f32_e64 v5, -v1, -v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 +; GFX940-NEXT: v_min_f32_e64 v4, -v0, -v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_min_f32_e64 v3, v0, -v6 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v3, v1, -v7 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v3, v2, -v8 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v3f32__fneg_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v2, -v2, -v5, -v5 +; GFX950-NEXT: v_minimum3_f32 v1, -v1, -v4, -v4 +; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v3, -v3 +; GFX950-NEXT: v_minimum3_f32 v0, v0, -v6, -v6 +; GFX950-NEXT: v_minimum3_f32 v1, v1, -v7, -v7 +; GFX950-NEXT: v_minimum3_f32 v2, v2, -v8, -v8 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg <3 x float> %a %b.fneg = fneg <3 x float> %b %c.fneg = fneg <3 x float> %c @@ -959,35 +1182,46 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c ; GFX12-NEXT: v_minimum3_f32 v2, v2, 2.0, v5 ; GFX12-NEXT: 
s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v3f32__inlineimm1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v2 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX9-NEXT: v_min_f32_e32 v6, v0, v3 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: v_min_f32_e32 v3, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v2, v5 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v3f32__inlineimm1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v6, 2.0, v2 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX940-NEXT: v_min_f32_e32 v6, 2.0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_min_f32_e32 v6, 2.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX940-NEXT: v_min_f32_e32 v6, v0, v3 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: v_min_f32_e32 v3, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX940-NEXT: 
v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v2, v5 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v3f32__inlineimm1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v2, v2, 2.0, 2.0 +; GFX950-NEXT: v_minimum3_f32 v1, v1, 2.0, 2.0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, 2.0, 2.0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> ) %max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> %c) ret <3 x float> %max1 @@ -1006,35 +1240,46 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b ; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v3f32__inlineimm2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v6, v2, v5 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_min_f32_e32 v5, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_min_f32_e32 v4, v0, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc -; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: 
v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v3f32__inlineimm2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v6, v2, v5 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: v_min_f32_e32 v5, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: v_min_f32_e32 v4, v0, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc +; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v3f32__inlineimm2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: v_minimum3_f32 v1, v1, 4.0, 4.0 +; GFX950-NEXT: v_minimum3_f32 v2, v2, 4.0, 4.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> ) ret <3 x 
float> %max1 @@ -3165,19 +3410,26 @@ define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c) ; GFX12-NEXT: v_minimum_f32 v1, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_no_fminimum3_f32__multi_use: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_no_fminimum3_f32__multi_use: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_no_fminimum3_f32__multi_use: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v1, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) %insert.0 = insertelement <2 x float> poison, float %max0, i32 0 @@ -3193,22 +3445,31 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float ; GFX12-NEXT: s_minimum_f32 s1, s0, s2 ; GFX12-NEXT: ; return to shader part epilog ; -; GFX9-LABEL: s_no_fminimum3_f32__multi_use: -; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: v_min_f32_e32 v1, s0, v0 -; GFX9-NEXT: 
v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_min_f32_e32 v1, s2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 -; GFX9-NEXT: ; return to shader part epilog +; GFX940-LABEL: s_no_fminimum3_f32__multi_use: +; GFX940: ; %bb.0: +; GFX940-NEXT: v_mov_b32_e32 v0, s1 +; GFX940-NEXT: v_min_f32_e32 v1, s0, v0 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_min_f32_e32 v1, s2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX940-NEXT: v_readfirstlane_b32 s0, v0 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_readfirstlane_b32 s1, v1 +; GFX940-NEXT: ; return to shader part epilog +; +; GFX950-LABEL: s_no_fminimum3_f32__multi_use: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s1, s1 +; GFX950-NEXT: v_minimum3_f32 v1, v0, s2, s2 +; GFX950-NEXT: v_readfirstlane_b32 s0, v0 +; GFX950-NEXT: v_readfirstlane_b32 s1, v1 +; GFX950-NEXT: ; return to shader part epilog %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) %cast0 = bitcast float %max0 to i32 @@ -3372,6 +3633,3 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double %insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1 ret <2 x double> %insert.1 } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; GFX940: {{.*}} -; GFX950: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.ll b/llvm/test/CodeGen/AMDGPU/fminnum.ll index 65b311845a6b77..536c67561a0539 100644 --- a/llvm/test/CodeGen/AMDGPU/fminnum.ll +++ b/llvm/test/CodeGen/AMDGPU/fminnum.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_fmin_f32_ieee_mode_on: ; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}} diff --git a/llvm/test/CodeGen/AMDGPU/fmul.ll b/llvm/test/CodeGen/AMDGPU/fmul.ll index cedf7c43ff7cf8..63dc674acd9e14 100644 --- a/llvm/test/CodeGen/AMDGPU/fmul.ll +++ b/llvm/test/CodeGen/AMDGPU/fmul.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}fmul_f32: diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll index ab380dbef107ad..d8ea0ddf77b7a1 100644 --- a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -verify-machineinstrs | FileCheck %s -check-prefix=GFX950-SDAG -; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -verify-machineinstrs | FileCheck %s -check-prefix=GFX950-GISEL +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 | FileCheck %s -check-prefix=GFX950-SDAG +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 | FileCheck %s -check-prefix=GFX950-GISEL declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32, i32 immarg) declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32, i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/global-alias.ll b/llvm/test/CodeGen/AMDGPU/global-alias.ll index 6e70664bad0ae1..4872a24002ddb2 100644 --- a/llvm/test/CodeGen/AMDGPU/global-alias.ll +++ b/llvm/test/CodeGen/AMDGPU/global-alias.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=amdgcn %s -o - | FileCheck %s @foo_a = alias void (ptr), ptr @foo @bar_a = alias void (ptr), ptr @foo_a diff --git a/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll b/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll index b38758bae537d8..f7437de216cc2b 100644 --- a/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll +++ b/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -memdep-block-scan-limit=1 -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -memdep-block-scan-limit=1 -amdgpu-scalarize-global-loads < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}unknown_memdep_analysis: ; GCN: flat_load_dword diff --git 
a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll index 30ae461d5de5ad..fbe06b3651b06c 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7LESS %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132 %s -; RUN: llc -mtriple=amdgcn -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7LESS-DPP %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-DPP %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064-DPP %s -; RUN: llc 
-mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032-DPP %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164-DPP %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132-DPP %s +; RUN: llc -mtriple=amdgcn -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefix=GFX7LESS %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=Iterative < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132 %s +; RUN: llc -mtriple=amdgcn -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefix=GFX7LESS-DPP %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-DPP %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064-DPP %s +; RUN: llc -mtriple=amdgcn 
-mcpu=gfx1010 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032-DPP %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164-DPP %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132-DPP %s declare float @div.float.value() declare double @div.double.value() diff --git a/llvm/test/CodeGen/AMDGPU/idot2.ll b/llvm/test/CodeGen/AMDGPU/idot2.ll index 013eb8f59bbb9c..b443e654350c5e 100644 --- a/llvm/test/CodeGen/AMDGPU/idot2.ll +++ b/llvm/test/CodeGen/AMDGPU/idot2.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NODL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-DL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-DL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefixes=GFX9-NODL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck --check-prefixes=GFX9-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 < %s | FileCheck --check-prefixes=GFX10-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck 
--check-prefixes=GFX10-DL %s ; add(mul(S0.x, S1.y), ; add (mul (S0.y, S1.y), S3)) -> v_dot2_{I|U}32_{I|U}16(S1, S2, S3) diff --git a/llvm/test/CodeGen/AMDGPU/idot4s.ll b/llvm/test/CodeGen/AMDGPU/idot4s.ll index 1b181643b3d469..dd29970af52fde 100644 --- a/llvm/test/CodeGen/AMDGPU/idot4s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4s.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX7 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-NODL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-DL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-DL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-DL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9-NODL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 < %s | FileCheck -check-prefixes=GFX10-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefixes=GFX10-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11-DL %s define amdgpu_kernel void @idot4_acc32(ptr addrspace(1) %src1, ; GFX7-LABEL: idot4_acc32: diff --git a/llvm/test/CodeGen/AMDGPU/idot4u.ll b/llvm/test/CodeGen/AMDGPU/idot4u.ll index 23e19bbe97153b..8f82348d350e0a 100644 --- 
a/llvm/test/CodeGen/AMDGPU/idot4u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4u.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX7 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-NODL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-DL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-DL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-DL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9-NODL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 < %s | FileCheck -check-prefixes=GFX10-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefixes=GFX10-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11-DL %s define amdgpu_kernel void @udot4_acc32(ptr addrspace(1) %src1, ; GFX7-LABEL: udot4_acc32: diff --git a/llvm/test/CodeGen/AMDGPU/idot8s.ll b/llvm/test/CodeGen/AMDGPU/idot8s.ll index 25aa623295fe16..add62a5c39cb14 100644 --- a/llvm/test/CodeGen/AMDGPU/idot8s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8s.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck 
-check-prefixes=GFX7 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-DL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-DL-XNACK %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-DL-XNACK %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-DL-NOXNACK %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-DL-NOXNACK %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 < %s | FileCheck -check-prefixes=GFX10-DL-XNACK %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefixes=GFX10-DL-XNACK %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10-DL-NOXNACK %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck -check-prefixes=GFX10-DL-NOXNACK %s define amdgpu_kernel void @idot8_acc32(ptr addrspace(1) %src1, ; GFX7-LABEL: idot8_acc32: diff --git a/llvm/test/CodeGen/AMDGPU/idot8u.ll b/llvm/test/CodeGen/AMDGPU/idot8u.ll index d8491f322e69a0..069bebdf3c469d 100644 --- a/llvm/test/CodeGen/AMDGPU/idot8u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8u.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck 
-check-prefixes=GFX7 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-DL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-DL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 < %s | FileCheck -check-prefixes=GFX10-DL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefixes=GFX10-DL %s define amdgpu_kernel void @udot8_acc32(ptr addrspace(1) %src1, ; GFX7-LABEL: udot8_acc32: diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll index 6bafaad582901b..7912d1cf8dc0d1 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s define amdgpu_kernel void @float4_inselt(ptr addrspace(1) %out, <4 x float> %vec, i32 %sel) { ; GCN-LABEL: float4_inselt: diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index 3aa0437f0466e3..72cda5c718f5b2 
100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-flat-for-global,+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s -; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-flat-for-global,+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s ; FIXME: Broken on evergreen ; FIXME: For some reason the 8 and 16 vectors are being stored as diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll index 7c3c66d7ac727f..441f3ded8f7f2c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll @@ -39,11 +39,7 @@ define float @v_maximum_f32(float %src0, float %src1) { ; GFX950-LABEL: v_maximum_f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f32: @@ -89,11 +85,17 @@ define float @v_maximum_f32__nnan(float %src0, float %src1) { ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f32__nnan: ; GFX10: ; %bb.0: @@ -151,11 +153,7 @@ define float @v_maximum_f32__nsz(float %src0, float %src1) { ; GFX950-LABEL: v_maximum_f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f32__nsz: @@ -201,11 +199,17 @@ define float @v_maximum_f32__nnan_nsz(float %src0, float %src1) { ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -267,11 +271,7 @@ define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) { ; GFX950: ; %bb.0: ; 
GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_add_f32_e32 v0, 1.0, v0 -; GFX950-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f32__nnan_src0: @@ -344,11 +344,7 @@ define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_add_f32_e32 v1, 1.0, v1 -; GFX950-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f32__nnan_src1: @@ -429,12 +425,8 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) { ; GFX950-LABEL: s_maximum_f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_mov_b32_e32 v0, s1 -; GFX950-NEXT: v_max_f32_e32 v1, s0, v0 -; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; use v0 ; GFX950-NEXT: ;;#ASMEND @@ -521,15 +513,8 @@ define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) { ; GFX950-LABEL: v_maximum_v2f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v4, v0, v2 -; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX950-NEXT: v_max_f32_e32 v2, v1, v3 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; 
GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f32: @@ -583,12 +568,19 @@ define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1) ; GFX8-NEXT: v_max_f32_e32 v1, v1, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v2f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v2f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v2f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f32__nnan: ; GFX10: ; %bb.0: @@ -657,15 +649,8 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) { ; GFX950-LABEL: v_maximum_v2f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v4, v0, v2 -; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX950-NEXT: v_max_f32_e32 v2, v1, v3 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f32__nsz: @@ -719,12 +704,19 @@ define <2 x 
float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr ; GFX8-NEXT: v_max_f32_e32 v1, v1, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v2f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v2f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v2f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -808,16 +800,10 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) { ; GFX950-LABEL: s_maximum_v2f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_mov_b32_e32 v0, s3 -; GFX950-NEXT: v_max_f32_e32 v1, s1, v0 -; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s1, v0 -; GFX950-NEXT: v_mov_b32_e32 v0, s2 -; GFX950-NEXT: v_max_f32_e32 v3, s0, v0 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX950-NEXT: v_mov_b32_e32 v0, s1 +; GFX950-NEXT: v_maximum3_f32 v1, v0, s3, s3 +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s2, s2 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; use v[0:1] ; GFX950-NEXT: ;;#ASMEND @@ -920,19 +906,9 @@ define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) { ; GFX950-LABEL: v_maximum_v3f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX950-NEXT: v_max_f32_e32 v6, v0, v3 -; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX950-NEXT: v_max_f32_e32 v3, v1, v4 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX950-NEXT: v_max_f32_e32 v3, v2, v5 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f32: @@ -995,13 +971,21 @@ define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1) ; GFX8-NEXT: v_max_f32_e32 v2, v2, v5 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v3f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v5 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v3f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX900-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v3f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f32__nnan: ; GFX10: ; %bb.0: @@ -1082,19 +1066,9 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) { ; GFX950-LABEL: v_maximum_v3f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v6, v0, v3 -; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX950-NEXT: v_max_f32_e32 v3, v1, v4 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX950-NEXT: v_max_f32_e32 v3, v2, v5 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f32__nsz: @@ -1157,13 +1131,21 @@ define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr ; GFX8-NEXT: v_max_f32_e32 v2, v2, v5 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v3f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v5 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v3f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX900-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v3f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -1253,23 +1235,10 @@ define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) { ; GFX950-LABEL: v_maximum_v4f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v8, v0, v4 -; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX950-NEXT: v_max_f32_e32 v4, v1, v5 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc -; GFX950-NEXT: v_max_f32_e32 v4, v2, v6 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc -; GFX950-NEXT: v_max_f32_e32 v4, v3, v7 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f32: @@ -1341,14 +1310,23 @@ define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1) ; GFX8-NEXT: v_max_f32_e32 v3, v3, v7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v4f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v5 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX9-NEXT: v_max_f32_e32 v3, v3, v7 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v4f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX900-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX900-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v4f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: 
v_maximum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f32__nnan: ; GFX10: ; %bb.0: @@ -1440,23 +1418,10 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) { ; GFX950-LABEL: v_maximum_v4f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v8, v0, v4 -; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX950-NEXT: v_max_f32_e32 v4, v1, v5 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc -; GFX950-NEXT: v_max_f32_e32 v4, v2, v6 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc -; GFX950-NEXT: v_max_f32_e32 v4, v3, v7 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f32__nsz: @@ -1528,14 +1493,23 @@ define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr ; GFX8-NEXT: v_max_f32_e32 v3, v3, v7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v4f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v5 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX9-NEXT: v_max_f32_e32 v3, v3, v7 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v4f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX900-NEXT: 
v_max_f32_e32 v1, v1, v5 +; GFX900-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX900-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v4f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -1663,39 +1637,14 @@ define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) { ; GFX950-LABEL: v_maximum_v8f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v16, v0, v8 -; GFX950-NEXT: v_mov_b32_e32 v17, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v8 -; GFX950-NEXT: v_max_f32_e32 v8, v1, v9 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v9 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc -; GFX950-NEXT: v_max_f32_e32 v8, v2, v10 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v10 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc -; GFX950-NEXT: v_max_f32_e32 v8, v3, v11 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v11 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc -; GFX950-NEXT: v_max_f32_e32 v8, v4, v12 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v12 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc -; GFX950-NEXT: v_max_f32_e32 v8, v5, v13 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v13 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc -; GFX950-NEXT: v_max_f32_e32 v8, v6, v14 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v14 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc -; GFX950-NEXT: v_max_f32_e32 v8, v7, v15 -; GFX950-NEXT: 
v_cmp_o_f32_e32 vcc, v7, v15 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v8, v8 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v9, v9 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v10, v10 +; GFX950-NEXT: v_maximum3_f32 v3, v3, v11, v11 +; GFX950-NEXT: v_maximum3_f32 v4, v4, v12, v12 +; GFX950-NEXT: v_maximum3_f32 v5, v5, v13, v13 +; GFX950-NEXT: v_maximum3_f32 v6, v6, v14, v14 +; GFX950-NEXT: v_maximum3_f32 v7, v7, v15, v15 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v8f32: @@ -1980,64 +1929,23 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: scratch_load_dword v31, off, s32 -; GFX950-NEXT: v_mov_b32_e32 v32, 0x7fc00000 -; GFX950-NEXT: v_max_f32_e32 v33, v0, v16 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v16 -; GFX950-NEXT: v_max_f32_e32 v34, v1, v17 -; GFX950-NEXT: v_max_f32_e32 v35, v2, v18 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 -; GFX950-NEXT: v_max_f32_e32 v36, v3, v19 -; GFX950-NEXT: v_max_f32_e32 v37, v4, v20 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v18 -; GFX950-NEXT: v_max_f32_e32 v38, v5, v21 -; GFX950-NEXT: v_max_f32_e32 v39, v6, v22 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v19 -; GFX950-NEXT: v_max_f32_e32 v48, v7, v23 -; GFX950-NEXT: v_max_f32_e32 v49, v8, v24 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v20 -; GFX950-NEXT: v_max_f32_e32 v50, v9, v25 -; GFX950-NEXT: v_max_f32_e32 v51, v10, v26 -; GFX950-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v21 -; GFX950-NEXT: v_max_f32_e32 v52, v11, v27 -; GFX950-NEXT: v_max_f32_e32 v53, v12, v28 -; GFX950-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc -; GFX950-NEXT: 
v_cmp_o_f32_e32 vcc, v6, v22 -; GFX950-NEXT: v_max_f32_e32 v54, v13, v29 -; GFX950-NEXT: v_max_f32_e32 v55, v14, v30 -; GFX950-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v23 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v16, v16 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v17, v17 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v18, v18 +; GFX950-NEXT: v_maximum3_f32 v3, v3, v19, v19 +; GFX950-NEXT: v_maximum3_f32 v4, v4, v20, v20 +; GFX950-NEXT: v_maximum3_f32 v5, v5, v21, v21 +; GFX950-NEXT: v_maximum3_f32 v6, v6, v22, v22 +; GFX950-NEXT: v_maximum3_f32 v7, v7, v23, v23 +; GFX950-NEXT: v_maximum3_f32 v8, v8, v24, v24 +; GFX950-NEXT: v_maximum3_f32 v9, v9, v25, v25 +; GFX950-NEXT: v_maximum3_f32 v10, v10, v26, v26 +; GFX950-NEXT: v_maximum3_f32 v11, v11, v27, v27 +; GFX950-NEXT: v_maximum3_f32 v12, v12, v28, v28 +; GFX950-NEXT: v_maximum3_f32 v13, v13, v29, v29 +; GFX950-NEXT: v_maximum3_f32 v14, v14, v30, v30 ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v16, v15, v31 -; GFX950-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v8, v24 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v9, v25 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v10, v26 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v11, v27 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v12, v28 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v13, v29 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v14, v30 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v15, v31 -; GFX950-NEXT: s_nop 
1 -; GFX950-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc +; GFX950-NEXT: v_maximum3_f32 v15, v15, v31, v31 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v16f32: @@ -2176,3 +2084,4 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GCN: {{.*}} +; GFX9: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll index 777d5000a2c42f..14ab5bded2d3a3 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll @@ -39,11 +39,7 @@ define float @v_minimum_f32(float %src0, float %src1) { ; GFX950-LABEL: v_minimum_f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f32: @@ -89,11 +85,17 @@ define float @v_minimum_f32__nnan(float %src0, float %src1) { ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f32__nnan: ; GFX10: ; %bb.0: @@ -151,11 +153,7 @@ define float 
@v_minimum_f32__nsz(float %src0, float %src1) { ; GFX950-LABEL: v_minimum_f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f32__nsz: @@ -201,11 +199,17 @@ define float @v_minimum_f32__nnan_nsz(float %src0, float %src1) { ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -267,11 +271,7 @@ define float @v_minimum_f32__nnan_src0(float %arg0, float %src1) { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_add_f32_e32 v0, 1.0, v0 -; GFX950-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f32__nnan_src0: @@ -344,11 +344,7 @@ define float @v_minimum_f32__nnan_src1(float %src0, float %arg1) { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: 
v_add_f32_e32 v1, 1.0, v1 -; GFX950-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f32__nnan_src1: @@ -429,12 +425,8 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) { ; GFX950-LABEL: s_minimum_f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_mov_b32_e32 v0, s1 -; GFX950-NEXT: v_min_f32_e32 v1, s0, v0 -; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s1, s1 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; use v0 ; GFX950-NEXT: ;;#ASMEND @@ -521,15 +513,8 @@ define <2 x float> @v_minimum_v2f32(<2 x float> %src0, <2 x float> %src1) { ; GFX950-LABEL: v_minimum_v2f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v4, v0, v2 -; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX950-NEXT: v_min_f32_e32 v2, v1, v3 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f32: @@ -583,12 +568,19 @@ define <2 x float> @v_minimum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1) ; GFX8-NEXT: v_min_f32_e32 v1, v1, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v2f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, 
v2 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v2f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v2f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f32__nnan: ; GFX10: ; %bb.0: @@ -657,15 +649,8 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) { ; GFX950-LABEL: v_minimum_v2f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v4, v0, v2 -; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX950-NEXT: v_min_f32_e32 v2, v1, v3 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f32__nsz: @@ -719,12 +704,19 @@ define <2 x float> @v_minimum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr ; GFX8-NEXT: v_min_f32_e32 v1, v1, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v2f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v2f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 
s[30:31] +; +; GFX950-LABEL: v_minimum_v2f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -808,16 +800,10 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) { ; GFX950-LABEL: s_minimum_v2f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_mov_b32_e32 v0, s3 -; GFX950-NEXT: v_min_f32_e32 v1, s1, v0 -; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s1, v0 -; GFX950-NEXT: v_mov_b32_e32 v0, s2 -; GFX950-NEXT: v_min_f32_e32 v3, s0, v0 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX950-NEXT: v_mov_b32_e32 v0, s1 +; GFX950-NEXT: v_minimum3_f32 v1, v0, s3, s3 +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s2, s2 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; use v[0:1] ; GFX950-NEXT: ;;#ASMEND @@ -920,19 +906,9 @@ define <3 x float> @v_minimum_v3f32(<3 x float> %src0, <3 x float> %src1) { ; GFX950-LABEL: v_minimum_v3f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v6, v0, v3 -; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX950-NEXT: v_min_f32_e32 v3, v1, v4 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX950-NEXT: v_min_f32_e32 v3, v2, v5 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v1, 
v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f32: @@ -995,13 +971,21 @@ define <3 x float> @v_minimum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1) ; GFX8-NEXT: v_min_f32_e32 v2, v2, v5 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v3f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX9-NEXT: v_min_f32_e32 v2, v2, v5 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v3f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX900-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v3f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f32__nnan: ; GFX10: ; %bb.0: @@ -1082,19 +1066,9 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) { ; GFX950-LABEL: v_minimum_v3f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v6, v0, v3 -; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX950-NEXT: v_min_f32_e32 v3, v1, v4 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX950-NEXT: v_min_f32_e32 v3, v2, v5 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: 
v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f32__nsz: @@ -1157,13 +1131,21 @@ define <3 x float> @v_minimum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr ; GFX8-NEXT: v_min_f32_e32 v2, v2, v5 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v3f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX9-NEXT: v_min_f32_e32 v2, v2, v5 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v3f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX900-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v3f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -1253,23 +1235,10 @@ define <4 x float> @v_minimum_v4f32(<4 x float> %src0, <4 x float> %src1) { ; GFX950-LABEL: v_minimum_v4f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v8, v0, v4 -; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX950-NEXT: v_min_f32_e32 v4, v1, v5 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc -; GFX950-NEXT: v_min_f32_e32 v4, v2, v6 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc -; GFX950-NEXT: v_min_f32_e32 v4, 
v3, v7 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_minimum3_f32 v3, v3, v7, v7 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f32: @@ -1341,14 +1310,23 @@ define <4 x float> @v_minimum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1) ; GFX8-NEXT: v_min_f32_e32 v3, v3, v7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v4f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v5 -; GFX9-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX9-NEXT: v_min_f32_e32 v3, v3, v7 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v4f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX900-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX900-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v4f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_minimum3_f32 v3, v3, v7, v7 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f32__nnan: ; GFX10: ; %bb.0: @@ -1440,23 +1418,10 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) { ; GFX950-LABEL: v_minimum_v4f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v8, v0, v4 -; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX950-NEXT: v_min_f32_e32 v4, v1, v5 -; GFX950-NEXT: s_nop 0 -; 
GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc -; GFX950-NEXT: v_min_f32_e32 v4, v2, v6 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc -; GFX950-NEXT: v_min_f32_e32 v4, v3, v7 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_minimum3_f32 v3, v3, v7, v7 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f32__nsz: @@ -1528,14 +1493,23 @@ define <4 x float> @v_minimum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr ; GFX8-NEXT: v_min_f32_e32 v3, v3, v7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v4f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v5 -; GFX9-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX9-NEXT: v_min_f32_e32 v3, v3, v7 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v4f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX900-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX900-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v4f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_minimum3_f32 v3, v3, v7, v7 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -1663,39 +1637,14 @@ define <8 x float> 
@v_minimum_v8f32(<8 x float> %src0, <8 x float> %src1) { ; GFX950-LABEL: v_minimum_v8f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v16, v0, v8 -; GFX950-NEXT: v_mov_b32_e32 v17, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v8 -; GFX950-NEXT: v_min_f32_e32 v8, v1, v9 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v9 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc -; GFX950-NEXT: v_min_f32_e32 v8, v2, v10 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v10 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc -; GFX950-NEXT: v_min_f32_e32 v8, v3, v11 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v11 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc -; GFX950-NEXT: v_min_f32_e32 v8, v4, v12 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v12 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc -; GFX950-NEXT: v_min_f32_e32 v8, v5, v13 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v13 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc -; GFX950-NEXT: v_min_f32_e32 v8, v6, v14 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v14 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc -; GFX950-NEXT: v_min_f32_e32 v8, v7, v15 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v15 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v8, v8 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v9, v9 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v10, v10 +; GFX950-NEXT: v_minimum3_f32 v3, v3, v11, v11 +; GFX950-NEXT: v_minimum3_f32 v4, v4, v12, v12 +; GFX950-NEXT: v_minimum3_f32 v5, v5, v13, v13 +; GFX950-NEXT: v_minimum3_f32 v6, v6, v14, v14 +; GFX950-NEXT: v_minimum3_f32 v7, v7, v15, v15 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v8f32: @@ -1980,64 +1929,23 @@ define <16 x float> 
@v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: scratch_load_dword v31, off, s32 -; GFX950-NEXT: v_mov_b32_e32 v32, 0x7fc00000 -; GFX950-NEXT: v_min_f32_e32 v33, v0, v16 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v16 -; GFX950-NEXT: v_min_f32_e32 v34, v1, v17 -; GFX950-NEXT: v_min_f32_e32 v35, v2, v18 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 -; GFX950-NEXT: v_min_f32_e32 v36, v3, v19 -; GFX950-NEXT: v_min_f32_e32 v37, v4, v20 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v18 -; GFX950-NEXT: v_min_f32_e32 v38, v5, v21 -; GFX950-NEXT: v_min_f32_e32 v39, v6, v22 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v19 -; GFX950-NEXT: v_min_f32_e32 v48, v7, v23 -; GFX950-NEXT: v_min_f32_e32 v49, v8, v24 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v20 -; GFX950-NEXT: v_min_f32_e32 v50, v9, v25 -; GFX950-NEXT: v_min_f32_e32 v51, v10, v26 -; GFX950-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v21 -; GFX950-NEXT: v_min_f32_e32 v52, v11, v27 -; GFX950-NEXT: v_min_f32_e32 v53, v12, v28 -; GFX950-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v22 -; GFX950-NEXT: v_min_f32_e32 v54, v13, v29 -; GFX950-NEXT: v_min_f32_e32 v55, v14, v30 -; GFX950-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v23 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v16, v16 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v17, v17 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v18, v18 +; GFX950-NEXT: v_minimum3_f32 v3, v3, v19, v19 +; GFX950-NEXT: v_minimum3_f32 v4, v4, v20, v20 +; GFX950-NEXT: v_minimum3_f32 v5, v5, v21, v21 +; GFX950-NEXT: v_minimum3_f32 v6, v6, v22, v22 +; GFX950-NEXT: v_minimum3_f32 v7, v7, v23, v23 +; 
GFX950-NEXT: v_minimum3_f32 v8, v8, v24, v24 +; GFX950-NEXT: v_minimum3_f32 v9, v9, v25, v25 +; GFX950-NEXT: v_minimum3_f32 v10, v10, v26, v26 +; GFX950-NEXT: v_minimum3_f32 v11, v11, v27, v27 +; GFX950-NEXT: v_minimum3_f32 v12, v12, v28, v28 +; GFX950-NEXT: v_minimum3_f32 v13, v13, v29, v29 +; GFX950-NEXT: v_minimum3_f32 v14, v14, v30, v30 ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v16, v15, v31 -; GFX950-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v8, v24 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v9, v25 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v10, v26 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v11, v27 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v12, v28 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v13, v29 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v14, v30 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v15, v31 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc +; GFX950-NEXT: v_minimum3_f32 v15, v15, v31, v31 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v16f32: @@ -2176,3 +2084,4 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { } ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; GCN: {{.*}} +; GFX9: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/load-global-f32.ll b/llvm/test/CodeGen/AMDGPU/load-global-f32.ll index 7b1355425729e9..ca24b78f62c2e5 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-f32.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-f32.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN-NOHSA,FUNC,SI-NOHSA %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN-HSA,FUNC,GCNX3-HSA %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN-NOHSA,FUNC,GCNX3-NOHSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN-NOHSA,FUNC,SI-NOHSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa -mcpu=kaveri < %s | FileCheck --check-prefixes=GCN-HSA,FUNC,GCNX3-HSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCN-NOHSA,FUNC,GCNX3-NOHSA %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood < %s | FileCheck --check-prefixes=R600,FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=cayman < %s | FileCheck --check-prefixes=R600,FUNC %s diff --git a/llvm/test/CodeGen/AMDGPU/load-global-f64.ll b/llvm/test/CodeGen/AMDGPU/load-global-f64.ll index ed3618dfd64745..31998b466919c5 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-f64.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-f64.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN-NOHSA,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa -mcpu=kaveri 
-verify-machineinstrs < %s | FileCheck --check-prefixes=GCN-HSA,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN-NOHSA,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN-NOHSA,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa -mcpu=kaveri < %s | FileCheck --check-prefixes=GCN-HSA,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCN-NOHSA,FUNC %s ; FUNC-LABEL: {{^}}global_load_f64: ; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i1.ll b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll index dac928d70c6502..92add00f84b40e 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn-- < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}global_load_i1: diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll index 7411712da31bd8..64f1f45bf734cf 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -amdgpu-scalarize-global-loads=false 
-mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-HSA %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=EG %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=CM %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-HSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=EG %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=cayman < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=CM %s ; FIXME: r600 is broken because the bigger testcases spill and it's not implemented diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll index 629343b47bc16f..8f6a1f8c01ec34 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCNX3-HSA %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCNX3-NOHSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -check-prefix=SI-NOHSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=GCNX3-HSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCNX3-NOHSA %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-HSA -check-prefix=GCN-GFX900-HSA %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-HSA -check-prefix=GCN-GFX908-HSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN-HSA -check-prefix=GCN-GFX900-HSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GCN-HSA -check-prefix=GCN-GFX908-HSA %s define amdgpu_kernel void @global_load_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-NOHSA-LABEL: global_load_i32: diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i64.ll b/llvm/test/CodeGen/AMDGPU/load-global-i64.ll index 61ccc17d59eb00..00a2435e39207a 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i64.ll 
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i64.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN-NOHSA,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN-HSA,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN-NOHSA,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN-NOHSA,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck --check-prefixes=GCN-HSA,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCN-NOHSA,FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood < %s | FileCheck --check-prefixes=EG,FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=cayman < %s | FileCheck --check-prefixes=EG,FUNC %s diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll index add5f13bd2d996..fb34b5e1f3af6e 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll @@ -1,8 +1,8 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-HSA,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap 
-check-prefixes=GCN,GCN-NOHSA,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-HSA,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=cayman < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}global_load_i8: diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll new file mode 100644 index 00000000000000..b7745ff5e0c42f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 -mattr=+d,-div32 < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch64 -mattr=+d,+div32 < %s | FileCheck %s --check-prefix=LA64-DIV32 + +; TODO: Use div.w/mod.w for sdiv/srem i32 + +define i32 @divw(i64 %a, i64 %b) { +; LA64-LABEL: divw: +; LA64: # %bb.0: +; 
LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: div.d $a0, $a0, $a1 +; LA64-NEXT: ret +; +; LA64-DIV32-LABEL: divw: +; LA64-DIV32: # %bb.0: +; LA64-DIV32-NEXT: addi.w $a0, $a0, 0 +; LA64-DIV32-NEXT: addi.w $a1, $a1, 0 +; LA64-DIV32-NEXT: div.d $a0, $a0, $a1 +; LA64-DIV32-NEXT: ret + %conv1 = trunc i64 %a to i32 + %conv2 = trunc i64 %b to i32 + %r = sdiv i32 %conv1, %conv2 + ret i32 %r +} + +define i32 @divwu(i64 %a, i64 %b) { +; LA64-LABEL: divwu: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: div.wu $a0, $a0, $a1 +; LA64-NEXT: ret +; +; LA64-DIV32-LABEL: divwu: +; LA64-DIV32: # %bb.0: +; LA64-DIV32-NEXT: div.wu $a0, $a0, $a1 +; LA64-DIV32-NEXT: ret + %conv1 = trunc i64 %a to i32 + %conv2 = trunc i64 %b to i32 + %r = udiv i32 %conv1, %conv2 + ret i32 %r +} + +define i32 @modw(i64 %a, i64 %b) { +; LA64-LABEL: modw: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: mod.d $a0, $a0, $a1 +; LA64-NEXT: ret +; +; LA64-DIV32-LABEL: modw: +; LA64-DIV32: # %bb.0: +; LA64-DIV32-NEXT: addi.w $a0, $a0, 0 +; LA64-DIV32-NEXT: addi.w $a1, $a1, 0 +; LA64-DIV32-NEXT: mod.d $a0, $a0, $a1 +; LA64-DIV32-NEXT: ret + %conv1 = trunc i64 %a to i32 + %conv2 = trunc i64 %b to i32 + %r = srem i32 %conv1, %conv2 + ret i32 %r +} + +define i32 @modwu(i64 %a, i64 %b) { +; LA64-LABEL: modwu: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: mod.wu $a0, $a0, $a1 +; LA64-NEXT: ret +; +; LA64-DIV32-LABEL: modwu: +; LA64-DIV32: # %bb.0: +; LA64-DIV32-NEXT: mod.wu $a0, $a0, $a1 +; LA64-DIV32-NEXT: ret + %conv1 = trunc i64 %a to i32 + %conv2 = trunc i64 %b to i32 + %r = urem i32 %conv1, %conv2 + ret i32 %r +} diff --git a/llvm/test/CodeGen/PowerPC/gcov_ctr_ref_init.ll b/llvm/test/CodeGen/PowerPC/gcov_ctr_ref_init.ll index 4969aec0a14942..fc7dacd873b07d 100644 --- a/llvm/test/CodeGen/PowerPC/gcov_ctr_ref_init.ll +++ 
b/llvm/test/CodeGen/PowerPC/gcov_ctr_ref_init.ll @@ -12,11 +12,16 @@ target triple = "powerpc-ibm-aix" ; CHECK-NEXT: L..__llvm_covinit_functions: ; CHECK-NEXT: .vbyte 4, __llvm_gcov_writeout[DS] ; CHECK-NEXT: .vbyte 4, __llvm_gcov_reset[DS] -; The first .csect directive below is specifying the content of the csect. -; The second .csect directive below is used to insert the .ref pseudo -; instruction. -; CHECK: .csect __llvm_gcov_ctr_section[RW],3 ; CHECK: .csect __llvm_gcov_ctr_section[RW],3 +; CHECK-NEXT: .lglobl __llvm_gcov_ctr # @_MergedGlobals +; CHECK-NEXT: .lglobl __llvm_gcov_ctr.1 +; CHECK-NEXT: .align 3 +; CHECK-NEXT: L.._MergedGlobals: +; CHECK-NEXT: __llvm_gcov_ctr: +; CHECK-NEXT: .space 8 +; CHECK-NEXT: __llvm_gcov_ctr.1: +; CHECK-NEXT: .space 8 +; CHECK: .csect __llvm_gcov_ctr_section[RW],3 ; CHECK-RW-NEXT: .ref __llvm_covinit[RW] ; CHECK-RO-NEXT: .ref __llvm_covinit[RO] diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index e46587f58b4eb6..dbfe7bb51dbffa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -744,3 +744,200 @@ define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) { %s = shufflevector <64 x i8> %wide.vec, <64 x i8> poison, <8 x i32> ret <8 x i8> %s } + +define <8 x i8> @shuffle_compress_singlesrc_e8(<8 x i8> %v) { +; CHECK-LABEL: shuffle_compress_singlesrc_e8: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI49_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> + ret <8 x i8> %out +} + +define <8 x i16> @shuffle_compress_singlesrc_e16(<8 x i16> %v) { +; CHECK-LABEL: shuffle_compress_singlesrc_e16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 
%hi(.LCPI50_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %out = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> + ret <8 x i16> %out +} + +define <8 x i32> @shuffle_compress_singlesrc_e32(<8 x i32> %v) { +; CHECK-LABEL: shuffle_compress_singlesrc_e32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI51_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0) +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> + ret <8 x i32> %out +} + +define <8 x i64> @shuffle_compress_singlesrc_e64(<8 x i64> %v) { +; CHECK-LABEL: shuffle_compress_singlesrc_e64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI52_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0) +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %out = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> + ret <8 x i64> %out +} + +define <8 x i32> @shuffle_compress_singlesrc_gaps_e32(<8 x i32> %v) { +; CHECK-LABEL: shuffle_compress_singlesrc_gaps_e32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI53_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI53_0) +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> + ret <8 x i32> %out +} + +define <8 x i32> @shuffle_decompress2_singlesrc_e32(<8 x i32> %v) { +; CHECK-LABEL: shuffle_decompress2_singlesrc_e32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v10, v8, v8 +; 
CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vwmaccu.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> + ret <8 x i32> %out +} + +define <8 x i32> @shuffle_decompress3_singlesrc_e32(<8 x i32> %v) { +; RV32-LABEL: shuffle_decompress3_singlesrc_e32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI55_0) +; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0) +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vle16.v v12, (a0) +; RV32-NEXT: vrgatherei16.vv v10, v8, v12 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_decompress3_singlesrc_e32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, 32769 +; RV64-NEXT: slli a0, a0, 21 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vrgatherei16.vv v10, v8, v12 +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> + ret <8 x i32> %out +} + +; TODO: This should be a single vslideup.vi +define <8 x i32> @shuffle_decompress4_singlesrc_e32(<8 x i32> %v) { +; CHECK-LABEL: shuffle_decompress4_singlesrc_e32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vsrl.vi v12, v10, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> + ret <8 x i32> %out +} + +; TODO: This should be either a single vslideup.vi or two widening interleaves. 
+define <8 x i8> @shuffle_decompress4_singlesrc_e8(<8 x i8> %v) { +; CHECK-LABEL: shuffle_decompress4_singlesrc_e8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vsrl.vi v10, v9, 2 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> + ret <8 x i8> %out +} + +define <8 x i32> @shuffle_decompress_singlesrc_e32(<8 x i32> %v) { +; CHECK-LABEL: shuffle_decompress_singlesrc_e32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI58_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0) +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> + ret <8 x i32> %out +} + +define <8 x i32> @shuffle_repeat2_singlesrc_e32(<8 x i32> %v) { +; CHECK-LABEL: shuffle_repeat2_singlesrc_e32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v10, v8, v8 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vwmaccu.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> + ret <8 x i32> %out +} + +define <8 x i32> @shuffle_repeat3_singlesrc_e32(<8 x i32> %v) { +; CHECK-LABEL: shuffle_repeat3_singlesrc_e32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v0, 7 +; CHECK-NEXT: vmv.v.i v11, 1 +; CHECK-NEXT: li a0, 192 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmerge.vim v11, v11, 0, v0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vim v12, v11, 2, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> + ret <8 x i32> %out +} + +define <8 x i32> 
@shuffle_repeat4_singlesrc_e32(<8 x i32> %v) { +; CHECK-LABEL: shuffle_repeat4_singlesrc_e32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vsrl.vi v12, v10, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> + ret <8 x i32> %out +} diff --git a/llvm/test/DebugInfo/MIR/X86/dbg-prologue-backup-loc2.mir b/llvm/test/DebugInfo/MIR/X86/dbg-prologue-backup-loc2.mir index c27655ac801316..29cdbc0853365b 100644 --- a/llvm/test/DebugInfo/MIR/X86/dbg-prologue-backup-loc2.mir +++ b/llvm/test/DebugInfo/MIR/X86/dbg-prologue-backup-loc2.mir @@ -31,6 +31,13 @@ # CHECK-NEXT: .LBB0_1: # CHECK-LABEL: addl %esi, %edx +## Second function in this file: test that we don't crash when having trailing +## empty blocks and no location for a prologue. Test that a .loc is produced, +## with an implicit-not check for there being no prologue_end. +# +# CHECK-LABEL: f: +# CHECK: .loc 0 1234 0 + --- | ; ModuleID = 'out2.ll' @@ -66,6 +73,17 @@ ret i32 0, !dbg !17 } + define void @f() !dbg !18 { + entry: + %0 = call ptr @llvm.returnaddress(i32 0) + br label %do.body + + do.body: + unreachable + } + + declare ptr @llvm.returnaddress(i32 immarg) + !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!6, !7} !llvm.ident = !{!8} @@ -88,6 +106,7 @@ !15 = distinct !DILexicalBlock(scope: !9, file: !3, line: 4, column: 15) !16 = !DILocation(line: 6, column: 9, scope: !15) !17 = !DILocation(line: 8, column: 3, scope: !9) + !18 = distinct !DISubprogram(name: "f", scope: !3, file: !3, line: 37, type: !10, scopeLine: 1234, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !2) ... --- @@ -132,3 +151,33 @@ body: | RET64 $eax, debug-location !17 ... 
+--- +name: f +alignment: 16 +tracksRegLiveness: true +noPhis: true +isSSA: false +noVRegs: true +hasFakeUses: false +tracksDebugUserValues: true +frameInfo: + stackSize: 8 + offsetAdjustment: -8 + maxAlignment: 1 + maxCallFrameSize: 0 + isCalleeSavedInfoValid: true +fixedStack: + - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16 } +machineFunctionInfo: + amxProgModel: None +body: | + bb.0.entry: + frame-setup PUSH64r killed $rbp, implicit-def $rsp, implicit $rsp + frame-setup CFI_INSTRUCTION def_cfa_offset 16 + frame-setup CFI_INSTRUCTION offset $rbp, -16 + $rbp = frame-setup MOV64rr $rsp + frame-setup CFI_INSTRUCTION def_cfa_register $rbp + + bb.1.do.body: + +... diff --git a/llvm/test/MC/AArch64/local-bounds-single-trap.ll b/llvm/test/MC/AArch64/local-bounds-single-trap.ll index 6a017e24ab3cdf..8b8a3e430df692 100644 --- a/llvm/test/MC/AArch64/local-bounds-single-trap.ll +++ b/llvm/test/MC/AArch64/local-bounds-single-trap.ll @@ -1,7 +1,8 @@ ; RUN: llc -O3 -mtriple arm64-linux -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-ASM -; What this test does is check that even with nomerge, the functions still get merged in -; compiled code as the ubsantrap call gets lowered to a single instruction: brk. - +; This test checks that nomerge correctly prevents the traps from being merged +; in the compiled code. +; Prior to ae6dc64ec670891cb15049277e43133d4df7fb4b, this test showed that +; nomerge did not work correctly. 
@B = dso_local global [10 x i8] zeroinitializer, align 1 @B2 = dso_local global [10 x i8] zeroinitializer, align 1 diff --git a/llvm/test/Transforms/EarlyCSE/noalias-addrspace.ll b/llvm/test/Transforms/EarlyCSE/noalias-addrspace.ll new file mode 100644 index 00000000000000..0a001b55f684cf --- /dev/null +++ b/llvm/test/Transforms/EarlyCSE/noalias-addrspace.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='early-cse' -S < %s | FileCheck %s + +declare void @use(i1) +declare void @use.ptr(ptr) memory(read) + +define void @load_first_noalias_addrspace(ptr %p) { +; CHECK-LABEL: define void @load_first_noalias_addrspace( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0:![0-9]+]], !noundef [[META0]], !noalias.addrspace [[META1:![0-9]+]] +; CHECK-NEXT: call void @use.ptr(ptr [[V1]]) +; CHECK-NEXT: call void @use.ptr(ptr [[V1]]) +; CHECK-NEXT: ret void +; + %v1 = load ptr, ptr %p, !nonnull !{}, !noundef !{}, !noalias.addrspace !0 + call void @use.ptr(ptr %v1) + %v2 = load ptr, ptr %p + call void @use.ptr(ptr %v2) + ret void +} + +define void @load_both_same_noalias_addrspace(ptr %p) { +; CHECK-LABEL: define void @load_both_same_noalias_addrspace( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0]], !noundef [[META0]], !noalias.addrspace [[META1]] +; CHECK-NEXT: call void @use.ptr(ptr [[V1]]) +; CHECK-NEXT: call void @use.ptr(ptr [[V1]]) +; CHECK-NEXT: ret void +; + %v1 = load ptr, ptr %p, !nonnull !{}, !noundef !{}, !noalias.addrspace !0 + call void @use.ptr(ptr %v1) + %v2 = load ptr, ptr %p, !noalias.addrspace !0 + call void @use.ptr(ptr %v2) + ret void +} + +define void @load_both_disjoint_noalias_addrspace(ptr %p) { +; CHECK-LABEL: define void @load_both_disjoint_noalias_addrspace( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[P]], align 8, 
!nonnull [[META0]], !noundef [[META0]], !noalias.addrspace [[META1]] +; CHECK-NEXT: call void @use.ptr(ptr [[V1]]) +; CHECK-NEXT: call void @use.ptr(ptr [[V1]]) +; CHECK-NEXT: ret void +; + %v1 = load ptr, ptr %p, !nonnull !{}, !noundef !{}, !noalias.addrspace !0 + call void @use.ptr(ptr %v1) + %v2 = load ptr, ptr %p, !noalias.addrspace !1 + call void @use.ptr(ptr %v2) + ret void +} + +define void @load_both_overlap_noalias_addrspace(ptr %p) { +; CHECK-LABEL: define void @load_both_overlap_noalias_addrspace( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0]], !noundef [[META0]], !noalias.addrspace [[META1]] +; CHECK-NEXT: call void @use.ptr(ptr [[V1]]) +; CHECK-NEXT: call void @use.ptr(ptr [[V1]]) +; CHECK-NEXT: ret void +; + %v1 = load ptr, ptr %p, !nonnull !{}, !noundef !{}, !noalias.addrspace !0 + call void @use.ptr(ptr %v1) + %v2 = load ptr, ptr %p, !noalias.addrspace !2 + call void @use.ptr(ptr %v2) + ret void +} + +!0 = !{i32 5, i32 6} +!1 = !{i32 7, i32 8} +!2 = !{i32 5, i32 7} +;. +; CHECK: [[META0]] = !{} +; CHECK: [[META1]] = !{i32 5, i32 6} +;. 
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll index 3fdba7cfae67e1..4976eace72a9f9 100644 --- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll +++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll @@ -285,8 +285,8 @@ define double @preserve_load_metadata_after_select_transform_metadata_missing_4( ; CHECK-NEXT: ret double [[L_SEL]] ; entry: - %l.a = load double, ptr %a, align 8, !tbaa !0, !llvm.access.group !7 - %l.b = load double, ptr %b, align 8, !tbaa !0, !llvm.access.group !12 + %l.a = load double, ptr %a, align 8, !tbaa !0, !llvm.access.group !7, !alias.scope !3, !noalias !3 + %l.b = load double, ptr %b, align 8, !tbaa !0, !llvm.access.group !12, !alias.scope !14, !noalias !14 %cmp.i = fcmp fast olt double %l.a, %l.b %ptr.sel = select i1 %cmp.i, ptr %b, ptr %a %l.sel = load double, ptr %ptr.sel, align 8, !tbaa !0, !llvm.access.group !13 @@ -307,6 +307,10 @@ entry: !11 = !{i32 5, i32 6} !12 = distinct !{} !13 = distinct !{} +!14 = !{!15} +!15 = distinct !{!15, !16} +!16 = distinct !{!16} + ;. 
; CHECK: [[TBAA0]] = !{[[LOOP1]], [[LOOP1]], i64 0} ; CHECK: [[LOOP1]] = !{!"scalar type", [[META2:![0-9]+]]} diff --git a/llvm/test/Transforms/InstCombine/select-value-equivalence.ll b/llvm/test/Transforms/InstCombine/select-value-equivalence.ll index da2e59d760f96f..d55766ddf40405 100644 --- a/llvm/test/Transforms/InstCombine/select-value-equivalence.ll +++ b/llvm/test/Transforms/InstCombine/select-value-equivalence.ll @@ -309,3 +309,28 @@ define <2 x float> @select_fcmp_fadd_une_zero_vec(<2 x float> %x, <2 x float> %y %retval = select <2 x i1> %fcmp, <2 x float> %x, <2 x float> %fadd ret <2 x float> %retval } + +define <2 x i8> @select_vec_op_const_no_undef(<2 x i8> %x) { +; CHECK-LABEL: define <2 x i8> @select_vec_op_const_no_undef( +; CHECK-SAME: <2 x i8> [[X:%.*]]) { +; CHECK-NEXT: [[XZ:%.*]] = icmp eq <2 x i8> [[X]], +; CHECK-NEXT: [[XR:%.*]] = select <2 x i1> [[XZ]], <2 x i8> , <2 x i8> +; CHECK-NEXT: ret <2 x i8> [[XR]] +; + %xz = icmp eq <2 x i8> %x, + %xr = select <2 x i1> %xz, <2 x i8> %x, <2 x i8> + ret <2 x i8> %xr +} + +; FIXME: This is a miscompile. 
+define <2 x i8> @select_vec_op_const_undef(<2 x i8> %x) { +; CHECK-LABEL: define <2 x i8> @select_vec_op_const_undef( +; CHECK-SAME: <2 x i8> [[X:%.*]]) { +; CHECK-NEXT: [[XZ:%.*]] = icmp eq <2 x i8> [[X]], +; CHECK-NEXT: [[XR:%.*]] = select <2 x i1> [[XZ]], <2 x i8> , <2 x i8> +; CHECK-NEXT: ret <2 x i8> [[XR]] +; + %xz = icmp eq <2 x i8> %x, + %xr = select <2 x i1> %xz, <2 x i8> %x, <2 x i8> + ret <2 x i8> %xr +} diff --git a/llvm/test/Transforms/LICM/hoist-metadata.ll b/llvm/test/Transforms/LICM/hoist-metadata.ll index 60b61944b33ae0..f25cb966a37fc7 100644 --- a/llvm/test/Transforms/LICM/hoist-metadata.ll +++ b/llvm/test/Transforms/LICM/hoist-metadata.ll @@ -77,7 +77,7 @@ define void @noalias_metadata_load_may_not_execute() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 16 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[A]] -; CHECK-NEXT: [[GEP_PROMOTED:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[TBAA3:![0-9]+]], !noalias [[META7:![0-9]+]] +; CHECK-NEXT: [[GEP_PROMOTED:%.*]] = load i32, ptr [[GEP]], align 4 ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[ADD1:%.*]] = phi i32 [ [[GEP_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP_LATCH:%.*]] ] @@ -92,7 +92,7 @@ define void @noalias_metadata_load_may_not_execute() { ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[ADD2:%.*]] = phi i32 [ [[ADD]], [[LOOP_LATCH]] ], [ [[ADD1]], [[LOOP_HEADER]] ] -; CHECK-NEXT: store i32 [[ADD2]], ptr [[GEP]], align 4, !tbaa [[TBAA3]], !noalias [[META7]] +; CHECK-NEXT: store i32 [[ADD2]], ptr [[GEP]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -132,11 +132,4 @@ exit: ; CHECK: [[RNG0]] = !{i32 0, i32 10} ; CHECK: [[META1]] = !{} ; CHECK: [[META2]] = !{i64 4} -; CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} -; CHECK: [[META4]] = !{!"short", [[META5:![0-9]+]], i64 0} -; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -; CHECK: 
[[META6]] = !{!"Simple C/C++ TBAA"} -; CHECK: [[META7]] = !{[[META8:![0-9]+]]} -; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]]} -; CHECK: [[META9]] = distinct !{[[META9]]} ;. diff --git a/llvm/test/Transforms/LICM/hoisting-preheader-debugloc.ll b/llvm/test/Transforms/LICM/hoisting-preheader-debugloc.ll index 570f4230c1a90d..61f0eb19a9bd1b 100644 --- a/llvm/test/Transforms/LICM/hoisting-preheader-debugloc.ll +++ b/llvm/test/Transforms/LICM/hoisting-preheader-debugloc.ll @@ -1,6 +1,8 @@ ; RUN: opt -passes=licm %s -S | FileCheck %s -; CHECK: %arrayidx4.promoted = load i32, ptr %arrayidx4, align 4, !tbaa !{{[0-9]+$}} +; CHECK: %arrayidx4.promoted = load i32, ptr %arrayidx4, align 4 +; CHECK-NOT: !dbg +; CHECK: br label %for.body target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-117393.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-117393.ll new file mode 100644 index 00000000000000..c40e32baad7b31 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-117393.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=systemz-unknown -mcpu=z15 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s + +define void @h() { +; CHECK-LABEL: @h( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> zeroinitializer, zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> splat (i32 1), zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> zeroinitializer, zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i32> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> zeroinitializer, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = call i32 
@llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP7]]) +; CHECK-NEXT: ret void +; +entry: + %0 = shl <4 x i32> zeroinitializer, zeroinitializer + %1 = or <4 x i32> %0, zeroinitializer + %2 = or <4 x i32> splat (i32 1), zeroinitializer + %3 = or <4 x i32> zeroinitializer, zeroinitializer + %4 = shl <4 x i32> zeroinitializer, zeroinitializer + %5 = or <4 x i32> %4, zeroinitializer + %6 = and <4 x i32> %2, %1 + %7 = and <4 x i32> %3, %6 + %8 = and <4 x i32> %5, %7 + %9 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %8) + ret void +} diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll index 18aa5c9e044a98..d34ac2bb300407 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll @@ -319,7 +319,7 @@ out: define void @hoist_noalias_addrspace_both(i1 %c, ptr %p, i64 %val) { ; CHECK-LABEL: @hoist_noalias_addrspace_both( ; CHECK-NEXT: if: -; CHECK-NEXT: [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8 +; CHECK-NEXT: [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8, !noalias.addrspace [[META7:![0-9]+]] ; CHECK-NEXT: ret void ; if: @@ -361,7 +361,7 @@ out: define void @hoist_noalias_addrspace_switch(i64 %i, ptr %p, i64 %val) { ; CHECK-LABEL: @hoist_noalias_addrspace_switch( ; CHECK-NEXT: out: -; CHECK-NEXT: [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8 +; CHECK-NEXT: [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8, !noalias.addrspace [[META7]] ; CHECK-NEXT: ret void ; switch i64 %i, label %bb0 [ @@ -381,6 +381,48 @@ out: ret void } +define void @hoist_noalias_addrspace_switch_multiple(i64 %i, ptr %p, i64 %val) { +; CHECK-LABEL: @hoist_noalias_addrspace_switch_multiple( +; CHECK-NEXT: out: +; CHECK-NEXT: [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8, !noalias.addrspace [[META8:![0-9]+]] +; CHECK-NEXT: 
ret void +; + switch i64 %i, label %bb0 [ + i64 1, label %bb1 + i64 2, label %bb2 + ] +bb0: + %t = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !7 + br label %out +bb1: + %e = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !8 + br label %out +bb2: + %f = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !9 + br label %out +out: + ret void +} + +; !noalias_addrspace is not safe to speculate as it causes immediate undefined behavior. +define ptr @speculate_noalias_addrspace(i1 %c, ptr dereferenceable(8) align 8 %p) { +; CHECK-LABEL: @speculate_noalias_addrspace( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull [[META2]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], ptr [[V]], ptr null +; CHECK-NEXT: ret ptr [[SPEC_SELECT]] +; +entry: + br i1 %c, label %if, label %join + +if: + %v = load ptr, ptr %p, !nonnull !{}, !noundef !{}, !noalias.addrspace !4 + br label %join + +join: + %phi = phi ptr [ %v, %if ], [ null, %entry ] + ret ptr %phi +} !0 = !{ i8 0, i8 1 } !1 = !{ i8 3, i8 5 } @@ -389,6 +431,9 @@ out: !4 = !{i32 5, i32 6} !5 = !{i32 5, i32 7} !6 = !{i32 4, i32 8} +!7 = !{i32 4, i32 8, i32 20, i32 31} +!8 = !{i32 2, i32 5} +!9 = !{i32 2, i32 5, i32 22, i32 42, i32 45, i32 50} ;. ; CHECK: [[RNG0]] = !{i8 0, i8 1, i8 3, i8 5} @@ -398,4 +443,6 @@ out: ; CHECK: [[RNG4]] = !{i32 0, i32 10} ; CHECK: [[META5]] = !{i64 4} ; CHECK: [[META6]] = !{float 2.500000e+00} +; CHECK: [[META7]] = !{i32 5, i32 6} +; CHECK: [[META8]] = !{i32 4, i32 5} ;. 
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s index ca0a81e4207d53..72d7de33533467 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s @@ -1736,140 +1736,140 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 5 0.33 * vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 1 5 0.33 * vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 1 5 0.33 * vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovdb %zmm19, %xmm16 +# CHECK-NEXT: 1 5 1.00 vpmovdb %zmm19, %xmm16 # CHECK-NEXT: 1 11 1.50 * vpmovdb %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovdb %zmm19, %xmm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovdb %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovdb %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovdw %zmm19, %ymm16 +# CHECK-NEXT: 1 5 1.00 vpmovdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovdw %zmm19, %ymm16 # CHECK-NEXT: 1 11 1.50 * vpmovdw %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovdw %zmm19, %ymm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovdw %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovdw %zmm19, %ymm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovqb %zmm19, %xmm16 +# CHECK-NEXT: 1 5 1.00 vpmovdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovqb %zmm19, %xmm16 # CHECK-NEXT: 1 11 1.50 * vpmovqb %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovqb %zmm19, %xmm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovqb %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovqb %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovqd %zmm19, %ymm16 +# CHECK-NEXT: 1 5 1.00 vpmovqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovqd %zmm19, %ymm16 # CHECK-NEXT: 1 11 1.50 * vpmovqd %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovqd %zmm19, 
%ymm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovqd %zmm19, %ymm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovqd %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovqd %zmm19, %ymm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovqw %zmm19, %xmm16 +# CHECK-NEXT: 1 5 1.00 vpmovqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovqw %zmm19, %xmm16 # CHECK-NEXT: 1 11 1.50 * vpmovqw %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovqw %zmm19, %xmm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovqw %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovqw %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovsdb %zmm19, %xmm16 +# CHECK-NEXT: 1 5 1.00 vpmovqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovsdb %zmm19, %xmm16 # CHECK-NEXT: 1 11 1.50 * vpmovsdb %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovsdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovsdb %zmm19, %xmm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovsdb %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovsdb %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovsdw %zmm19, %ymm16 +# CHECK-NEXT: 1 5 1.00 vpmovsdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovsdw %zmm19, %ymm16 # CHECK-NEXT: 1 11 1.50 * vpmovsdw %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovsdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovsdw %zmm19, %ymm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovsdw %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovsdw %zmm19, %ymm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovsqb %zmm19, %xmm16 +# CHECK-NEXT: 1 5 1.00 vpmovsdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovsqb %zmm19, %xmm16 # CHECK-NEXT: 1 11 1.50 * vpmovsqb %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovsqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovsqb %zmm19, %xmm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovsqb %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovsqb %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovsqd %zmm19, %ymm16 +# CHECK-NEXT: 1 5 1.00 vpmovsqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 5 
1.00 vpmovsqd %zmm19, %ymm16 # CHECK-NEXT: 1 11 1.50 * vpmovsqd %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovsqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovsqd %zmm19, %ymm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovsqd %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovsqd %zmm19, %ymm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovsqw %zmm19, %xmm16 +# CHECK-NEXT: 1 5 1.00 vpmovsqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovsqw %zmm19, %xmm16 # CHECK-NEXT: 1 11 1.50 * vpmovsqw %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovsqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovsqw %zmm19, %xmm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovsqw %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovsqw %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: 1 4 2.00 vpmovsxbd %xmm16, %zmm19 +# CHECK-NEXT: 1 5 1.00 vpmovsqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovsxbd %xmm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovsxbd (%rax), %zmm19 -# CHECK-NEXT: 1 4 2.00 vpmovsxbd %xmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vpmovsxbd %xmm16, %zmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovsxbd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1 4 2.00 vpmovsxbd %xmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovsxbd %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovsxbd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 2.00 vpmovsxbq %xmm16, %zmm19 +# CHECK-NEXT: 1 4 1.00 vpmovsxbq %xmm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovsxbq (%rax), %zmm19 -# CHECK-NEXT: 1 4 2.00 vpmovsxbq %xmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vpmovsxbq %xmm16, %zmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovsxbq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1 4 2.00 vpmovsxbq %xmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovsxbq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovsxbq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 2.00 vpmovsxdq %ymm16, %zmm19 +# CHECK-NEXT: 1 4 1.00 vpmovsxdq %ymm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovsxdq (%rax), %zmm19 -# CHECK-NEXT: 1 4 2.00 vpmovsxdq %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 1 4 
1.00 vpmovsxdq %ymm16, %zmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovsxdq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1 4 2.00 vpmovsxdq %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovsxdq %ymm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovsxdq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 2.00 vpmovsxwd %ymm16, %zmm19 +# CHECK-NEXT: 1 4 1.00 vpmovsxwd %ymm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovsxwd (%rax), %zmm19 -# CHECK-NEXT: 1 4 2.00 vpmovsxwd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vpmovsxwd %ymm16, %zmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovsxwd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1 4 2.00 vpmovsxwd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovsxwd %ymm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovsxwd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 2.00 vpmovsxwq %xmm16, %zmm19 +# CHECK-NEXT: 1 4 1.00 vpmovsxwq %xmm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovsxwq (%rax), %zmm19 -# CHECK-NEXT: 1 4 2.00 vpmovsxwq %xmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vpmovsxwq %xmm16, %zmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovsxwq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1 4 2.00 vpmovsxwq %xmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovsxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovsxwq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovusdb %zmm19, %xmm16 +# CHECK-NEXT: 1 5 1.00 vpmovusdb %zmm19, %xmm16 # CHECK-NEXT: 1 11 1.50 * vpmovusdb %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovusdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovusdb %zmm19, %xmm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovusdb %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovusdb %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovusdw %zmm19, %ymm16 +# CHECK-NEXT: 1 5 1.00 vpmovusdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovusdw %zmm19, %ymm16 # CHECK-NEXT: 1 11 1.50 * vpmovusdw %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovusdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovusdw %zmm19, %ymm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovusdw %zmm19, 
(%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovusdw %zmm19, %ymm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovusqb %zmm19, %xmm16 +# CHECK-NEXT: 1 5 1.00 vpmovusdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovusqb %zmm19, %xmm16 # CHECK-NEXT: 1 11 1.50 * vpmovusqb %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovusqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovusqb %zmm19, %xmm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovusqb %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovusqb %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovusqd %zmm19, %ymm16 +# CHECK-NEXT: 1 5 1.00 vpmovusqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovusqd %zmm19, %ymm16 # CHECK-NEXT: 1 11 1.50 * vpmovusqd %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovusqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovusqd %zmm19, %ymm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovusqd %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovusqd %zmm19, %ymm16 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovusqw %zmm19, %xmm16 +# CHECK-NEXT: 1 5 1.00 vpmovusqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovusqw %zmm19, %xmm16 # CHECK-NEXT: 1 11 1.50 * vpmovusqw %zmm19, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovusqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovusqw %zmm19, %xmm16 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovusqw %zmm19, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovusqw %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: 1 4 2.00 vpmovzxbd %xmm16, %zmm19 +# CHECK-NEXT: 1 5 1.00 vpmovusqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovzxbd %xmm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovzxbd (%rax), %zmm19 -# CHECK-NEXT: 1 4 2.00 vpmovzxbd %xmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vpmovzxbd %xmm16, %zmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovzxbd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1 4 2.00 vpmovzxbd %xmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovzxbd %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovzxbd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 2.00 vpmovzxbq %xmm16, %zmm19 +# CHECK-NEXT: 1 4 
1.00 vpmovzxbq %xmm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovzxbq (%rax), %zmm19 -# CHECK-NEXT: 1 4 2.00 vpmovzxbq %xmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vpmovzxbq %xmm16, %zmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovzxbq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1 4 2.00 vpmovzxbq %xmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovzxbq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovzxbq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 2.00 vpmovzxdq %ymm16, %zmm19 +# CHECK-NEXT: 1 4 1.00 vpmovzxdq %ymm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovzxdq (%rax), %zmm19 -# CHECK-NEXT: 1 4 2.00 vpmovzxdq %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vpmovzxdq %ymm16, %zmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovzxdq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1 4 2.00 vpmovzxdq %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovzxdq %ymm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovzxdq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 2.00 vpmovzxwd %ymm16, %zmm19 +# CHECK-NEXT: 1 4 1.00 vpmovzxwd %ymm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovzxwd (%rax), %zmm19 -# CHECK-NEXT: 1 4 2.00 vpmovzxwd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vpmovzxwd %ymm16, %zmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovzxwd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1 4 2.00 vpmovzxwd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovzxwd %ymm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovzxwd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 2.00 vpmovzxwq %xmm16, %zmm19 +# CHECK-NEXT: 1 4 1.00 vpmovzxwq %xmm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovzxwq (%rax), %zmm19 -# CHECK-NEXT: 1 4 2.00 vpmovzxwq %xmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vpmovzxwq %xmm16, %zmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovzxwq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1 4 2.00 vpmovzxwq %xmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovzxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovzxwq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vpmulld %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 1 10 1.00 * vpmulld 
(%rax), %zmm17, %zmm19 @@ -2233,7 +2233,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 5.33 5.33 5.33 - - - - - 219.50 1216.50 774.00 351.00 312.50 312.50 17.00 215.67 215.67 215.67 204.67 204.67 204.67 16.50 16.50 +# CHECK-NEXT: 5.33 5.33 5.33 - - - - - 219.50 1119.00 676.50 351.00 312.50 312.50 17.00 215.67 215.67 215.67 204.67 204.67 204.67 16.50 16.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -2793,140 +2793,140 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovdb %zmm19, %xmm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdb %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovdb %zmm19, %xmm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdb %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovdb %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovdw %zmm19, 
%ymm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovdw %zmm19, %ymm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdw %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovdw %zmm19, %ymm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdw %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovdw %zmm19, %ymm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovqb %zmm19, %xmm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqb %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovqb %zmm19, %xmm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqb %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqb %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovqd %zmm19, %ymm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqd %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqd 
%zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovqd %zmm19, %ymm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqd %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqd %zmm19, %ymm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovqw %zmm19, %xmm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqw %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovqw %zmm19, %xmm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqw %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqw %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsdb %zmm19, %xmm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdb %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsdb %zmm19, %xmm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdb %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsdb %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - 
- - - - - - - - - vpmovsdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsdw %zmm19, %ymm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdw %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsdw %zmm19, %ymm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdw %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsdw %zmm19, %ymm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsqb %zmm19, %xmm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqb %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsqb %zmm19, %xmm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqb %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqb %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsqd %zmm19, %ymm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqd %zmm19, (%rax) -# CHECK-NEXT: - - - - - - 
- - - 2.50 2.50 - - - - - - - - - - - - vpmovsqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsqd %zmm19, %ymm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqd %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqd %zmm19, %ymm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsqw %zmm19, %xmm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqw %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsqw %zmm19, %xmm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqw %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqw %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbd %xmm16, %zmm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxbd %xmm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbd (%rax), %zmm19 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbd %xmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxbd %xmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbd (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbd 
%xmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxbd %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbq %xmm16, %zmm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxbq %xmm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbq (%rax), %zmm19 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbq %xmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxbq %xmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbq (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbq %xmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxbq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxdq %ymm16, %zmm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxdq %ymm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxdq (%rax), %zmm19 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxdq %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxdq %ymm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxdq (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxdq %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxdq %ymm16, 
%zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxdq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxwd %ymm16, %zmm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxwd %ymm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwd (%rax), %zmm19 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxwd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxwd %ymm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwd (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxwd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxwd %ymm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxwq %xmm16, %zmm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxwq %xmm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwq (%rax), %zmm19 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxwq %xmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxwq %xmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwq (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxwq %xmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - 
vpmovsxwq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusdb %zmm19, %xmm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdb %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusdb %zmm19, %xmm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdb %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusdb %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusdw %zmm19, %ymm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdw %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusdw %zmm19, %ymm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdw %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusdw %zmm19, %ymm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusqb %zmm19, %xmm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqb %zmm19, (%rax) -# CHECK-NEXT: - - - - - 
- - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusqb %zmm19, %xmm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqb %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqb %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusqd %zmm19, %ymm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqd %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusqd %zmm19, %ymm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqd %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqd %zmm19, %ymm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusqw %zmm19, %xmm16 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqw %zmm19, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusqw %zmm19, %xmm16 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqw %zmm19, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - 
vpmovusqw %zmm19, %xmm16 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbd %xmm16, %zmm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovusqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxbd %xmm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbd (%rax), %zmm19 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbd %xmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxbd %xmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbd (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbd %xmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxbd %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbq %xmm16, %zmm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxbq %xmm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbq (%rax), %zmm19 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbq %xmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxbq %xmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbq (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbq %xmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxbq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - 
vpmovzxbq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxdq %ymm16, %zmm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxdq %ymm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxdq (%rax), %zmm19 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxdq %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxdq %ymm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxdq (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxdq %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxdq %ymm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxdq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxwd %ymm16, %zmm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxwd %ymm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwd (%rax), %zmm19 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxwd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxwd %ymm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwd (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxwd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxwd %ymm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - 
vpmovzxwq %xmm16, %zmm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxwq %xmm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwq (%rax), %zmm19 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxwq %xmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxwq %xmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwq (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxwq %xmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 - - - - - - - - - - - vpmulld %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmulld (%rax), %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bw.s index 3280ed3b8aab6f..ffdfe3fe98955e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bw.s @@ -912,32 +912,32 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 1 1.00 vpmovw2m %zmm0, %k0 # CHECK-NEXT: 1 0 0.17 vpmovm2b %k0, %zmm0 # CHECK-NEXT: 1 0 0.17 vpmovm2w %k0, %zmm0 -# CHECK-NEXT: 1 4 2.00 vpmovsxbw %ymm16, %zmm19 +# CHECK-NEXT: 1 4 1.00 vpmovsxbw %ymm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovsxbw (%rax), %zmm19 -# CHECK-NEXT: 1 4 2.00 vpmovsxbw %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vpmovsxbw %ymm16, %zmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovsxbw (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1 4 2.00 vpmovsxbw %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovsxbw 
%ymm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovsxbw (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovswb %zmm16, %ymm19 +# CHECK-NEXT: 1 5 1.00 vpmovswb %zmm16, %ymm19 # CHECK-NEXT: 1 11 1.50 * vpmovswb %zmm16, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovswb %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovswb %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovswb %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovuswb %zmm16, %ymm19 +# CHECK-NEXT: 1 5 1.00 vpmovswb %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovuswb %zmm16, %ymm19 # CHECK-NEXT: 1 11 1.50 * vpmovuswb %zmm16, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovuswb %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovuswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovuswb %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovuswb %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 5 2.50 vpmovwb %zmm16, %ymm19 +# CHECK-NEXT: 1 5 1.00 vpmovuswb %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 5 1.00 vpmovwb %zmm16, %ymm19 # CHECK-NEXT: 1 11 1.50 * vpmovwb %zmm16, (%rax) -# CHECK-NEXT: 1 5 2.50 vpmovwb %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 1 5 1.00 vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovwb %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 5 2.50 vpmovwb %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 4 2.00 vpmovzxbw %ymm16, %zmm19 +# CHECK-NEXT: 1 5 1.00 vpmovwb %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovzxbw %ymm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovzxbw (%rax), %zmm19 -# CHECK-NEXT: 1 4 2.00 vpmovzxbw %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vpmovzxbw %ymm16, %zmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovzxbw (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1 4 2.00 vpmovzxbw %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vpmovzxbw %ymm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovzxbw (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vpmulhrsw %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 1 10 1.00 * vpmulhrsw (%rax), %zmm17, %zmm19 @@ -1145,7 +1145,7 @@ 
vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 0.67 0.67 0.67 2.00 2.00 2.00 2.00 - 177.50 322.50 288.00 161.00 123.00 123.00 6.00 77.33 77.33 77.33 75.33 75.33 75.33 3.00 3.00 +# CHECK-NEXT: 0.67 0.67 0.67 2.00 2.00 2.00 2.00 - 177.50 303.00 268.50 161.00 123.00 123.00 6.00 77.33 77.33 77.33 75.33 75.33 75.33 3.00 3.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -1447,32 +1447,32 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - vpmovw2m %zmm0, %k0 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpmovm2b %k0, %zmm0 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpmovm2w %k0, %zmm0 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbw %ymm16, %zmm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxbw %ymm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbw (%rax), %zmm19 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbw %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxbw %ymm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbw (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbw %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxbw %ymm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbw (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - 
- - - 2.50 2.50 - - - - - - - - - - - - vpmovswb %zmm16, %ymm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovswb %zmm16, %ymm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovswb %zmm16, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovswb %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovswb %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovswb %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovuswb %zmm16, %ymm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovswb %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovuswb %zmm16, %ymm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovuswb %zmm16, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovuswb %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovuswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovuswb %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovuswb %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovwb %zmm16, %ymm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovuswb %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovwb %zmm16, %ymm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovwb %zmm16, (%rax) -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovwb %zmm16, %ymm19 {%k1} +# 
CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovwb %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovwb %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbw %ymm16, %zmm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovwb %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxbw %ymm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbw (%rax), %zmm19 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbw %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxbw %ymm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbw (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbw %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxbw %ymm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbw (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 - - - - - - - - - - - vpmulhrsw %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmulhrsw (%rax), %zmm17, %zmm19 diff --git a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn index 945d31afca10f0..51dc24481a513b 100644 --- a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn @@ -15,7 +15,6 @@ unittest("CoreTests") { "BinaryContext.cpp", "DynoStats.cpp", "MCPlusBuilder.cpp", - "MemoryMaps.cpp", ] defines = [] 
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index cf4a31a144557e..67343297fae41d 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -285,6 +285,7 @@ static_library("builtins") { if (long_double_is_80_bits) { sources += [ "divxc3.c", + "extendhfxf2.c", "extendxftf2.c", "fixunsxfdi.c", "fixunsxfsi.c", diff --git a/mlir/docs/Bufferization.md b/mlir/docs/Bufferization.md index e16fe91212a1a5..02cfee5f2b8dcd 100644 --- a/mlir/docs/Bufferization.md +++ b/mlir/docs/Bufferization.md @@ -223,8 +223,8 @@ func.func @test_matmul(%A: memref<1x17x19xf32>, %B: memref<1x19x29xf32>, %C: memref<1x17x29xf32>) { - %A_tensor = bufferization.to_tensor %A restrict : memref<1x17x19xf32> - %B_tensor = bufferization.to_tensor %B restrict : memref<1x19x29xf32> + %A_tensor = bufferization.to_tensor %A restrict : memref<1x17x19xf32> to tensor<1x17x19xf32> + %B_tensor = bufferization.to_tensor %B restrict : memref<1x19x29xf32> to tensor<1x19x29xf32> %0 = tosa.matmul %A_tensor, %B_tensor : (tensor<1x17x19xf32>, tensor<1x19x29xf32>) -> diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td index 76d97f106dcb88..56fbe9cdc2d21d 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td @@ -964,7 +964,7 @@ def AffineVectorLoadOp : AffineLoadOpBase<"vector_load"> { (see [vector.transfer_read](../Vector/#vectortransfer_read-mlirvectortransferreadop)). }]; - let results = (outs AnyVector:$result); + let results = (outs AnyVectorOfNonZeroRank:$result); let builders = [ /// Builds an affine vector load op with the specified map and operands. @@ -1031,7 +1031,7 @@ def AffineVectorStoreOp : AffineStoreOpBase<"vector_store"> { (see [vector.transfer_write](../Vector/#vectortransfer_write-mlirvectortransferwriteop)). 
}]; - let arguments = (ins AnyVector:$value, + let arguments = (ins AnyVectorOfNonZeroRank:$value, Arg:$memref, Variadic:$indices, diff --git a/mlir/include/mlir/Dialect/ArmNeon/ArmNeon.td b/mlir/include/mlir/Dialect/ArmNeon/ArmNeon.td index 9cc792093bf836..475b11f12c5f01 100644 --- a/mlir/include/mlir/Dialect/ArmNeon/ArmNeon.td +++ b/mlir/include/mlir/Dialect/ArmNeon/ArmNeon.td @@ -35,7 +35,7 @@ def ArmNeon_Dialect : Dialect { //===----------------------------------------------------------------------===// class NeonVectorOfLength : ShapedContainerType< - [elementType], And<[IsVectorOfShape<[length]>, IsFixedVectorTypePred]>, + [elementType], And<[IsVectorOfShape<[length]>, IsFixedVectorOfAnyRankTypePred]>, "a vector with length " # length, "::mlir::VectorType">; diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td index 9a058ae4fe7647..6fd992afbf0436 100644 --- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td +++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td @@ -371,7 +371,7 @@ def TileLoadOp : ArmSME_Op<"tile_load", [ let arguments = (ins Arg:$base, Variadic:$indices, - Optional:$padding, Optional:$mask, + Optional:$padding, Optional:$mask, ArmSME_TileSliceLayoutAttr:$layout ); let results = (outs SMETile:$result); @@ -444,7 +444,7 @@ def TileStoreOp : ArmSME_Op<"tile_store", [ }]; let arguments = (ins SMETile:$valueToStore, Arg:$base, - Variadic:$indices, Optional:$mask, + Variadic:$indices, Optional:$mask, ArmSME_TileSliceLayoutAttr:$layout ); let extraClassDeclaration = [{ @@ -799,9 +799,9 @@ class OuterProductWideningBase { let arguments = (ins - AnyTypeOf:$lhs, AnyVector:$rhs, - Optional:$lhsMask, Optional:$rhsMask, - Optional:$acc); + AnyTypeOf:$lhs, AnyVectorOfNonZeroRank:$rhs, + Optional:$lhsMask, Optional:$rhsMask, + Optional:$acc); let results = (outs AnyTypeOf:$result); let assemblyFormat = [{ diff --git a/mlir/include/mlir/Dialect/ArmSVE/IR/ArmSVE.td 
b/mlir/include/mlir/Dialect/ArmSVE/IR/ArmSVE.td index d7e8b22fbd2d35..cdcf4d8752e874 100644 --- a/mlir/include/mlir/Dialect/ArmSVE/IR/ArmSVE.td +++ b/mlir/include/mlir/Dialect/ArmSVE/IR/ArmSVE.td @@ -100,11 +100,11 @@ class ScalableMaskedFOp:$mask, - ScalableVectorOf<[AnyFloat]>:$src1, - ScalableVectorOf<[AnyFloat]>:$src2 + ScalableVectorOfAnyRank<[I1]>:$mask, + ScalableVectorOfAnyRank<[AnyFloat]>:$src1, + ScalableVectorOfAnyRank<[AnyFloat]>:$src2 ); - let results = (outs ScalableVectorOf<[AnyFloat]>:$res); + let results = (outs ScalableVectorOfAnyRank<[AnyFloat]>:$res); let assemblyFormat = "$mask `,` $src1 `,` $src2 attr-dict `:` type($mask) `,` type($res)"; } @@ -123,11 +123,11 @@ class ScalableMaskedIOp:$mask, - ScalableVectorOf<[I8, I16, I32, I64]>:$src1, - ScalableVectorOf<[I8, I16, I32, I64]>:$src2 + ScalableVectorOfAnyRank<[I1]>:$mask, + ScalableVectorOfAnyRank<[I8, I16, I32, I64]>:$src1, + ScalableVectorOfAnyRank<[I8, I16, I32, I64]>:$src2 ); - let results = (outs ScalableVectorOf<[I8, I16, I32, I64]>:$res); + let results = (outs ScalableVectorOfAnyRank<[I8, I16, I32, I64]>:$res); let assemblyFormat = "$mask `,` $src1 `,` $src2 attr-dict `:` type($mask) `,` type($res)"; } @@ -511,55 +511,55 @@ def ScalableMaskedDivFOp : ScalableMaskedFOp<"masked.divf", "division">; def UmmlaIntrOp : ArmSVE_IntrBinaryOverloadedOp<"ummla">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank)>; def SmmlaIntrOp : ArmSVE_IntrBinaryOverloadedOp<"smmla">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank)>; def SdotIntrOp : ArmSVE_IntrBinaryOverloadedOp<"sdot">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, 
AnyScalableVectorOfAnyRank)>; def UdotIntrOp : ArmSVE_IntrBinaryOverloadedOp<"udot">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank)>; def ScalableMaskedAddIIntrOp : ArmSVE_IntrBinaryOverloadedOp<"add">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank)>; def ScalableMaskedAddFIntrOp : ArmSVE_IntrBinaryOverloadedOp<"fadd">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank)>; def ScalableMaskedMulIIntrOp : ArmSVE_IntrBinaryOverloadedOp<"mul">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank)>; def ScalableMaskedMulFIntrOp : ArmSVE_IntrBinaryOverloadedOp<"fmul">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank)>; def ScalableMaskedSubIIntrOp : ArmSVE_IntrBinaryOverloadedOp<"sub">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank)>; def ScalableMaskedSubFIntrOp : ArmSVE_IntrBinaryOverloadedOp<"fsub">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank)>; def ScalableMaskedSDivIIntrOp : ArmSVE_IntrBinaryOverloadedOp<"sdiv">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, 
AnyScalableVectorOfAnyRank)>; def ScalableMaskedUDivIIntrOp : ArmSVE_IntrBinaryOverloadedOp<"udiv">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank)>; def ScalableMaskedDivFIntrOp : ArmSVE_IntrBinaryOverloadedOp<"fdiv">, - Arguments<(ins AnyScalableVector, AnyScalableVector, AnyScalableVector)>; + Arguments<(ins AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank, AnyScalableVectorOfAnyRank)>; def ConvertFromSvboolIntrOp : ArmSVE_IntrOp<"convert.from.svbool", @@ -581,8 +581,8 @@ def ZipX2IntrOp : ArmSVE_IntrOp<"zip.x2", /*overloadedOperands=*/[0], /*overloadedResults=*/[], /*numResults=*/2>, - Arguments<(ins Arg:$v1, - Arg:$v2)>; + Arguments<(ins Arg:$v1, + Arg:$v2)>; // Note: This multi-vector intrinsic requires SME2. def ZipX4IntrOp : ArmSVE_IntrOp<"zip.x4", @@ -590,10 +590,10 @@ def ZipX4IntrOp : ArmSVE_IntrOp<"zip.x4", /*overloadedOperands=*/[0], /*overloadedResults=*/[], /*numResults=*/4>, - Arguments<(ins Arg:$v1, - Arg:$v2, - Arg:$v3, - Arg:$v4)>; + Arguments<(ins Arg:$v1, + Arg:$v2, + Arg:$v3, + Arg:$v4)>; // Note: This intrinsic requires SME or SVE2.1. 
def PselIntrOp : ArmSVE_IntrOp<"psel", diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td index 7bcc3b9e799865..fad78a63444b91 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td @@ -387,9 +387,7 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor", [ BufferizableOpInterface, SameOperandsAndResultShape, SameOperandsAndResultElementType, - TypesMatchWith<"result type matches tensor equivalent of 'memref'", - "memref", "result", - "memref::getTensorTypeFromMemRefType($_self)"> + AllElementTypesMatch<["memref", "result"]> ]> { let summary = "create a tensor from a `memref`"; let description = [{ @@ -404,7 +402,7 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor", [ ```mlir // Produces a value of tensor<4x?xf32> type. - %t = bufferization.to_tensor %m : memref<4x?xf32, #layout, 0> + %t = bufferization.to_tensor %m : memref<4x?xf32, #layout, 0> to tensor<4x?xf32> ``` If the `writable` unit attribute is set, the produced tensor is considered @@ -427,7 +425,7 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor", [ Example: ``` - %t = bufferization.to_tensor %m restrict writable : memref<4xf32> + %t = bufferization.to_tensor %m restrict writable : memref<4xf32> to tensor<4xf32> // %t is writable, so the tensor.insert may bufferize in-place in the // absence of other conflicts. @@ -476,9 +474,16 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor", [ let assemblyFormat = [{ $memref (`restrict` $restrict^)? (`writable` $writable^)? 
attr-dict - `:` type($memref) + `:` type($memref) `to` type($result) }]; + let builders = [ + OpBuilder<(ins "Value":$memref, CArg<"bool", "false">:$restrict, CArg<"bool", "false">:$writeable), [{ + auto rtt = memref::getTensorTypeFromMemRefType(memref.getType()); + build($_builder, $_state, rtt, memref, restrict, writeable); + }]> + ]; + let hasCanonicalizer = 1; let hasFolder = 1; } @@ -493,9 +498,8 @@ def Bufferization_ToMemrefOp : Bufferization_Op<"to_memref", [ SameOperandsAndResultShape, SameOperandsAndResultElementType, Pure, - TypesMatchWith<"type of 'tensor' is the tensor equivalent of 'memref'", - "memref", "tensor", - "memref::getTensorTypeFromMemRefType($_self)"> + AllShapesMatch<["memref", "tensor"]>, + AllElementTypesMatch<["memref", "tensor"]> ]> { let summary = "cast a tensor to memref"; let description = [{ @@ -503,7 +507,7 @@ def Bufferization_ToMemrefOp : Bufferization_Op<"to_memref", [ ```mlir // Result type is memref<4x?xf32, #layout, 0> - %m = bufferization.to_memref %t : memref<4x?xf32, #layout, 0> + %m = bufferization.to_memref %t : tensor<4x?xf32> to memref<4x?xf32, #layout, 0> ``` This operation is a specialized variant of the built-in @@ -550,7 +554,7 @@ def Bufferization_ToMemrefOp : Bufferization_Op<"to_memref", [ }]; let assemblyFormat = [{ - $tensor (`read_only` $read_only^)? attr-dict `:` type($memref) + $tensor (`read_only` $read_only^)? 
attr-dict `:` type($tensor) `to` type($memref) }]; let hasFolder = 1; diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td index 3e93f33ffe0fb4..3bcde8edde5098 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td @@ -250,8 +250,8 @@ def OptimizeAllocationLiveness let summary = "This pass optimizes the liveness of temp allocations in the " "input function"; let description = - [{This pass will find all operations that have a memory allocation effect. - It will search for the corresponding deallocation and move it right after + [{This pass will find all operations that have a memory allocation effect. + It will search for the corresponding deallocation and move it right after the last user of the allocation. This will optimize the liveness of the allocations. @@ -510,6 +510,10 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> { /*default=*/"false", "The memory space of an memref types must always be inferred. If " "unset, a default memory space of 0 is used otherwise.">, + Option<"useEncodingForMemorySpace", "use-encoding-for-memory-space", "bool", + /*default=*/"false", + "Use the Tensor encoding attribute for the memory space. 
Exclusive to" + " the 'must-infer-memory-space' option">, Option<"testAnalysisOnly", "test-analysis-only", "bool", /*default=*/"false", "Test only: Only run inplaceability analysis and annotate IR">, diff --git a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td index 1f52f6b91617c1..b39f2ee594cd4a 100644 --- a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td +++ b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td @@ -255,7 +255,7 @@ def NVGPU_LdMatrixOp : NVGPU_Op<"ldmatrix", [ let arguments = (ins Arg]>:$srcMemref, Variadic:$indices, BoolAttr:$transpose, I32Attr:$numTiles); - let results = (outs AnyVector:$res); + let results = (outs AnyVectorOfNonZeroRank:$res); let assemblyFormat = [{ $srcMemref`[` $indices `]` attr-dict `:` type($srcMemref) `->` type($res) }]; @@ -301,13 +301,13 @@ def NVGPU_MmaSyncOp : NVGPU_MmaSyncOp<"mma.sync"> { (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf32>) -> vector<2x2xf32> ``` }]; - let arguments = (ins AnyVector:$matrixA, - AnyVector:$matrixB, - AnyVector:$matrixC, + let arguments = (ins AnyVectorOfNonZeroRank:$matrixA, + AnyVectorOfNonZeroRank:$matrixB, + AnyVectorOfNonZeroRank:$matrixC, I64ArrayAttr:$mmaShape, OptionalAttr:$tf32Enabled); - let results = (outs AnyVector:$res); + let results = (outs AnyVectorOfNonZeroRank:$res); let builders = [ OpBuilder<(ins "Value":$matrixA, @@ -357,16 +357,16 @@ def NVGPU_MmaSparseSyncOp : NVGPU_MmaSyncOp<"mma.sp.sync"> { ``` }]; - let arguments = (ins AnyVector:$matrixA, - AnyVector:$matrixB, - AnyVector:$matrixC, + let arguments = (ins AnyVectorOfNonZeroRank:$matrixA, + AnyVectorOfNonZeroRank:$matrixB, + AnyVectorOfNonZeroRank:$matrixC, NVGPU_MmaSparseSyncMetadataType:$sparseMetadata, I64ArrayAttr:$mmaShape, DefaultValuedAttr:$sparsitySelector, OptionalAttr:$tf32Enabled ); - let results = (outs AnyVector:$res); + let results = (outs AnyVectorOfNonZeroRank:$res); let builders = [ OpBuilder<(ins "Value":$matrixA, @@ -825,10 +825,10 @@ def NVGPU_RcpOp : 
NVGPU_Op<"rcp", [Pure, The input and output must be of the same vector type and shape. }]; - let arguments = (ins VectorOf<[F32]>:$in, + let arguments = (ins VectorOfNonZeroRankOf<[F32]>:$in, DefaultValuedAttr:$rounding, UnitAttr:$ftz); - let results = (outs VectorOf<[F32]>:$out); + let results = (outs VectorOfNonZeroRankOf<[F32]>:$out); let assemblyFormat = [{ $in `{` `rounding` `=` $rounding (`,` `ftz` $ftz^)? `}` attr-dict `:` type($out) diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td index 8ff7d0d63469fd..82d26e365fb243 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td @@ -131,7 +131,7 @@ class SPIRV_IntelSplitBarrierOp let results = (outs); let assemblyFormat = [{ - $execution_scope `,` $memory_scope `,` $memory_semantics attr-dict + $execution_scope $memory_scope $memory_semantics attr-dict }]; let hasVerifier = 0; @@ -160,7 +160,7 @@ def SPIRV_INTELControlBarrierArriveOp #### Example: ```mlir - spirv.ControlBarrierArrive , , + spirv.ControlBarrierArrive ``` }]; } @@ -194,7 +194,7 @@ def SPIRV_INTELControlBarrierWaitOp #### Example: ```mlir - spirv.ControlBarrierWait , , + spirv.ControlBarrierWait ``` }]; } diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td index a4b43d656fe43e..a6d3163d4446fa 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td @@ -166,7 +166,7 @@ def Tosa_Int32TensorUpto4D : AnyTypeOf<[ class Tosa_TypeLike types, string description = ""> : TypeConstraint.predicate, - VectorOf.predicate, + VectorOfNonZeroRankOf.predicate, TosaTensorOf.predicate]>, description>; diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index 41d7ce6610085c..88c1b94412241e 100644 --- 
a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -40,7 +40,7 @@ def Vector_ContractionOp : DeclareOpInterfaceMethods, DeclareOpInterfaceMethods ]>, - Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyType:$acc, + Arguments<(ins AnyVectorOfNonZeroRank:$lhs, AnyVectorOfNonZeroRank:$rhs, AnyType:$acc, ArrayAttr:$indexing_maps, Vector_IteratorTypeArrayAttr:$iterator_types, DefaultValuedAttr]>, Arguments<(ins Vector_CombiningKindAttr:$kind, - AnyVector:$source, + AnyVectorOfNonZeroRank:$source, AnyType:$acc, DenseI64ArrayAttr:$reduction_dims)>, Results<(outs AnyType:$dest)> { @@ -417,16 +417,18 @@ def Vector_BroadcastOp : let hasVerifier = 1; } -def Vector_ShuffleOp : - Vector_Op<"shuffle", [Pure, - PredOpTrait<"first operand v1 and result have same element type", - TCresVTEtIsSameAsOpBase<0, 0>>, - PredOpTrait<"second operand v2 and result have same element type", - TCresVTEtIsSameAsOpBase<0, 1>>, - InferTypeOpAdaptor]>, - Arguments<(ins AnyFixedVector:$v1, AnyFixedVector:$v2, - DenseI64ArrayAttr:$mask)>, - Results<(outs AnyVector:$vector)> { +def Vector_ShuffleOp + : Vector_Op< + "shuffle", + [Pure, + PredOpTrait<"first operand v1 and result have same element type", + TCresVTEtIsSameAsOpBase<0, 0>>, + PredOpTrait<"second operand v2 and result have same element type", + TCresVTEtIsSameAsOpBase<0, 1>>, + InferTypeOpAdaptor]>, + Arguments<(ins AnyFixedVectorOfAnyRank:$v1, AnyFixedVectorOfAnyRank:$v2, + DenseI64ArrayAttr:$mask)>, + Results<(outs AnyVectorOfNonZeroRank:$vector)> { let summary = "shuffle operation"; let description = [{ The shuffle operation constructs a permutation (or duplication) of elements @@ -531,7 +533,7 @@ def Vector_InterleaveOp : }]; let arguments = (ins AnyVectorOfAnyRank:$lhs, AnyVectorOfAnyRank:$rhs); - let results = (outs AnyVector:$result); + let results = (outs AnyVectorOfNonZeroRank:$result); let assemblyFormat = [{ $lhs `,` $rhs attr-dict `:` type($lhs) `->` type($result) @@ 
-610,8 +612,8 @@ def Vector_DeinterleaveOp : ``` }]; - let arguments = (ins AnyVector:$source); - let results = (outs AnyVector:$res1, AnyVector:$res2); + let arguments = (ins AnyVectorOfNonZeroRank:$source); + let results = (outs AnyVectorOfNonZeroRank:$res1, AnyVectorOfNonZeroRank:$res2); let assemblyFormat = [{ $source attr-dict `:` type($source) `->` type($res1) @@ -1048,9 +1050,9 @@ def Vector_InsertStridedSliceOp : PredOpTrait<"operand #0 and result have same element type", TCresVTEtIsSameAsOpBase<0, 0>>, AllTypesMatch<["dest", "res"]>]>, - Arguments<(ins AnyVector:$source, AnyVector:$dest, I64ArrayAttr:$offsets, + Arguments<(ins AnyVectorOfNonZeroRank:$source, AnyVectorOfNonZeroRank:$dest, I64ArrayAttr:$offsets, I64ArrayAttr:$strides)>, - Results<(outs AnyVector:$res)> { + Results<(outs AnyVectorOfNonZeroRank:$res)> { let summary = "strided_slice operation"; let description = [{ Takes a k-D source vector, an n-D destination vector (n >= k), n-sized @@ -1107,10 +1109,10 @@ def Vector_OuterProductOp : PredOpTrait<"rhs operand and result have same element type", TCresVTEtIsSameAsOpBase<0, 1>>, DeclareOpInterfaceMethods]>, - Arguments<(ins AnyVector:$lhs, AnyType:$rhs, - Optional:$acc, + Arguments<(ins AnyVectorOfNonZeroRank:$lhs, AnyType:$rhs, + Optional:$acc, DefaultValuedAttr:$kind)>, - Results<(outs AnyVector)> { + Results<(outs AnyVectorOfNonZeroRank)> { let summary = "vector outerproduct with optional fused add"; let description = [{ Takes 2 1-D vectors and returns the 2-D vector containing the outer-product, @@ -1190,9 +1192,9 @@ def Vector_ExtractStridedSliceOp : Vector_Op<"extract_strided_slice", [Pure, PredOpTrait<"operand and result have same element type", TCresVTEtIsSameAsOpBase<0, 0>>]>, - Arguments<(ins AnyVector:$vector, I64ArrayAttr:$offsets, + Arguments<(ins AnyVectorOfNonZeroRank:$vector, I64ArrayAttr:$offsets, I64ArrayAttr:$sizes, I64ArrayAttr:$strides)>, - Results<(outs AnyVector)> { + Results<(outs AnyVectorOfNonZeroRank)> { let summary = 
"extract_strided_slice operation"; let description = [{ Takes an n-D vector, k-D `offsets` integer array attribute, a k-sized @@ -1254,7 +1256,7 @@ def Vector_TransferReadOp : Variadic:$indices, AffineMapAttr:$permutation_map, AnyType:$padding, - Optional>:$mask, + Optional>:$mask, BoolArrayAttr:$in_bounds)>, Results<(outs AnyVectorOfAnyRank:$vector)> { @@ -1502,7 +1504,7 @@ def Vector_TransferWriteOp : AnyShaped:$source, Variadic:$indices, AffineMapAttr:$permutation_map, - Optional>:$mask, + Optional>:$mask, BoolArrayAttr:$in_bounds)>, Results<(outs Optional:$result)> { @@ -1825,9 +1827,9 @@ def Vector_MaskedLoadOp : Vector_Op<"maskedload">, Arguments<(ins Arg:$base, Variadic:$indices, - VectorOf<[I1]>:$mask, - AnyVector:$pass_thru)>, - Results<(outs AnyVector:$result)> { + VectorOfNonZeroRankOf<[I1]>:$mask, + AnyVectorOfNonZeroRank:$pass_thru)>, + Results<(outs AnyVectorOfNonZeroRank:$result)> { let summary = "loads elements from memory into a vector as defined by a mask vector"; @@ -1888,8 +1890,8 @@ def Vector_MaskedStoreOp : Vector_Op<"maskedstore">, Arguments<(ins Arg:$base, Variadic:$indices, - VectorOf<[I1]>:$mask, - AnyVector:$valueToStore)> { + VectorOfNonZeroRankOf<[I1]>:$mask, + AnyVectorOfNonZeroRank:$valueToStore)> { let summary = "stores elements from a vector into memory as defined by a mask vector"; @@ -1951,10 +1953,10 @@ def Vector_GatherOp : ]>, Arguments<(ins Arg:$base, Variadic:$indices, - VectorOf<[AnyInteger, Index]>:$index_vec, - VectorOf<[I1]>:$mask, - AnyVector:$pass_thru)>, - Results<(outs AnyVector:$result)> { + VectorOfNonZeroRankOf<[AnyInteger, Index]>:$index_vec, + VectorOfNonZeroRankOf<[I1]>:$mask, + AnyVectorOfNonZeroRank:$pass_thru)>, + Results<(outs AnyVectorOfNonZeroRank:$result)> { let summary = [{ gathers elements from memory or ranked tensor into a vector as defined by an @@ -2082,9 +2084,9 @@ def Vector_ExpandLoadOp : Vector_Op<"expandload">, Arguments<(ins Arg:$base, Variadic:$indices, - VectorOf<[I1]>:$mask, - 
AnyVector:$pass_thru)>, - Results<(outs AnyVector:$result)> { + VectorOfNonZeroRankOf<[I1]>:$mask, + AnyVectorOfNonZeroRank:$pass_thru)>, + Results<(outs AnyVectorOfNonZeroRank:$result)> { let summary = "reads elements from memory and spreads them into a vector as defined by a mask"; @@ -2149,8 +2151,8 @@ def Vector_CompressStoreOp : Vector_Op<"compressstore">, Arguments<(ins Arg:$base, Variadic:$indices, - VectorOf<[I1]>:$mask, - AnyVector:$valueToStore)> { + VectorOfNonZeroRankOf<[I1]>:$mask, + AnyVectorOfNonZeroRank:$valueToStore)> { let summary = "writes elements selectively from a vector as defined by a mask"; @@ -2508,7 +2510,7 @@ def Vector_MaskOp : Vector_Op<"mask", [ }]; // TODO: Support multiple passthru values. - let arguments = (ins VectorOf<[I1]>:$mask, + let arguments = (ins VectorOfNonZeroRankOf<[I1]>:$mask, Optional:$passthru); let results = (outs Variadic:$results); let regions = (region SizedRegion<1>:$maskRegion); @@ -2891,11 +2893,11 @@ def Vector_ScanOp : AllTypesMatch<["source", "dest"]>, AllTypesMatch<["initial_value", "accumulated_value"]> ]>, Arguments<(ins Vector_CombiningKindAttr:$kind, - AnyVector:$source, + AnyVectorOfNonZeroRank:$source, AnyVectorOfAnyRank:$initial_value, I64Attr:$reduction_dim, BoolAttr:$inclusive)>, - Results<(outs AnyVector:$dest, + Results<(outs AnyVectorOfNonZeroRank:$dest, AnyVectorOfAnyRank:$accumulated_value)> { let summary = "Scan operation"; let description = [{ diff --git a/mlir/include/mlir/IR/CommonTypeConstraints.td b/mlir/include/mlir/IR/CommonTypeConstraints.td index 48e4c24f838652..fc4383d08422cb 100644 --- a/mlir/include/mlir/IR/CommonTypeConstraints.td +++ b/mlir/include/mlir/IR/CommonTypeConstraints.td @@ -22,15 +22,15 @@ include "mlir/IR/DialectBase.td" // Whether a type is a VectorType. // Explicitly disallow 0-D vectors for now until we have good enough coverage. 
-def IsVectorTypePred : And<[CPred<"::llvm::isa<::mlir::VectorType>($_self)">, - CPred<"::llvm::cast<::mlir::VectorType>($_self).getRank() > 0">]>; +def IsVectorOfNonZeroRankTypePred : And<[CPred<"::llvm::isa<::mlir::VectorType>($_self)">, + CPred<"::llvm::cast<::mlir::VectorType>($_self).getRank() > 0">]>; // Temporary vector type clone that allows gradual transition to 0-D vectors. // TODO: Remove this when all ops support 0-D vectors. def IsVectorOfAnyRankTypePred : CPred<"::llvm::isa<::mlir::VectorType>($_self)">; // Whether a type is a fixed-length VectorType. -def IsFixedVectorTypePred : CPred<[{::llvm::isa<::mlir::VectorType>($_self) && +def IsFixedVectorOfAnyRankTypePred : CPred<[{::llvm::isa<::mlir::VectorType>($_self) && !::llvm::cast($_self).isScalable()}]>; // Whether a type is a scalable VectorType. @@ -53,7 +53,7 @@ def IsVectorTypeWithOnlyTrailingDimScalablePred : And<[ // Whether a type is a VectorType and all dimensions are scalable. def IsVectorTypeWithAllDimsScalablePred : And<[ - IsVectorTypePred, + IsVectorOfNonZeroRankTypePred, CPred<[{::llvm::cast<::mlir::VectorType>($_self).allDimsScalable()}]> ]>; @@ -428,8 +428,8 @@ class ValueSemanticsContainerOf allowedTypes> : // Vector types. -class VectorOf allowedTypes> : - ShapedContainerType allowedTypes> : + ShapedContainerType; // Temporary vector type clone that allows gradual transition to 0-D vectors. 
@@ -438,11 +438,11 @@ class VectorOfAnyRankOf allowedTypes> : ShapedContainerType; -class FixedVectorOf allowedTypes> : - ShapedContainerType allowedTypes> : + ShapedContainerType; -class ScalableVectorOf allowedTypes> : +class ScalableVectorOfAnyRank allowedTypes> : ShapedContainerType; @@ -458,7 +458,7 @@ class VectorWithTrailingDimScalableOf allowedTypes> : // Whether the number of elements of a vector is from the given // `allowedRanks` list class IsVectorOfRankPred allowedRanks> : - And<[IsVectorTypePred, + And<[IsVectorOfNonZeroRankTypePred, Or($_self).getRank() == }] @@ -467,7 +467,7 @@ class IsVectorOfRankPred allowedRanks> : // Whether the number of elements of a fixed-length vector is from the given // `allowedRanks` list class IsFixedVectorOfRankPred allowedRanks> : - And<[IsFixedVectorTypePred, + And<[IsFixedVectorOfAnyRankTypePred, Or($_self).getRank() == }] @@ -501,22 +501,22 @@ class ScalableVectorOfRank allowedRanks> : Type< // is from the given `allowedTypes` list class VectorOfRankAndType allowedRanks, list allowedTypes> : AllOfType< - [VectorOf, VectorOfRank], - VectorOf.summary # VectorOfRank.summary, + [VectorOfNonZeroRankOf, VectorOfRank], + VectorOfNonZeroRankOf.summary # VectorOfRank.summary, "::mlir::VectorType">; // Fixed-width vector where the rank is from the given `allowedRanks` list and // the type is from the given `allowedTypes` list class FixedVectorOfRankAndType allowedRanks, list allowedTypes> : AllOfType< - [FixedVectorOf, VectorOfRank], - FixedVectorOf.summary # VectorOfRank.summary, + [FixedVectorOfAnyRank, VectorOfRank], + FixedVectorOfAnyRank.summary # VectorOfRank.summary, "::mlir::VectorType">; // Whether the number of elements of a vector is from the given // `allowedLengths` list class IsVectorOfLengthPred allowedLengths> : - And<[IsVectorTypePred, + And<[IsVectorOfNonZeroRankTypePred, Or($_self).getNumElements() == }] @@ -525,7 +525,7 @@ class IsVectorOfLengthPred allowedLengths> : // Whether the number of elements of a 
fixed-length vector is from the given // `allowedLengths` list class IsFixedVectorOfLengthPred allowedLengths> : - And<[IsFixedVectorTypePred, + And<[IsFixedVectorOfAnyRankTypePred, Or($_self).getNumElements() == }] @@ -604,16 +604,16 @@ class ScalableVectorOfLength allowedLengths> : Type< // list class VectorOfLengthAndType allowedLengths, list allowedTypes> : AllOfType< - [VectorOf, VectorOfLength], - VectorOf.summary # VectorOfLength.summary, + [VectorOfNonZeroRankOf, VectorOfLength], + VectorOfNonZeroRankOf.summary # VectorOfLength.summary, "::mlir::VectorType">; // Any fixed-length vector where the number of elements is from the given // `allowedLengths` list and the type is from the given `allowedTypes` list class FixedVectorOfLengthAndType allowedLengths, list allowedTypes> : AllOfType< - [FixedVectorOf, FixedVectorOfLength], - FixedVectorOf.summary # + [FixedVectorOfAnyRank, FixedVectorOfLength], + FixedVectorOfAnyRank.summary # FixedVectorOfLength.summary, "::mlir::VectorType">; @@ -621,8 +621,8 @@ class FixedVectorOfLengthAndType allowedLengths, // `allowedLengths` list and the type is from the given `allowedTypes` list class ScalableVectorOfLengthAndType allowedLengths, list allowedTypes> : AllOfType< - [ScalableVectorOf, ScalableVectorOfLength], - ScalableVectorOf.summary # + [ScalableVectorOfAnyRank, ScalableVectorOfLength], + ScalableVectorOfAnyRank.summary # ScalableVectorOfLength.summary, "::mlir::VectorType">; @@ -632,10 +632,10 @@ class ScalableVectorOfLengthAndType allowedLengths, class ScalableVectorOfRankAndLengthAndType allowedRanks, list allowedLengths, list allowedTypes> : AllOfType< - [ScalableVectorOfRank, ScalableVectorOf, + [ScalableVectorOfRank, ScalableVectorOfAnyRank, ScalableVectorOfLength], ScalableVectorOfRank.summary # - ScalableVectorOf.summary # + ScalableVectorOfAnyRank.summary # ScalableVectorOfLength.summary, "::mlir::VectorType">; @@ -657,13 +657,14 @@ class VectorWithTrailingDimScalableOfSizeAndType allowedTrailingSizes, 
ShapedTypeWithNthDimOfSize<-1, allowedTrailingSizes>.summary, "::mlir::VectorType">; -def AnyVector : VectorOf<[AnyType]>; -// Temporary vector type clone that allows gradual transition to 0-D vectors. +// Unlike the following definitions, this one excludes 0-D vectors +def AnyVectorOfNonZeroRank : VectorOfNonZeroRankOf<[AnyType]>; + def AnyVectorOfAnyRank : VectorOfAnyRankOf<[AnyType]>; -def AnyFixedVector : FixedVectorOf<[AnyType]>; +def AnyFixedVectorOfAnyRank : FixedVectorOfAnyRank<[AnyType]>; -def AnyScalableVector : ScalableVectorOf<[AnyType]>; +def AnyScalableVectorOfAnyRank : ScalableVectorOfAnyRank<[AnyType]>; // Shaped types. diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 5291f95d371442..cdbcd3013e139a 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -701,9 +701,9 @@ computeTargetSize(PatternRewriter &rewriter, Location loc, IndexPool &indexPool, // Filter operands with dynamic dimension auto operandsWithDynamicDim = - llvm::to_vector(llvm::make_filter_range(operands, [&](Value operand) { + llvm::filter_to_vector(operands, [&](Value operand) { return cast(operand.getType()).isDynamicDim(dim); - })); + }); // If no operand has a dynamic dimension, it means all sizes were 1 if (operandsWithDynamicDim.empty()) diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index 47766f36ad05cf..16b4e8eb4f022c 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -2322,7 +2322,8 @@ OpFoldResult arith::SelectOp::fold(FoldAdaptor adaptor) { return trueVal; // select %x, true, false => %x - if (getType().isInteger(1) && matchPattern(adaptor.getTrueValue(), m_One()) && + if (getType().isSignlessInteger(1) && + matchPattern(adaptor.getTrueValue(), m_One()) && matchPattern(adaptor.getFalseValue(), m_Zero())) return condition; diff --git 
a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index 85604eef2f2830..065739ea8e5951 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -718,7 +718,7 @@ void bufferization::replaceOpWithBufferizedValues(RewriterBase &rewriter, // loose all of its users and eventually DCE away. rewriter.setInsertionPointAfter(op); replacement = rewriter.create( - replacement.getLoc(), replacement); + replacement.getLoc(), opResult.getType(), replacement); } replacements.push_back(replacement); } diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index 62ce2583f4fa1d..c6a0320d24b5eb 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -69,7 +69,7 @@ BufferizeTypeConverter::BufferizeTypeConverter() { if (auto inputType = dyn_cast(inputs[0].getType())) { // MemRef to MemRef cast. assert(inputType != type && "expected different types"); - // Unranked to ranked and ranked to unranked casts must be explicit. + // Ranked to unranked casts must be explicit. 
auto rankedDestType = dyn_cast(type); if (!rankedDestType) return nullptr; @@ -147,12 +147,31 @@ struct OneShotBufferizePass opt.dumpAliasSets = dumpAliasSets; opt.setFunctionBoundaryTypeConversion( parseLayoutMapOption(functionBoundaryTypeConversion)); + + if (mustInferMemorySpace && useEncodingForMemorySpace) { + emitError(getOperation()->getLoc()) + << "only one of 'must-infer-memory-space' and " + "'use-encoding-for-memory-space' are allowed in " + << getArgument(); + return signalPassFailure(); + } + if (mustInferMemorySpace) { opt.defaultMemorySpaceFn = [](TensorType t) -> std::optional { return std::nullopt; }; } + + if (useEncodingForMemorySpace) { + opt.defaultMemorySpaceFn = + [](TensorType t) -> std::optional { + if (auto rtt = dyn_cast(t)) + return rtt.getEncoding(); + return std::nullopt; + }; + } + opt.printConflicts = printConflicts; opt.bufferAlignment = bufferAlignment; opt.testAnalysisOnly = testAnalysisOnly; diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp index a59900745d026e..ca1277c09323b8 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp @@ -99,10 +99,9 @@ SmallVector mlir::LLVM::MemsetInlineOp::getAccessedOperands() { } SmallVector mlir::LLVM::CallOp::getAccessedOperands() { - return llvm::to_vector( - llvm::make_filter_range(getArgOperands(), [](Value arg) { - return isa(arg.getType()); - })); + return llvm::filter_to_vector(getArgOperands(), [](Value arg) { + return isa(arg.getType()); + }); } #include "mlir/Dialect/LLVMIR/LLVMInterfaces.cpp.inc" diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 31f37334ce3978..61bab2ed675307 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -375,10 +375,8 @@ static void calculateTileOffsetsAndSizes( b.setInsertionPointToStart(forallOp.getBody(0)); SmallVector 
threadIds = forallOp.getInductionVars(); - SmallVector nonZeroNumThreads = - llvm::to_vector(llvm::make_filter_range(numThreads, [](OpFoldResult ofr) { - return !isConstantIntValue(ofr, 0); - })); + SmallVector nonZeroNumThreads = llvm::filter_to_vector( + numThreads, [](OpFoldResult ofr) { return !isConstantIntValue(ofr, 0); }); int64_t nLoops = loopRanges.size(); tiledOffsets.reserve(nLoops); tiledSizes.reserve(nLoops); @@ -656,10 +654,8 @@ FailureOr linalg::tileReductionUsingForall( Operation *tiledOp = nullptr; - SmallVector nonZeroNumThreads = - llvm::to_vector(llvm::make_filter_range(numThreads, [](OpFoldResult ofr) { - return !isConstantIntValue(ofr, 0); - })); + SmallVector nonZeroNumThreads = llvm::filter_to_vector( + numThreads, [](OpFoldResult ofr) { return !isConstantIntValue(ofr, 0); }); SmallVector materializedNonZeroNumThreads = getValueOrCreateConstantIndexOp(b, loc, nonZeroNumThreads); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index c3e176299317ef..eeaa70c0b65892 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -1090,8 +1090,8 @@ getPackUnpackNormalizedPerm(int rank, ArrayRef perm) { SmallVector vec(rank, kNonTiledMarker); for (auto [index, value] : llvm::enumerate(perm)) vec[value] = index; - SmallVector normalizedPerm = llvm::to_vector(llvm::make_filter_range( - vec, [&](int64_t v) { return v != kNonTiledMarker; })); + SmallVector normalizedPerm = llvm::filter_to_vector( + vec, [&](int64_t v) { return v != kNonTiledMarker; }); // This inverts the permutation in addition to normalizing so invert back. 
return invertPermutationVector(normalizedPerm); } diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp index 779c41a22e9ee2..e9d7dc1b847c61 100644 --- a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp @@ -203,7 +203,8 @@ struct ExecuteRegionOpInterface for (const auto &it : llvm::enumerate(executeRegionOp->getResultTypes())) { if (isa(it.value())) { newResults.push_back(rewriter.create( - executeRegionOp.getLoc(), newOp->getResult(it.index()))); + executeRegionOp.getLoc(), it.value(), + newOp->getResult(it.index()))); } else { newResults.push_back(newOp->getResult(it.index())); } @@ -485,15 +486,17 @@ getBuffers(RewriterBase &rewriter, const MutableOperandRange &operands, /// ToTensorOps, so that the block body can be moved over to the new op. static SmallVector getBbArgReplacements(RewriterBase &rewriter, Block::BlockArgListType bbArgs, + Block::BlockArgListType oldBbArgs, const DenseSet &tensorIndices) { SmallVector result; for (const auto &it : llvm::enumerate(bbArgs)) { size_t idx = it.index(); Value val = it.value(); if (tensorIndices.contains(idx)) { - result.push_back( - rewriter.create(val.getLoc(), val) - .getResult()); + result.push_back(rewriter + .create( + val.getLoc(), oldBbArgs[idx].getType(), val) + .getResult()); } else { result.push_back(val); } @@ -763,7 +766,8 @@ struct ForOpInterface // iter_args of the new loop in ToTensorOps. rewriter.setInsertionPointToStart(loopBody); SmallVector iterArgs = - getBbArgReplacements(rewriter, newForOp.getRegionIterArgs(), indices); + getBbArgReplacements(rewriter, newForOp.getRegionIterArgs(), + forOp.getRegionIterArgs(), indices); iterArgs.insert(iterArgs.begin(), newForOp.getInductionVar()); // Move loop body to new loop. 
@@ -1000,16 +1004,18 @@ struct WhileOpInterface // The old block uses tensors, so wrap the (memref) bbArgs of the new block // in ToTensorOps. rewriter.setInsertionPointToStart(newBeforeBody); - SmallVector newBeforeArgs = getBbArgReplacements( - rewriter, newWhileOp.getBeforeArguments(), indicesBefore); + SmallVector newBeforeArgs = + getBbArgReplacements(rewriter, newWhileOp.getBeforeArguments(), + whileOp.getBeforeArguments(), indicesBefore); rewriter.mergeBlocks(whileOp.getBeforeBody(), newBeforeBody, newBeforeArgs); // Set up new iter_args and move the loop body block to the new op. // The old block uses tensors, so wrap the (memref) bbArgs of the new block // in ToTensorOps. rewriter.setInsertionPointToStart(newAfterBody); - SmallVector newAfterArgs = getBbArgReplacements( - rewriter, newWhileOp.getAfterArguments(), indicesAfter); + SmallVector newAfterArgs = + getBbArgReplacements(rewriter, newWhileOp.getAfterArguments(), + whileOp.getAfterArguments(), indicesAfter); rewriter.mergeBlocks(whileOp.getAfterBody(), newAfterBody, newAfterArgs); // Replace loop results. 
@@ -1255,8 +1261,8 @@ struct ForallOpInterface forallOp.getBody()->getArguments().drop_front(rank), buffers)) { BlockArgument bbArg = std::get<0>(it); Value buffer = std::get<1>(it); - Value bufferAsTensor = - rewriter.create(forallOp.getLoc(), buffer); + Value bufferAsTensor = rewriter.create( + forallOp.getLoc(), bbArg.getType(), buffer); bbArg.replaceAllUsesWith(bufferAsTensor); } diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index 8eb8e579954faa..bebfaa8c1ea822 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -695,8 +695,8 @@ struct RemoveEmptyShapeOperandsPattern : public OpRewritePattern { } return true; }; - auto newOperands = llvm::to_vector<8>( - llvm::make_filter_range(op->getOperands(), isPotentiallyNonEmptyShape)); + auto newOperands = llvm::filter_to_vector<8>(op->getOperands(), + isPotentiallyNonEmptyShape); // Reduce op to equivalent without empty shape operands. if (newOperands.size() < op.getNumOperands()) { diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp index c2b8614148bf25..9797b73f534a96 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp @@ -480,10 +480,6 @@ struct FromElementsOpInterface auto fromElementsOp = cast(op); auto tensorType = cast(fromElementsOp.getType()); - // TODO: Implement memory space for this op. - if (options.defaultMemorySpaceFn(tensorType) != Attribute()) - return op->emitError("memory space not implemented yet"); - // Allocate a buffer for the result. 
Location loc = op->getLoc(); auto shape = tensorType.getShape(); @@ -493,10 +489,12 @@ struct FromElementsOpInterface /*copy=*/false); if (failed(tensorAlloc)) return failure(); - auto memrefType = - MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + FailureOr memrefType = + bufferization::getBufferType(*tensorAlloc, options); + if (failed(memrefType)) + return failure(); Value buffer = rewriter.create( - op->getLoc(), memrefType, *tensorAlloc); + op->getLoc(), *memrefType, *tensorAlloc); // Case: tensor<0xelem_type>. if (fromElementsOp.getElements().empty()) { diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp index 3272ece65ba531..fe0fee0f8db2ce 100644 --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -1309,10 +1309,10 @@ LogicalResult OpTrait::impl::verifyNoRegionArguments(Operation *op) { LogicalResult OpTrait::impl::verifyElementwise(Operation *op) { auto isMappableType = llvm::IsaPred; - auto resultMappableTypes = llvm::to_vector<1>( - llvm::make_filter_range(op->getResultTypes(), isMappableType)); - auto operandMappableTypes = llvm::to_vector<2>( - llvm::make_filter_range(op->getOperandTypes(), isMappableType)); + auto resultMappableTypes = + llvm::filter_to_vector<1>(op->getResultTypes(), isMappableType); + auto operandMappableTypes = + llvm::filter_to_vector<2>(op->getOperandTypes(), isMappableType); // If the op only has scalar operand/result types, then we have nothing to // check. 
diff --git a/mlir/lib/IR/TypeUtilities.cpp b/mlir/lib/IR/TypeUtilities.cpp index e569d440ca95c4..ec646cad841ae5 100644 --- a/mlir/lib/IR/TypeUtilities.cpp +++ b/mlir/lib/IR/TypeUtilities.cpp @@ -141,8 +141,8 @@ LogicalResult mlir::verifyCompatibleShapes(TypeRange types) { } // Remove all unranked shapes - auto shapes = llvm::to_vector<8>(llvm::make_filter_range( - shapedTypes, [](auto shapedType) { return shapedType.hasRank(); })); + auto shapes = llvm::filter_to_vector<8>( + shapedTypes, [](auto shapedType) { return shapedType.hasRank(); }); if (shapes.empty()) return success(); diff --git a/mlir/lib/Interfaces/DataLayoutInterfaces.cpp b/mlir/lib/Interfaces/DataLayoutInterfaces.cpp index 9469780129d644..1c661e3beea48e 100644 --- a/mlir/lib/Interfaces/DataLayoutInterfaces.cpp +++ b/mlir/lib/Interfaces/DataLayoutInterfaces.cpp @@ -304,11 +304,11 @@ mlir::detail::getDevicePropertyValue(DataLayoutEntryInterface entry) { DataLayoutEntryList mlir::detail::filterEntriesForType(DataLayoutEntryListRef entries, TypeID typeID) { - return llvm::to_vector<4>(llvm::make_filter_range( + return llvm::filter_to_vector<4>( entries, [typeID](DataLayoutEntryInterface entry) { auto type = llvm::dyn_cast_if_present(entry.getKey()); return type && type.getTypeID() == typeID; - })); + }); } DataLayoutEntryInterface @@ -393,9 +393,9 @@ static DataLayoutSpecInterface getCombinedDataLayout(Operation *leaf) { // Create the list of non-null specs (null/missing specs can be safely // ignored) from the outermost to the innermost. - auto nonNullSpecs = llvm::to_vector<2>(llvm::make_filter_range( + auto nonNullSpecs = llvm::filter_to_vector<2>( llvm::reverse(specs), - [](DataLayoutSpecInterface iface) { return iface != nullptr; })); + [](DataLayoutSpecInterface iface) { return iface != nullptr; }); // Combine the specs using the innermost as anchor. 
if (DataLayoutSpecInterface current = getSpec(leaf)) diff --git a/mlir/test/Conversion/SPIRVToLLVM/barrier-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/barrier-ops-to-llvm.mlir index a5cae67a3d5c5d..359aa350ac90c6 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/barrier-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/barrier-ops-to-llvm.mlir @@ -37,12 +37,12 @@ spirv.func @split_barrier() "None" { // CHECK: [[MEMORY:%.*]] = llvm.mlir.constant(2 : i32) : i32 // CHECK: [[SEMANTICS:%.*]] = llvm.mlir.constant(768 : i32) : i32 // CHECK: llvm.call spir_funccc @_Z33__spirv_ControlBarrierArriveINTELiii([[EXECUTION]], [[MEMORY]], [[SEMANTICS]]) {convergent, no_unwind, will_return} : (i32, i32, i32) -> () - spirv.INTEL.ControlBarrierArrive , , + spirv.INTEL.ControlBarrierArrive // CHECK: [[EXECUTION:%.*]] = llvm.mlir.constant(2 : i32) : i32 // CHECK: [[MEMORY:%.*]] = llvm.mlir.constant(2 : i32) : i32 // CHECK: [[SEMANTICS:%.*]] = llvm.mlir.constant(256 : i32) : i32 // CHECK: llvm.call spir_funccc @_Z31__spirv_ControlBarrierWaitINTELiii([[EXECUTION]], [[MEMORY]], [[SEMANTICS]]) {convergent, no_unwind, will_return} : (i32, i32, i32) -> () - spirv.INTEL.ControlBarrierWait , , + spirv.INTEL.ControlBarrierWait spirv.Return } diff --git a/mlir/test/Dialect/Affine/loop-fusion-4.mlir b/mlir/test/Dialect/Affine/loop-fusion-4.mlir index f46ad0f5e4c232..ea144f73bb21c6 100644 --- a/mlir/test/Dialect/Affine/loop-fusion-4.mlir +++ b/mlir/test/Dialect/Affine/loop-fusion-4.mlir @@ -242,7 +242,7 @@ module { ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): tensor.yield %cst_f32 : f32 } : tensor<1x32x32x8xf32> to tensor<1x40x8229x8xf32> - %1 = bufferization.to_memref %padded : memref<1x40x8229x8xf32> + %1 = bufferization.to_memref %padded : tensor<1x40x8229x8xf32> to memref<1x40x8229x8xf32> %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref<1x32x32x8xf32> affine.for %arg1 = 0 to 1 { affine.for %arg2 = 0 to 32 { @@ -280,7 +280,7 @@ module { // SPIRV-NOT: 
affine.for %{{.*}} // SPIRV: ReturnValue - %2 = bufferization.to_tensor %alloc_1 : memref<1x32x32x8xf32> + %2 = bufferization.to_tensor %alloc_1 : memref<1x32x32x8xf32> to tensor<1x32x32x8xf32> %3 = builtin.unrealized_conversion_cast %2 : tensor<1x32x32x8xf32> to !spirv.array<8192 x f32> spirv.ReturnValue %3 : !spirv.array<8192 x f32> } diff --git a/mlir/test/Dialect/Arith/bufferize.mlir b/mlir/test/Dialect/Arith/bufferize.mlir index a3b1454fb68f66..0b7838e1471d3d 100644 --- a/mlir/test/Dialect/Arith/bufferize.mlir +++ b/mlir/test/Dialect/Arith/bufferize.mlir @@ -7,7 +7,7 @@ func.func @index_cast(%tensor: tensor, %scalar: i32) -> (tensor, ind %index_scalar = arith.index_cast %scalar : i32 to index return %index_tensor, %index_scalar : tensor, index } -// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref +// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : tensor // CHECK-NEXT: %[[INDEX_MEMREF:.*]] = arith.index_cast %[[MEMREF]] // CHECK-SAME: memref to memref // CHECK-NEXT: %[[INDEX_TENSOR:.*]] = bufferization.to_tensor %[[INDEX_MEMREF]] @@ -83,8 +83,8 @@ func.func @non_tensor() { // CHECK-SAME: %[[PRED:.*]]: i1, // CHECK-SAME: %[[TRUE_VAL:.*]]: tensor, // CHECK-SAME: %[[FALSE_VAL:.*]]: tensor) -> tensor { -// CHECK-DAG: %[[TRUE_VAL_MEMREF:.*]] = bufferization.to_memref %[[TRUE_VAL]] : memref -// CHECK-DAG: %[[FALSE_VAL_MEMREF:.*]] = bufferization.to_memref %[[FALSE_VAL]] : memref +// CHECK-DAG: %[[TRUE_VAL_MEMREF:.*]] = bufferization.to_memref %[[TRUE_VAL]] : tensor +// CHECK-DAG: %[[FALSE_VAL_MEMREF:.*]] = bufferization.to_memref %[[FALSE_VAL]] : tensor // CHECK: %[[RET_MEMREF:.*]] = arith.select %[[PRED]], %[[TRUE_VAL_MEMREF]], %[[FALSE_VAL_MEMREF]] : memref // CHECK: %[[RET:.*]] = bufferization.to_tensor %[[RET_MEMREF]] : memref // CHECK: return %[[RET]] : tensor diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir index f56bf0980b13c1..1d4d5fc6f8319a 100644 --- 
a/mlir/test/Dialect/Arith/canonicalize.mlir +++ b/mlir/test/Dialect/Arith/canonicalize.mlir @@ -54,6 +54,18 @@ func.func @select_extui_i1(%arg0: i1) -> i1 { return %res : i1 } +// CHECK-LABEL: @select_no_fold_ui1 +// CHECK: %[[CONST_0:.+]] = "test.constant"() <{value = 0 : i32}> : () -> ui1 +// CHECK: %[[CONST_1:.+]] = "test.constant"() <{value = 1 : i32}> : () -> ui1 +// CHECK-NEXT: %[[RES:.+]] = arith.select %arg0, %[[CONST_1]], %[[CONST_0]] : ui1 +// CHECK-NEXT: return %[[RES]] +func.func @select_no_fold_ui1(%arg0: i1) -> ui1 { + %c0_i1 = "test.constant"() {value = 0 : i32} : () -> ui1 + %c1_i1 = "test.constant"() {value = 1 : i32} : () -> ui1 + %res = arith.select %arg0, %c1_i1, %c0_i1 : ui1 + return %res : ui1 +} + // CHECK-LABEL: @select_cst_false_scalar // CHECK-SAME: (%[[ARG0:.+]]: i32, %[[ARG1:.+]]: i32) // CHECK-NEXT: return %[[ARG1]] diff --git a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-other.mlir b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-other.mlir index 5293977fe733f5..5d0657eb38baa6 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-other.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-other.mlir @@ -9,7 +9,7 @@ // CHECK-NEXT: %[[clone:.*]] = bufferization.clone %[[m]] // CHECK-NEXT: return %[[clone]] func.func private @no_interface_no_operands(%t : tensor) -> memref { - %0 = bufferization.to_memref %t : memref + %0 = bufferization.to_memref %t : tensor to memref return %0 : memref } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir index c3e44c426797f3..7d429e48401144 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir @@ -96,7 +96,7 @@ 
func.func @to_memref_not_read_only(%idx : index, %f: f32) -> f32 { // Some op may write into the result of to_memref later. // CHECK: bufferization.to_memref // CHECK-SAME: {__inplace_operands_attr__ = ["false"]} - %m = bufferization.to_memref %t : memref<5xf32> + %m = bufferization.to_memref %t : tensor<5xf32> to memref<5xf32> %2 = tensor.extract %t[%idx] : tensor<5xf32> return %2 : f32 } @@ -112,7 +112,7 @@ func.func @to_memref_read_only(%idx : index, %f: f32) -> f32 { // Some op may write into the result of to_memref later. // CHECK: bufferization.to_memref // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - %m = bufferization.to_memref %t {read_only} : memref<5xf32> + %m = bufferization.to_memref %t {read_only} : tensor<5xf32> to memref<5xf32> %2 = tensor.extract %t[%idx] : tensor<5xf32> return %2 : f32 } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-encodings.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-encodings.mlir new file mode 100644 index 00000000000000..c26f1681e4d96b --- /dev/null +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-encodings.mlir @@ -0,0 +1,111 @@ +// RUN: mlir-opt %s -one-shot-bufferize="use-encoding-for-memory-space" -split-input-file | FileCheck %s + +func.func @alloc_tesor_with_space_no_encoding() -> tensor<128xf32> { + %0 = bufferization.alloc_tensor() {memory_space = 1 : i64} : tensor<128xf32> + return %0 : tensor<128xf32> +} + +// CHECK-LABEL: @alloc_tesor_with_space_no_encoding +// CHECK-SAME: () -> tensor<128xf32> { +// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 1> +// CHECK: %[[v0:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 1> to tensor<128xf32> +// CHECK: return %[[v0]] : tensor<128xf32> + +// ----- + +func.func @alloc_tesor_with_space_and_cast() -> tensor<128xf32, 1> { + %0 = bufferization.alloc_tensor() {memory_space = 1 : i64} : tensor<128xf32> + %1 = tensor.cast %0 : tensor<128xf32> to 
tensor<128xf32, 1> + return %1 : tensor<128xf32, 1> +} + +// CHECK-LABEL: @alloc_tesor_with_space_and_cast +// CHECK-SAME: () -> tensor<128xf32, 1 : i64> { +// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 1> +// CHECK: %[[v0:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 1> to tensor<128xf32, 1 : i64> +// CHECK: return %[[v0]] : tensor<128xf32, 1 : i64> + +// ----- + +func.func @alloc_tesor_with_space_with_encoding() -> tensor<128xf32, 1 : i64> { + %0 = bufferization.alloc_tensor() {memory_space = 1 : i64} : tensor<128xf32, 1 : i64> + return %0 : tensor<128xf32, 1 : i64> +} + +// CHECK-LABEL: @alloc_tesor_with_space_with_encoding +// CHECK-SAME: () -> tensor<128xf32, 1 : i64> { +// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 1> +// CHECK: %[[v0:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 1> to tensor<128xf32, 1 : i64> +// CHECK: return %[[v0]] : tensor<128xf32, 1 : i64> + +// ----- + +func.func @alloc_tesor_copy_from_default_space(%arg0: tensor<128xf32>) -> tensor<128xf32> { + %0 = bufferization.alloc_tensor() copy(%arg0) {memory_space = 1 : i64} : tensor<128xf32> + return %0 : tensor<128xf32> +} + +// CHECK-LABEL: @alloc_tesor_copy_from_default_space +// CHECK-SAME: (%[[arg0:.+]]: tensor<128xf32>) -> tensor<128xf32> { +// CHECK: %[[v0:.+]] = bufferization.to_memref %[[arg0]] : tensor<128xf32> to memref<128xf32, strided<[?], offset: ?>> +// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 1> +// CHECK: memref.copy %[[v0]], %[[alloc]] : memref<128xf32, strided<[?], offset: ?>> to memref<128xf32, 1> +// CHECK: %[[v1:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 1> to tensor<128xf32> +// CHECK: return %[[v1]] : tensor<128xf32> + +// ----- + +func.func @alloc_tesor_copy_from_non_default_space(%arg0: tensor<128xf32, 1>) -> tensor<128xf32, 2> { + %0 = bufferization.alloc_tensor() copy(%arg0) {memory_space = 2 : i64} : 
tensor<128xf32, 1> + %1 = tensor.cast %0 : tensor<128xf32, 1> to tensor<128xf32, 2> + return %1 : tensor<128xf32, 2> +} + +// CHECK-LABEL: @alloc_tesor_copy_from_non_default_space +// CHECK-SAME: (%[[arg0:.+]]: tensor<128xf32, 1 : i64>) -> tensor<128xf32, 2 : i64> { +// CHECK: %[[v0:.+]] = bufferization.to_memref %[[arg0]] : tensor<128xf32, 1 : i64> to memref<128xf32, strided<[?], offset: ?>, 1> +// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 2> +// CHECK: memref.copy %[[v0]], %[[alloc]] : memref<128xf32, strided<[?], offset: ?>, 1> to memref<128xf32, 2> +// CHECK: %[[v1:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 2> to tensor<128xf32, 2 : i64> +// CHECK: return %[[v1]] : tensor<128xf32, 2 : i64> + +// ----- + +// TODO: this should be illegal since ultimately we can not eliminate the `bufferization.to_tensor` when we +// bufferize function boundaries. +func.func @alloc_tesor_copy_from_non_default_space_no_cast(%arg0: tensor<128xf32, 1>, + %arg1: tensor<4xf32, 1>) -> tensor<128xf32, 1> { + %0 = bufferization.alloc_tensor() copy(%arg0) {memory_space = 2 : i64} : tensor<128xf32, 1> + %1 = tensor.insert_slice %arg1 into %arg0 [0][4][1] : tensor<4xf32, 1> into tensor<128xf32, 1> + return %0 : tensor<128xf32, 1> +} + +// CHECK-LABEL: @alloc_tesor_copy_from_non_default_space_no_cast +// CHECK-SAME: (%[[arg0:.+]]: tensor<128xf32, 1 : i64>, %[[arg1:.+]]: tensor<4xf32, 1 : i64>) -> tensor<128xf32, 1 : i64> { +// CHECK: %[[v0:.+]] = bufferization.to_memref %[[arg1]] : tensor<4xf32, 1 : i64> to memref<4xf32, strided<[?], offset: ?>, 1> +// CHECK: %[[v1:.+]] = bufferization.to_memref %[[arg0]] : tensor<128xf32, 1 : i64> to memref<128xf32, strided<[?], offset: ?>, 1> +// CHECK: %[[v2:.+]] = bufferization.to_memref %[[arg0]] : tensor<128xf32, 1 : i64> to memref<128xf32, strided<[?], offset: ?>, 1> +// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 2> +// CHECK: memref.copy %[[v2]], %[[alloc]] : 
memref<128xf32, strided<[?], offset: ?>, 1> to memref<128xf32, 2> +// CHECK: %[[v3:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 2> to tensor<128xf32, 1 : i64> +// CHECK: %[[alloc_0:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 1> +// CHECK: memref.copy %[[v1]], %[[alloc_0]] : memref<128xf32, strided<[?], offset: ?>, 1> to memref<128xf32, 1> +// CHECK: %[[subview:.+]] = memref.subview %[[alloc_0]][0] [4] [1] : memref<128xf32, 1> to memref<4xf32, strided<[1]>, 1> +// CHECK: memref.copy %[[v0]], %[[subview]] : memref<4xf32, strided<[?], offset: ?>, 1> to memref<4xf32, strided<[1]>, 1> +// CHECK: return %[[v3]] : tensor<128xf32, 1 : i64> + +// ----- + +func.func @materialize_in_destination(%arg0: tensor<128xf32, 1>) -> tensor<128xf32, 2> { + %0 = bufferization.alloc_tensor () {memory_space = 2 : i64} : tensor<128xf32, 2> + %1 = bufferization.materialize_in_destination %arg0 in %0 : (tensor<128xf32, 1>, tensor<128xf32, 2>) -> tensor<128xf32, 2> + return %1 : tensor<128xf32, 2> +} + +// CHECK-LABEL: @materialize_in_destination +// CHECK-SAME: (%[[arg0:.+]]: tensor<128xf32, 1 : i64>) -> tensor<128xf32, 2 : i64> { +// CHECK: %[[v0:.+]] = bufferization.to_memref %[[arg0]] : tensor<128xf32, 1 : i64> to memref<128xf32, strided<[?], offset: ?>, 1> +// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 2> +// CHECK: memref.copy %[[v0]], %[[alloc]] : memref<128xf32, strided<[?], offset: ?>, 1> to memref<128xf32, 2> +// CHECK: %[[v1:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 2> to tensor<128xf32, 2 : i64> +// CHECK: return %[[v1]] : tensor<128xf32, 2 : i64> diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir index 9380c81ce235cd..194c3278c78a1d 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir +++ 
b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir @@ -25,9 +25,9 @@ func.func @use_of_unknown_op_1(%t1: tensor) %idx = arith.constant 0 : index %cst = arith.constant 0.0 : f32 - // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : memref> + // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : tensor to memref> // CHECK: vector.transfer_read %[[dummy_memref]][%{{.*}}], %{{.*}} : memref> - // CHECK-NO-LAYOUT-MAP: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : memref + // CHECK-NO-LAYOUT-MAP: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : tensor to memref // CHECK-NO-LAYOUT-MAP: vector.transfer_read %[[dummy_memref]][%{{.*}}], %{{.*}} : memref %1 = vector.transfer_read %0[%idx], %cst : tensor, vector<5xf32> return %1 : vector<5xf32> @@ -61,7 +61,7 @@ func.func @use_of_unknown_op_3(%t1: tensor) // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[t1]]) %0 = "test.dummy_op"(%t1) : (tensor) -> tensor - // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : memref> + // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : tensor to memref> // CHECK: %[[v2:.*]] = vector.transfer_read %[[dummy_memref]] %2 = vector.transfer_read %0[%idx], %cst : tensor, vector<5xf32> diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir index dbf8d6563477b5..e65c5b92949f6e 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir @@ -134,7 +134,7 @@ func.func @copy_deallocated() -> tensor<10xf32> { // CHECK-LABEL: func @select_different_tensors( // CHECK-SAME: %[[t:.*]]: tensor func.func @select_different_tensors(%t: tensor, %sz: index, %pos: index, %c: i1) -> f32 { - // CHECK-DAG: %[[m:.*]] = bufferization.to_memref %[[t]] : memref + // CHECK-DAG: %[[m:.*]] = bufferization.to_memref %[[t]] : tensor 
to memref // CHECK-DAG: %[[alloc:.*]] = memref.alloc(%{{.*}}) {{.*}} : memref %0 = bufferization.alloc_tensor(%sz) : tensor @@ -200,7 +200,7 @@ func.func @read_of_alias(%t: tensor<100xf32>, %pos1: index, %pos2: index, // CHECK-LABEL: func @from_unranked_to_unranked( // CHECK-SAME: %[[arg0:.*]]: tensor<*xi32> func.func @from_unranked_to_unranked(%arg0: tensor<*xi32>) -> tensor<*xi32> { - // CHECK: %[[m:.*]] = bufferization.to_memref %[[arg0]] : memref<*xi32> + // CHECK: %[[m:.*]] = bufferization.to_memref %[[arg0]] : tensor<*xi32> to memref<*xi32> // CHECK: %[[t:.*]] = bufferization.to_tensor %[[m]] // CHECK: return %[[t]] : tensor<*xi32> %0 = tensor.cast %arg0 : tensor<*xi32> to tensor<*xi32> @@ -227,7 +227,7 @@ func.func @tensor_copy(%arg0: tensor<5xf32>) -> tensor<5xf32> { // CHECK-LABEL: func @materialize_in_destination_buffer( // CHECK-SAME: %[[t:.*]]: tensor<5xf32>, %[[m:.*]]: memref<5xf32>) -// CHECK: %[[b:.*]] = bufferization.to_memref %[[t]] : memref<5xf32, strided<[?], offset: ?>> +// CHECK: %[[b:.*]] = bufferization.to_memref %[[t]] : tensor<5xf32> to memref<5xf32, strided<[?], offset: ?>> // CHECK: memref.copy %[[b]], %[[m]] func.func @materialize_in_destination_buffer(%t: tensor<5xf32>, %m: memref<5xf32>) { bufferization.materialize_in_destination %t in restrict writable %m diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir index 35b28f7ec83919..2ca7f7109005cc 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir @@ -1064,7 +1064,7 @@ func.func @main_func(%A : tensor {bufferization.writable = true}, func.func @to_tensor_op_not_writable(%m: memref, %v: vector<5xf32>, %idx1: index, %idx2: index) -> vector<10xf32> { - %0 = bufferization.to_tensor %m restrict : memref + %0 = bufferization.to_tensor %m 
restrict : memref to tensor // Write to the tensor. Cannot be inplace due to tensor_load. // CHECK: vector.transfer_write diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-force-copy-before-write.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-force-copy-before-write.mlir index 7685f2ef3aafe5..230a0ed4294899 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-force-copy-before-write.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-force-copy-before-write.mlir @@ -28,9 +28,9 @@ module { // CHECK_COPY: memref.copy func.func @contains_to_memref_op(%arg0: tensor {bufferization.writable = true}, %arg1: index) -> vector<5xf32> { - %0 = bufferization.to_memref %arg0 : memref + %0 = bufferization.to_memref %arg0 : tensor to memref %cst = arith.constant 0.000000e+00 : f32 %1 = vector.transfer_read %0[%arg1], %cst : memref, vector<5xf32> return %1 : vector<5xf32> } -} \ No newline at end of file +} diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir index d773e1af43a76e..29714e61d336ac 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir @@ -76,7 +76,7 @@ func.func @scf_while_non_equiv_yield(%arg0: tensor<5xi1>, func.func @to_tensor_op_unsupported(%m: memref, %idx: index) -> (f32) { // expected-error @+1 {{to_tensor ops without `restrict` are not supported by One-Shot Analysis}} - %0 = bufferization.to_tensor %m : memref + %0 = bufferization.to_tensor %m : memref to tensor %1 = tensor.extract %0[%idx] : tensor return %1 : f32 diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir index 
65557a68d243a2..ec2fb58ee03f8a 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -679,7 +679,7 @@ func.func @to_memref_op_unsupported( // to_memref op. // CHECK: %[[alloc:.*]] = memref.alloc // CHECK: memref.copy %[[arg0]], %[[alloc]] - %0 = bufferization.to_memref %t1 : memref + %0 = bufferization.to_memref %t1 : tensor to memref // CHECK: "test.foo"(%[[alloc]]) "test.foo"(%0) : (memref) -> () diff --git a/mlir/test/Dialect/Bufferization/canonicalize.mlir b/mlir/test/Dialect/Bufferization/canonicalize.mlir index b6c0a0e25efe0e..3ebc1e4fa8dea3 100644 --- a/mlir/test/Dialect/Bufferization/canonicalize.mlir +++ b/mlir/test/Dialect/Bufferization/canonicalize.mlir @@ -6,8 +6,8 @@ // Basic folding of to_tensor(to_memref(t)) -> t // CHECK-LABEL: func @tensor_load_of_buffer_cast( func.func @tensor_load_of_buffer_cast(%arg0: tensor) -> tensor { - %0 = bufferization.to_memref %arg0 : memref - %1 = bufferization.to_tensor %0 : memref + %0 = bufferization.to_memref %arg0 : tensor to memref + %1 = bufferization.to_tensor %0 : memref to tensor return %1 : tensor } // CHECK-SAME: %[[TENSOR:.*]]: tensor) -> tensor { @@ -18,8 +18,8 @@ func.func @tensor_load_of_buffer_cast(%arg0: tensor) -> tensor { // Basic folding of to_memref(to_tensor(m)) -> m // CHECK-LABEL: func @buffer_cast_of_tensor_load( func.func @buffer_cast_of_tensor_load(%arg0: memref) -> memref { - %0 = bufferization.to_tensor %arg0 : memref - %1 = bufferization.to_memref %0 : memref + %0 = bufferization.to_tensor %arg0 : memref to tensor + %1 = bufferization.to_memref %0 : tensor to memref return %1 : memref } // CHECK-SAME: %[[MEMREF:.*]]: memref) -> memref { @@ -34,14 +34,14 @@ func.func @buffer_cast_of_tensor_load(%arg0: memref) -> memref { // CHECK-SAME: %[[MEMREF_ADDRSPACE2:.*]]: memref) // CHECK-SAME: -> memref { // CHECK: %[[TENSOR:.*]] = bufferization.to_tensor -// CHECK-SAME: 
%[[MEMREF_ADDRSPACE2]] : memref +// CHECK-SAME: %[[MEMREF_ADDRSPACE2]] : memref to tensor // CHECK: %[[MEMREF_ADDRSPACE7:.*]] = bufferization.to_memref -// CHECK-SAME: %[[TENSOR]] : memref +// CHECK-SAME: %[[TENSOR]] : tensor to memref // CHECK: return %[[MEMREF_ADDRSPACE7]] func.func @no_fold_buffer_cast_of_tensor_load(%arg0: memref) -> memref { - %0 = bufferization.to_tensor %arg0 : memref - %1 = bufferization.to_memref %0 : memref + %0 = bufferization.to_tensor %arg0 : memref to tensor + %1 = bufferization.to_memref %0 : tensor to memref return %1 : memref } @@ -61,8 +61,8 @@ func.func @canonicalize_buffer_cast_of_tensor_load( %arg0: memref>) -> memref> { - %0 = bufferization.to_tensor %arg0 : memref> - %1 = bufferization.to_memref %0 : memref> + %0 = bufferization.to_tensor %arg0 : memref> to tensor + %1 = bufferization.to_memref %0 : tensor to memref> return %1 : memref> } @@ -74,8 +74,8 @@ func.func @canonicalize_buffer_cast_of_tensor_load( func.func @canonicalize_buffer_cast_of_tensor_load_to_copy( %arg0: memref>) -> memref> { - %0 = bufferization.to_tensor %arg0 : memref> - %1 = bufferization.to_memref %0 : memref> + %0 = bufferization.to_tensor %arg0 : memref> to tensor + %1 = bufferization.to_memref %0 : tensor to memref> return %1 : memref> } // CHECK-SAME: %[[M:.*]]: memref>) @@ -100,7 +100,7 @@ func.func @canonicalize_buffer_cast_of_tensor_load_to_copy( // CHECK: return %[[D]] : index func.func @dim_of_tensor_load(%arg0: memref) -> index { %c0 = arith.constant 0 : index - %0 = bufferization.to_tensor %arg0 : memref + %0 = bufferization.to_tensor %arg0 : memref to tensor %1 = tensor.dim %0, %c0 : tensor return %1 : index } @@ -252,10 +252,10 @@ func.func @clone_and_preceding_dealloc(%arg0: memref) -> memref<32xf32> { func.func @tensor_cast_to_memref(%arg0 : tensor<4x6x16x32xi8>) -> memref { %0 = tensor.cast %arg0 : tensor<4x6x16x32xi8> to tensor - %1 = bufferization.to_memref %0 : memref + %1 = bufferization.to_memref %0 : tensor to memref return %1 : 
memref } -// CHECK: %[[M:.+]] = bufferization.to_memref %[[ARG0]] : memref<4x6x16x32xi8> +// CHECK: %[[M:.+]] = bufferization.to_memref %[[ARG0]] : tensor<4x6x16x32xi8> // CHECK: %[[M1:.+]] = memref.cast %[[M]] // CHECK-SAME: memref<4x6x16x32xi8> to memref // CHECK: return %[[M1]] : memref @@ -266,7 +266,7 @@ func.func @tensor_cast_to_memref(%arg0 : tensor<4x6x16x32xi8>) -> // CHECK-LABEL: func @load_from_buffer_cast( func.func @load_from_buffer_cast(%arg0: index, %arg1: index, %arg2: tensor) -> f32 { - %0 = bufferization.to_memref %arg2 : memref + %0 = bufferization.to_memref %arg2 : tensor to memref %1 = memref.load %0[%arg0, %arg1] : memref return %1 : f32 } diff --git a/mlir/test/Dialect/Bufferization/ops.mlir b/mlir/test/Dialect/Bufferization/ops.mlir index ad4a66c1b79782..7b6a6f492d0698 100644 --- a/mlir/test/Dialect/Bufferization/ops.mlir +++ b/mlir/test/Dialect/Bufferization/ops.mlir @@ -15,15 +15,15 @@ func.func @test_clone(%buf : memref<*xf32>) -> memref<*xf32> { func.func @test_to_memref(%arg0: tensor, %arg1: tensor<*xi64>) -> (memref (d0 + 7)>>, memref<*xi64, 1>) { %0 = bufferization.to_memref %arg0 - : memref (d0 + 7)>> + : tensor to memref (d0 + 7)>> %1 = bufferization.to_memref %arg1 - : memref<*xi64, 1> + : tensor<*xi64> to memref<*xi64, 1> return %0, %1 : memref (d0 + 7)>>, memref<*xi64, 1> } // CHECK-LABEL: func @test_to_tensor func.func @test_to_tensor(%buf : memref<2xf32>) -> tensor<2xf32> { - %tensor = bufferization.to_tensor %buf restrict writable : memref<2xf32> + %tensor = bufferization.to_tensor %buf restrict writable : memref<2xf32> to tensor<2xf32> return %tensor : tensor<2xf32> } diff --git a/mlir/test/Dialect/ControlFlow/one-shot-bufferize.mlir b/mlir/test/Dialect/ControlFlow/one-shot-bufferize.mlir index b82ebdde63a1c3..f5c9f81a189973 100644 --- a/mlir/test/Dialect/ControlFlow/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/ControlFlow/one-shot-bufferize.mlir @@ -3,7 +3,7 @@ // CHECK-NO-FUNC-LABEL: func @br( // CHECK-NO-FUNC-SAME: 
%[[t:.*]]: tensor<5xf32>) -// CHECK-NO-FUNC: %[[m:.*]] = bufferization.to_memref %[[t]] : memref<5xf32, strided<[?], offset: ?>> +// CHECK-NO-FUNC: %[[m:.*]] = bufferization.to_memref %[[t]] : tensor<5xf32> to memref<5xf32, strided<[?], offset: ?>> // CHECK-NO-FUNC: %[[r:.*]] = scf.execute_region -> memref<5xf32, strided<[?], offset: ?>> { // CHECK-NO-FUNC: cf.br ^[[block:.*]](%[[m]] // CHECK-NO-FUNC: ^[[block]](%[[arg1:.*]]: memref<5xf32, strided<[?], offset: ?>>): @@ -23,7 +23,7 @@ func.func @br(%t: tensor<5xf32>) { // CHECK-NO-FUNC-LABEL: func @cond_br( // CHECK-NO-FUNC-SAME: %[[t1:.*]]: tensor<5xf32>, -// CHECK-NO-FUNC: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref<5xf32, strided<[?], offset: ?>> +// CHECK-NO-FUNC: %[[m1:.*]] = bufferization.to_memref %[[t1]] : tensor<5xf32> to memref<5xf32, strided<[?], offset: ?>> // CHECK-NO-FUNC: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32> // CHECK-NO-FUNC: %[[r:.*]] = scf.execute_region -> memref<5xf32, strided<[?], offset: ?>> { // CHECK-NO-FUNC: cf.cond_br %{{.*}}, ^[[block1:.*]](%[[m1]] : {{.*}}), ^[[block2:.*]](%[[alloc]] : {{.*}}) diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir index f416cd9fcf0b29..530badebd5c70c 100644 --- a/mlir/test/Dialect/Linalg/bufferize.mlir +++ b/mlir/test/Dialect/Linalg/bufferize.mlir @@ -12,7 +12,7 @@ // CHECK: #map = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @basic( // CHECK-SAME: %[[TENSOR:.*]]: tensor<4xf32>) -> tensor<4xf32> { -// CHECK-DAG: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<4xf32> +// CHECK-DAG: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : tensor<4xf32> to memref<4xf32> // CHECK-DAG: %[[RESULT_MEMREF:.*]] = memref.alloc() {{.*}} : memref<4xf32> // CHECK: linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} // CHECK-SAME: ins(%[[MEMREF]] : memref<4xf32>) @@ -46,7 +46,7 @@ func.func @basic(%arg0: tensor<4xf32>) -> tensor<4xf32> { // CHECK: #map = 
affine_map<(d0) -> (d0)> // CHECK-LABEL: func @empty_tensor( // CHECK-SAME: %[[IN:.*]]: tensor, %[[SIZE:.*]]: index) -// CHECK-DAG: %[[MEMREF:.*]] = bufferization.to_memref %[[IN]] : memref +// CHECK-DAG: %[[MEMREF:.*]] = bufferization.to_memref %[[IN]] : tensor to memref // CHECK-DAG: %[[OUT_BUF:.*]] = memref.alloc(%[[SIZE]]) {{.*}} : memref // CHECK: linalg.generic // CHECK-SAME: ins(%[[MEMREF]] : memref) @@ -105,7 +105,7 @@ func.func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf3 // CHECK-DAG: %[[DIM1:.*]] = tensor.dim %[[ARG]], %[[C1]] : tensor // CHECK-DAG: %[[RESULT0:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) {{.*}} : memref // CHECK-DAG: %[[RESULT1:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) {{.*}} : memref -// CHECK-DAG: %[[MEMREF_ARG:.*]] = bufferization.to_memref %[[ARG]] : memref +// CHECK-DAG: %[[MEMREF_ARG:.*]] = bufferization.to_memref %[[ARG]] : tensor to memref // CHECK: linalg.generic // CHECK-SAME: ins(%[[MEMREF_ARG]] : memref) // CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref, memref) @@ -141,8 +141,8 @@ func.func @dynamic_results(%arg0: tensor) // CHECK-SAME: %[[ARG0_TENSOR:.*]]: tensor<2x3x4xvector<3x4xi4>>, // CHECK-SAME: %[[ARG1_TENSOR:.*]]: tensor<3x2xf32>) -> tensor<3x2xf32> { // CHECK-DAG: %[[INIT_BUFFER:.*]] = memref.alloc() {{.*}} : memref<3x2xf32> -// CHECK-DAG: %[[ARG0_MEMREF:.*]] = bufferization.to_memref %[[ARG0_TENSOR]] : memref<2x3x4xvector<3x4xi4>> -// CHECK-DAG: %[[ARG1_MEMREF:.*]] = bufferization.to_memref %[[ARG1_TENSOR]] : memref<3x2xf32> +// CHECK-DAG: %[[ARG0_MEMREF:.*]] = bufferization.to_memref %[[ARG0_TENSOR]] : tensor<2x3x4xvector<3x4xi4>> +// CHECK-DAG: %[[ARG1_MEMREF:.*]] = bufferization.to_memref %[[ARG1_TENSOR]] : tensor<3x2xf32> // CHECK: memref.copy %[[ARG1_MEMREF]], %[[INIT_BUFFER]] : memref<3x2xf32> to memref<3x2xf32> // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0_MEMREF]] : memref<2x3x4xvector<3x4xi4>>) diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir 
b/mlir/test/Dialect/Linalg/canonicalize.mlir index d8633f7bc59271..cd439cd23ecd0c 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -373,7 +373,7 @@ func.func @fill_pack_general() -> tensor<1x1x8x4x4x8xi32>{ %9 = tensor.empty() : tensor<1x1x16x64xi32> %extracted_slice_15 = tensor.extract_slice %9[0, 0, 0, 0] [1, 1, 16, 64] [1, 1, 1, 1] : tensor<1x1x16x64xi32> to tensor<1x1x16x64xi32> %16 = linalg.fill ins(%c0_i32 : i32) outs(%extracted_slice_15 : tensor<1x1x16x64xi32>) -> tensor<1x1x16x64xi32> - %0 = bufferization.to_tensor %alloc restrict writable : memref<1x1x8x4x4x8xi32> + %0 = bufferization.to_tensor %alloc restrict writable : memref<1x1x8x4x4x8xi32> to tensor<1x1x8x4x4x8xi32> %pack_18 = tensor.pack %16 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %0 : tensor<1x1x16x64xi32> -> tensor<1x1x8x4x4x8xi32> return %pack_18 : tensor<1x1x8x4x4x8xi32> } @@ -921,7 +921,7 @@ func.func @erase_non_identity_noop(%arg0 : tensor, %arg1: tensor tensor - return %0 : tensor + return %0 : tensor } // Do not erase ops with buffer semantics. 
@@ -1073,8 +1073,8 @@ func.func @transpose_identity_perm(%input: tensor<16x32x64xf32>, // ----- -func.func @transpose_transpose_cancel(%input: tensor<5x4x3xf32>, - %init1: tensor<4x3x5xf32>, +func.func @transpose_transpose_cancel(%input: tensor<5x4x3xf32>, + %init1: tensor<4x3x5xf32>, %init2: tensor<5x4x3xf32>) -> tensor<5x4x3xf32> { // CHECK-LABEL: @transpose_transpose_cancel // CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<5x4x3xf32> diff --git a/mlir/test/Dialect/MemRef/normalize-memrefs.mlir b/mlir/test/Dialect/MemRef/normalize-memrefs.mlir index 11114bcf2b1ab1..6d20ccbf2ca055 100644 --- a/mlir/test/Dialect/MemRef/normalize-memrefs.mlir +++ b/mlir/test/Dialect/MemRef/normalize-memrefs.mlir @@ -360,11 +360,11 @@ func.func @neg_map() -> memref<2x3xf32, #neg> { // CHECK-LABEL: func @memref_with_strided_offset func.func @memref_with_strided_offset(%arg0: tensor<128x512xf32>, %arg1: index, %arg2: index) -> tensor<16x512xf32> { %c0 = arith.constant 0 : index - %0 = bufferization.to_memref %arg0 : memref<128x512xf32, strided<[?, ?], offset: ?>> + %0 = bufferization.to_memref %arg0 : tensor<128x512xf32> to memref<128x512xf32, strided<[?, ?], offset: ?>> %subview = memref.subview %0[%arg2, 0] [%arg1, 512] [1, 1] : memref<128x512xf32, strided<[?, ?], offset: ?>> to memref> // CHECK: %{{.*}} = memref.cast %{{.*}} : memref> to memref<16x512xf32, strided<[?, ?], offset: ?>> %cast = memref.cast %subview : memref> to memref<16x512xf32, strided<[?, ?], offset: ?>> - %1 = bufferization.to_tensor %cast : memref<16x512xf32, strided<[?, ?], offset: ?>> + %1 = bufferization.to_tensor %cast : memref<16x512xf32, strided<[?, ?], offset: ?>> to tensor<16x512xf32> return %1 : tensor<16x512xf32> } diff --git a/mlir/test/Dialect/SCF/bufferize.mlir b/mlir/test/Dialect/SCF/bufferize.mlir index 53fcee692226cb..6c08d9f68e8a9f 100644 --- a/mlir/test/Dialect/SCF/bufferize.mlir +++ b/mlir/test/Dialect/SCF/bufferize.mlir @@ -4,8 +4,8 @@ // CHECK-SAME: %[[PRED:.*]]: i1, // CHECK-SAME: 
%[[TRUE_TENSOR:.*]]: tensor, // CHECK-SAME: %[[FALSE_TENSOR:.*]]: tensor) -> tensor { -// CHECK-DAG: %[[TRUE_MEMREF:.*]] = bufferization.to_memref %[[TRUE_TENSOR]] : memref -// CHECK-DAG: %[[FALSE_MEMREF:.*]] = bufferization.to_memref %[[FALSE_TENSOR]] : memref +// CHECK-DAG: %[[TRUE_MEMREF:.*]] = bufferization.to_memref %[[TRUE_TENSOR]] : tensor to memref +// CHECK-DAG: %[[FALSE_MEMREF:.*]] = bufferization.to_memref %[[FALSE_TENSOR]] : tensor to memref // CHECK: %[[RESULT_MEMREF:.*]] = scf.if %[[PRED]] -> (memref) { // CHECK: scf.yield %[[TRUE_MEMREF]] : memref // CHECK: } else { @@ -29,7 +29,7 @@ func.func @if(%pred: i1, %true_val: tensor, %false_val: tensor) -> // CHECK-SAME: %[[TENSOR:.*]]: tensor, // CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, // CHECK-SAME: %[[STEP:.*]]: index) -> tensor { -// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref +// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : tensor to memref // Note: scf.for iter_args always bufferize to a memory write. This could be // optimized by analyzing the loop body. // CHECK: %[[MEMREF_COPY:.*]] = memref.alloc() @@ -70,7 +70,7 @@ func.func @if_correct_recursive_legalization_behavior(%pred: i1, %tensor: tensor // CHECK-LABEL: func @for_correct_recursive_legalization_behavior( // CHECK-SAME: %[[TENSOR:.*]]: tensor, // CHECK-SAME: %[[INDEX:.*]]: index) -> tensor { -// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref +// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : tensor to memref // Note: scf.for iter_args always bufferize to a memory write. This could be // optimized by analyzing the loop body. 
// CHECK: %[[MEMREF_COPY:.*]] = memref.alloc() @@ -78,7 +78,7 @@ func.func @if_correct_recursive_legalization_behavior(%pred: i1, %tensor: tensor // CHECK: %[[RESULT:.*]] = scf.for %{{.*}} = %[[INDEX]] to %[[INDEX]] step %[[INDEX]] iter_args(%[[MEMREF_ITER:.*]] = %[[MEMREF_COPY]]) -> (memref) { // CHECK: %[[TENSOR_ITER:.*]] = bufferization.to_tensor %[[MEMREF_ITER]] : memref // CHECK: %[[TENSOR_MUNGED:.*]] = "test.munge_tensor"(%[[TENSOR_ITER]]) : (tensor) -> tensor -// CHECK: %[[MEMREF_MUNGED:.*]] = bufferization.to_memref %[[TENSOR_MUNGED]] : memref +// CHECK: %[[MEMREF_MUNGED:.*]] = bufferization.to_memref %[[TENSOR_MUNGED]] : tensor to memref // CHECK: scf.yield %[[MEMREF_MUNGED]] : memref // CHECK: } // CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[RESULT]] : memref @@ -96,7 +96,7 @@ func.func @for_correct_recursive_legalization_behavior(%arg0: tensor, %inde // CHECK-LABEL: func @bufferize_while( // CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64, %[[ARG2:.*]]: tensor -// CHECK: %[[M:.*]] = bufferization.to_memref %[[ARG2]] : memref +// CHECK: %[[M:.*]] = bufferization.to_memref %[[ARG2]] : tensor to memref // Note: scf.while iter_args always bufferize to a memory write. This could be // optimized by analyzing the loop body. // CHECK: %[[MEMREF_COPY:.*]] = memref.alloc() diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-encodings.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-encodings.mlir new file mode 100644 index 00000000000000..709943e5965858 --- /dev/null +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-encodings.mlir @@ -0,0 +1,73 @@ +// RUN: mlir-opt %s -one-shot-bufferize="use-encoding-for-memory-space allow-return-allocs-from-loops allow-unknown-ops" -allow-unregistered-dialect -split-input-file | FileCheck %s + +// Here and below, unknown op 'some.use' will force 'bufferization.to_tensor' operations to remain in the body, +// allowing us to check that the encoding on the '%iter' tensor is correctly preserved. 
+ +func.func @scf_for_iter_arg(%arg0: tensor<128xf32, 1>, %arg1: index, %arg2: index, %arg3: index) -> tensor<128xf32, 1> { + %0 = scf.for %i = %arg1 to %arg2 step %arg3 iter_args(%iter = %arg0) -> tensor<128xf32, 1> { + %0 = "some.use"(%iter) : (tensor<128xf32, 1>) -> tensor<128xf32, 1> + scf.yield %0 : tensor<128xf32, 1> + } + return %0 : tensor<128xf32, 1> +} + +// CHECK-LABEL: func.func @scf_for_iter_arg +// CHECK-SAME: (%[[arg0:.+]]: tensor<128xf32, 1 : i64>, %[[arg1:.+]]: index, %[[arg2:.+]]: index, %[[arg3:.+]]: index) +// CHECK: %[[v0:.+]] = bufferization.to_memref %[[arg0]] : tensor<128xf32, 1 : i64> to memref<128xf32, strided<[?], offset: ?>, 1> +// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 1> +// CHECK: memref.copy %[[v0]], %[[alloc]] : memref<128xf32, strided<[?], offset: ?>, 1> to memref<128xf32, 1> +// CHECK: %[[cast:.+]] = memref.cast %[[alloc]] : memref<128xf32, 1> to memref<128xf32, strided<[?], offset: ?>, 1> +// CHECK: %[[v1:.+]] = scf.for %{{.+}} = %[[arg1]] to %[[arg2]] step %[[arg3]] iter_args(%[[arg6:.+]] = %[[cast]]) -> (memref<128xf32, strided<[?], offset: ?>, 1>) +// CHECK-NEXT: %[[v3:.+]] = bufferization.to_tensor %[[arg6]] : memref<128xf32, strided<[?], offset: ?>, 1> to tensor<128xf32, 1 : i64> +// CHECK-NEXT: %[[v4:.+]] = "some.use"(%[[v3]]) : (tensor<128xf32, 1 : i64>) -> tensor<128xf32, 1 : i64> +// CHECK-NEXT: %[[v5:.+]] = bufferization.to_memref %[[v4]] : tensor<128xf32, 1 : i64> to memref<128xf32, strided<[?], offset: ?>, 1> +// CHECK-NEXT: scf.yield %[[v5]] : memref<128xf32, strided<[?], offset: ?>, 1> +// CHECK: %[[v2:.+]] = bufferization.to_tensor %[[v1]] : memref<128xf32, strided<[?], offset: ?>, 1> to tensor<128xf32, 1 : i64> +// CHECK: return %[[v2]] : tensor<128xf32, 1 : i64> + +// ----- + +func.func @scf_forall( + %idx: index, + %idx2: index, + %arg1: tensor, + %arg2: tensor) -> (tensor) { + %cst = arith.constant 4.200000e+01 : f32 + %c0 = arith.constant 0 : index + %c1 = arith.constant 
1 : index + %2 = scf.forall (%arg3) in (%idx2) shared_outs(%o = %arg2) -> (tensor) { + %8 = "some.use"(%o) : (tensor) -> tensor + scf.forall.in_parallel { + tensor.parallel_insert_slice %8 into %o[5] [%idx] [%c1] : + tensor into tensor + } + } + return %2 : tensor +} + +// CHECK-LABEL: func.func @scf_forall +// CHECK: scf.forall +// CHECK: %[[v2:.+]] = bufferization.to_tensor %{{.+}} : memref to tensor +// CHECK: %[[v3:.+]] = "some.use"(%[[v2]]) : (tensor) -> tensor +// CHECK: bufferization.to_memref %[[v3]] : tensor to memref, 1> +// CHECK: %[[v1:.+]] = bufferization.to_tensor %{{.+}} : memref to tensor +// CHECK: return %[[v1]] : tensor + +// ----- + +func.func @scf_execute_region(%arg0: tensor<128xf32, 1>) -> tensor<128xf32, 1> { + %0 = scf.execute_region -> tensor<128xf32, 1> { + scf.yield %arg0 : tensor<128xf32, 1> + } + %1 = "some.use"(%0) : (tensor<128xf32, 1>) -> tensor<128xf32, 1> + return %1 : tensor<128xf32, 1> +} + +// CHECK-LABEL: func.func @scf_execute_region +// CHECK-SAME: (%[[arg0:.+]]: tensor<128xf32, 1 : i64>) +// CHECK: %[[v0:.+]] = bufferization.to_memref %[[arg0]] : tensor<128xf32, 1 : i64> to memref<128xf32, strided<[?], offset: ?>, 1> +// CHECK: %[[v1:.+]] = scf.execute_region -> memref<128xf32, strided<[?], offset: ?>, 1> +// CHECK: scf.yield %[[v0]] : memref<128xf32, strided<[?], offset: ?>, 1> +// CHECK: %[[v2:.+]] = bufferization.to_tensor %[[v1]] : memref<128xf32, strided<[?], offset: ?>, 1> to tensor<128xf32, 1 : i64> +// CHECK: %[[v3:.+]] = "some.use"(%[[v2]]) : (tensor<128xf32, 1 : i64>) -> tensor<128xf32, 1 : i64> +// CHECK: return %[[v3]] : tensor<128xf32, 1 : i64> diff --git a/mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir b/mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir index 6dd0353d9374ad..bb15d018a6c448 100644 --- a/mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir @@ -77,10 +77,10 @@ spirv.func @bf16_to_f32_vec_unsupported(%arg0 : vector<2xi16>) "None" { 
//===----------------------------------------------------------------------===// spirv.func @split_barrier() "None" { - // CHECK: spirv.INTEL.ControlBarrierArrive , , - spirv.INTEL.ControlBarrierArrive , , - // CHECK: spirv.INTEL.ControlBarrierWait , , - spirv.INTEL.ControlBarrierWait , , + // CHECK: spirv.INTEL.ControlBarrierArrive + spirv.INTEL.ControlBarrierArrive + // CHECK: spirv.INTEL.ControlBarrierWait + spirv.INTEL.ControlBarrierWait spirv.Return } diff --git a/mlir/test/Dialect/Shape/bufferize.mlir b/mlir/test/Dialect/Shape/bufferize.mlir index 9f30a052208f0b..02e147d917d0f9 100644 --- a/mlir/test/Dialect/Shape/bufferize.mlir +++ b/mlir/test/Dialect/Shape/bufferize.mlir @@ -6,7 +6,7 @@ // CHECK: %[[WTRUE:.*]] = shape.const_witness true // CHECK: %[[MEMREF:.*]] = shape.assuming %[[WTRUE]] -> (memref<2xf16>) { // CHECK: %[[TENSOR_VAL:.*]] = "test.source"() : () -> tensor<2xf16> -// CHECK: %[[YIELDED_MEMREF:.*]] = bufferization.to_memref %[[TENSOR_VAL]] : memref<2xf16> +// CHECK: %[[YIELDED_MEMREF:.*]] = bufferization.to_memref %[[TENSOR_VAL]] : tensor<2xf16> to memref<2xf16> // CHECK: shape.assuming_yield %[[YIELDED_MEMREF]] : memref<2xf16> // CHECK: } // CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[MEMREF:.*]] : memref<2xf16> diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul24_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul24_lib.mlir index 8293169049ca61..6d98667e775634 100644 --- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul24_lib.mlir +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul24_lib.mlir @@ -14,19 +14,19 @@ // CHECK-SAME: %[[VAL_2:.*2]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_5:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK: %[[VAL_5:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK: %[[VAL_6:.*]] = gpu.wait async // CHECK: %[[VAL_7:.*]] = memref.dim %[[VAL_5]], %[[VAL_3]] : 
memref // CHECK: %[[VAL_8:.*]] = memref.dim %[[VAL_5]], %[[VAL_4]] : memref // CHECK: %[[VAL_9:.*]], %[[VAL_10:.*]] = gpu.alloc async {{\[}}%[[VAL_6]]] (%[[VAL_7]], %[[VAL_8]]) : memref // CHECK: %[[VAL_11:.*]] = gpu.memcpy async {{\[}}%[[VAL_10]]] %[[VAL_9]], %[[VAL_5]] : memref, memref -// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref +// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref // CHECK: %[[VAL_13:.*]] = gpu.wait async // CHECK: %[[VAL_14:.*]] = memref.dim %[[VAL_12]], %[[VAL_3]] : memref // CHECK: %[[VAL_15:.*]] = memref.dim %[[VAL_12]], %[[VAL_4]] : memref // CHECK: %[[VAL_16:.*]], %[[VAL_17:.*]] = gpu.alloc async {{\[}}%[[VAL_13]]] (%[[VAL_14]], %[[VAL_15]]) : memref // CHECK: %[[VAL_18:.*]] = gpu.memcpy async {{\[}}%[[VAL_17]]] %[[VAL_16]], %[[VAL_12]] : memref, memref -// CHECK: %[[VAL_19:.*]] = bufferization.to_memref %[[VAL_2]] : memref +// CHECK: %[[VAL_19:.*]] = bufferization.to_memref %[[VAL_2]] : tensor to memref // CHECK: %[[VAL_20:.*]] = gpu.wait async // CHECK: %[[VAL_21:.*]] = memref.dim %[[VAL_19]], %[[VAL_3]] : memref // CHECK: %[[VAL_22:.*]] = memref.dim %[[VAL_19]], %[[VAL_4]] : memref diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir index 699508acf13cf7..63c308a3d5e6f0 100644 --- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir @@ -30,13 +30,13 @@ // CHECK: %[[VAL_23:.*]] = memref.dim %[[VAL_11]], %[[VAL_3]] : memref // CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = gpu.alloc async {{\[}}%[[VAL_22]]] (%[[VAL_23]]) : memref // CHECK: %[[VAL_26:.*]] = gpu.memcpy async {{\[}}%[[VAL_25]]] %[[VAL_24]], %[[VAL_11]] : memref, memref -// CHECK: %[[VAL_27:.*]] = bufferization.to_memref %[[VAL_1]] : memref +// CHECK: %[[VAL_27:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref // CHECK: %[[VAL_28:.*]] = gpu.wait async // CHECK: %[[VAL_29:.*]] = 
memref.dim %[[VAL_27]], %[[VAL_3]] : memref // CHECK: %[[VAL_30:.*]] = memref.dim %[[VAL_27]], %[[VAL_4]] : memref // CHECK: %[[VAL_31:.*]], %[[VAL_32:.*]] = gpu.alloc async {{\[}}%[[VAL_28]]] (%[[VAL_29]], %[[VAL_30]]) : memref // CHECK: %[[VAL_33:.*]] = gpu.memcpy async {{\[}}%[[VAL_32]]] %[[VAL_31]], %[[VAL_27]] : memref, memref -// CHECK: %[[VAL_34:.*]] = bufferization.to_memref %[[VAL_2]] : memref +// CHECK: %[[VAL_34:.*]] = bufferization.to_memref %[[VAL_2]] : tensor to memref // CHECK: %[[VAL_35:.*]] = gpu.wait async // CHECK: %[[VAL_36:.*]] = memref.dim %[[VAL_34]], %[[VAL_3]] : memref // CHECK: %[[VAL_37:.*]] = memref.dim %[[VAL_34]], %[[VAL_4]] : memref diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir index 14a0e9c30b2016..088e468cee7954 100644 --- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir @@ -30,12 +30,12 @@ module { // CHECK: %[[VAL_22:.*]] = memref.dim %[[VAL_10]], %[[VAL_3]] : memref // CHECK: %[[VAL_23:.*]], %[[VAL_24:.*]] = gpu.alloc async {{\[}}%[[VAL_21]]] (%[[VAL_22]]) : memref // CHECK: %[[VAL_25:.*]] = gpu.memcpy async {{\[}}%[[VAL_24]]] %[[VAL_23]], %[[VAL_10]] : memref, memref -// CHECK: %[[VAL_26:.*]] = bufferization.to_memref %[[VAL_1]] : memref +// CHECK: %[[VAL_26:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref // CHECK: %[[VAL_27:.*]] = gpu.wait async // CHECK: %[[VAL_28:.*]] = memref.dim %[[VAL_26]], %[[VAL_3]] : memref // CHECK: %[[VAL_29:.*]], %[[VAL_30:.*]] = gpu.alloc async {{\[}}%[[VAL_27]]] (%[[VAL_28]]) : memref // CHECK: %[[VAL_31:.*]] = gpu.memcpy async {{\[}}%[[VAL_30]]] %[[VAL_29]], %[[VAL_26]] : memref, memref -// CHECK: %[[VAL_32:.*]] = bufferization.to_memref %[[VAL_2]] : memref +// CHECK: %[[VAL_32:.*]] = bufferization.to_memref %[[VAL_2]] : tensor to memref // CHECK: %[[VAL_33:.*]] = gpu.wait async // CHECK: %[[VAL_34:.*]] = memref.dim %[[VAL_32]], %[[VAL_3]] 
: memref // CHECK: %[[VAL_35:.*]], %[[VAL_36:.*]] = gpu.alloc async {{\[}}%[[VAL_33]]] (%[[VAL_34]]) : memref diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir index 97f36d49927bf9..1058bc03fe9cb9 100644 --- a/mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir @@ -28,11 +28,11 @@ // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> -// CHECK: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64> +// CHECK: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<8x8xf64> to memref<8x8xf64> // CHECK: %[[VAL_7:.*]] = gpu.wait async // CHECK: %[[VAL_8:.*]], %[[VAL_9:.*]] = gpu.alloc async {{\[}}%[[VAL_7]]] () : memref<8x8xf64> // CHECK: %[[VAL_10:.*]] = gpu.memcpy async {{\[}}%[[VAL_9]]] %[[VAL_8]], %[[VAL_6]] : memref<8x8xf64>, memref<8x8xf64> -// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64> +// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<8x8xf64> to memref<8x8xf64> // CHECK: %[[VAL_12:.*]] = gpu.wait async // CHECK: %[[VAL_13:.*]], %[[VAL_14:.*]] = gpu.alloc async {{\[}}%[[VAL_12]]] () : memref<8x8xf64> // CHECK: %[[VAL_15:.*]] = gpu.memcpy async {{\[}}%[[VAL_14]]] %[[VAL_13]], %[[VAL_11]] : memref<8x8xf64>, memref<8x8xf64> diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir index 93f49002a47d2d..32741086b9e6eb 100644 --- a/mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir @@ -30,13 +30,13 @@ // CHECK: %[[VAL_8:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor // CHECK: %[[VAL_9:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor // 
CHECK: %[[VAL_10:.*]] = tensor.dim %[[VAL_2]], %[[VAL_4]] : tensor -// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref +// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref // CHECK: %[[VAL_12:.*]] = gpu.wait async // CHECK: %[[VAL_13:.*]] = memref.dim %[[VAL_11]], %[[VAL_3]] : memref // CHECK: %[[VAL_14:.*]] = memref.dim %[[VAL_11]], %[[VAL_4]] : memref // CHECK: %[[VAL_15:.*]], %[[VAL_16:.*]] = gpu.alloc async {{\[}}%[[VAL_12]]] (%[[VAL_13]], %[[VAL_14]]) : memref // CHECK: %[[VAL_17:.*]] = gpu.memcpy async {{\[}}%[[VAL_16]]] %[[VAL_15]], %[[VAL_11]] : memref, memref -// CHECK: %[[VAL_18:.*]] = bufferization.to_memref %[[VAL_2]] : memref +// CHECK: %[[VAL_18:.*]] = bufferization.to_memref %[[VAL_2]] : tensor to memref // CHECK: %[[VAL_19:.*]] = gpu.wait async // CHECK: %[[VAL_20:.*]] = memref.dim %[[VAL_18]], %[[VAL_3]] : memref // CHECK: %[[VAL_21:.*]] = memref.dim %[[VAL_18]], %[[VAL_4]] : memref diff --git a/mlir/test/Dialect/SparseTensor/constant_index_map.mlir b/mlir/test/Dialect/SparseTensor/constant_index_map.mlir index f9559ce648c783..857967bcf521ab 100644 --- a/mlir/test/Dialect/SparseTensor/constant_index_map.mlir +++ b/mlir/test/Dialect/SparseTensor/constant_index_map.mlir @@ -14,8 +14,8 @@ // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_5:.*]] = tensor.empty() : tensor<77xi1, #{{.*}}> -// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<1x77xi1> -// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<1x77xi1> +// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : tensor<1x77xi1> +// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<1x77xi1> // CHECK: %[[VAL_8:.*]] = scf.for %[[VAL_9:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] iter_args(%[[VAL_10:.*]] = %[[VAL_5]]) -> (tensor<77xi1, #{{.*}}>) { // CHECK: %[[VAL_11:.*]] = memref.load 
%[[VAL_6]]{{\[}}%[[VAL_3]], %[[VAL_9]]] : memref<1x77xi1> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_3]], %[[VAL_9]]] : memref<1x77xi1> diff --git a/mlir/test/Dialect/SparseTensor/dense.mlir b/mlir/test/Dialect/SparseTensor/dense.mlir index 60a217e05e61ec..5ed1558a53163c 100644 --- a/mlir/test/Dialect/SparseTensor/dense.mlir +++ b/mlir/test/Dialect/SparseTensor/dense.mlir @@ -40,7 +40,7 @@ // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> +// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { @@ -79,7 +79,7 @@ func.func @dense1(%arga: tensor<32x16xf32, #DenseMatrix>, // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16xf32> +// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index @@ -122,7 +122,7 @@ func.func @dense2(%arga: tensor<32x16xf32>, // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_7:.*]] = bufferization.to_memref 
%[[VAL_0]] : memref<32x16x8xf32> +// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : tensor<32x16x8xf32> to memref<32x16x8xf32> // CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index diff --git a/mlir/test/Dialect/SparseTensor/fuse_sparse_pad_with_consumer.mlir b/mlir/test/Dialect/SparseTensor/fuse_sparse_pad_with_consumer.mlir index 0ef143a1a2f38c..275f7f2ff25f7e 100644 --- a/mlir/test/Dialect/SparseTensor/fuse_sparse_pad_with_consumer.mlir +++ b/mlir/test/Dialect/SparseTensor/fuse_sparse_pad_with_consumer.mlir @@ -30,7 +30,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<4x4xf32, #sparse> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<4x4xf32, #sparse> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<4x4xf32, #sparse> to memref -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_10]] : memref<8x8xf32> +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_10]] : // CHECK-DAG: linalg.fill ins(%[[VAL_8]] : f32) outs(%[[VAL_14]] : memref<8x8xf32>) // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_5]] { // CHECK: %[[VAL_16:.*]] = arith.subi %[[VAL_15]], %[[VAL_7]] : index @@ -54,7 +54,7 @@ // CHECK: memref.store %[[VAL_30]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_27]]] : memref<8x8xf32> // CHECK: } {"Emitted from" = "linalg.generic"} // CHECK: } {"Emitted from" = "linalg.generic"} -// CHECK: %[[VAL_31:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<8x8xf32> +// CHECK: %[[VAL_31:.*]] = bufferization.to_tensor %[[VAL_14]] : // CHECK: return %[[VAL_31]] : tensor<8x8xf32> // CHECK: } func.func @padded_mul(%arg0: tensor<4x4xf32, #CSR>, %arg1: tensor<8x8xf32>) -> tensor<8x8xf32> { 
diff --git a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir index 2b9a2dd8f4883d..58f182dbdc44d1 100644 --- a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir +++ b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir @@ -101,7 +101,7 @@ func.func @sparse_scale(%argx: tensor) -> tensor to memref> // C_HECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref> // C_HECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref -// C_HECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> +// C_HECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xf64> to memref<32xf64> // C_HECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // C_HECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // C_HECK: %[[VAL_13:.*]] = scf.while (%[[VAL_14:.*]] = %[[VAL_11]]) : (index) -> index { @@ -170,7 +170,7 @@ func.func @matvec(%arga: tensor<32x64xf64, #SortedCOO>, // C_HECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref> // C_HECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref> // C_HECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref -// C_HECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x64xf64> +// C_HECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x64xf64> to memref<32x64xf64> // C_HECK: linalg.fill ins(%[[VAL_4]] : f64) outs(%[[VAL_15]] : memref<32x64xf64>) // C_HECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref // C_HECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir 
b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir index fcc221660353a1..003dcc6708d634 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir @@ -51,7 +51,7 @@ func.func @add_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK: %[[VAL_INITTENSOR:.*]] = tensor.empty() : tensor<32xf32> // CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref -// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_INITTENSOR]] : memref<32xf32> +// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_INITTENSOR]] : tensor<32xf32> to memref<32xf32> // CHECK: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_7]] : memref<32xf32>) // CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] { // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_8]]] : memref @@ -247,7 +247,7 @@ func.func @mul_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> +// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf32> to memref<32xf32> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>) // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { @@ -278,7 +278,7 @@ func.func @add_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tens // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: 
%[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> +// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf32> to memref<32xf32> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>) // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { @@ -309,7 +309,7 @@ func.func @mul_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tens // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32xf32> +// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : tensor<32xf32> to memref<32xf32> // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref @@ -366,7 +366,7 @@ func.func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tens // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32xf32> +// CHECK-DAG: %[[VAL_5:.*]] = bufferization.to_memref %[[VAL_0]] : tensor<32xf32> to memref<32xf32> // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : 
tensor<32xf32, #sparse{{[0-9]*}}> to memref @@ -406,7 +406,7 @@ func.func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tens // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf32> to memref<32xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref @@ -463,7 +463,7 @@ func.func @add_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tens // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf32> to memref<32xf32> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>) // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref @@ -830,7 +830,7 @@ func.func @two_way_inv_alt(%arga: tensor<16xf32, #SV>, // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions 
%[[VAL_0]] {level = 0 : index} : tensor to memref // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref -// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : memref +// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref // CHECK-DAG: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref // CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_6]][] : memref @@ -875,7 +875,7 @@ func.func @sum_reduction(%arga: tensor, %argx: tensor) -> tenso // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor to memref // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_11]][] : memref // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref @@ -977,11 +977,11 @@ func.func @sum_reduction_ss(%arga: tensor<16xf32, #SV>, // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref // CHECK-DAG: %[[VAL_10:.*]] = 
sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : tensor to memref // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_13]][] : memref // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]][] : memref // CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref @@ -1089,11 +1089,11 @@ func.func @sum_reduction_inv(%arga: tensor<16xf32, #SV>, // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant true // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : tensor to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_3]] {level = 0 : index} : tensor to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_3]] {level = 0 : index} : tensor to memref // CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor to memref @@ -1272,7 +1272,7 @@ func.func @four_tensors_op(%arga: tensor, // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_2]] 
{level = 0 : index} : tensor to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_2]] : tensor to memref -// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_3]] : memref +// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_3]] : tensor to memref // CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_15]][] : memref // CHECK-DAG: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_19:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir index 06670ab096fcdf..9c34e54db6c853 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir @@ -25,8 +25,8 @@ // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_4]] : index @@ -62,8 +62,8 @@ func.func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : 
tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xi1> to memref<32x16xi1> // CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_10]] : memref<32x16xi1>) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_4]] : index @@ -98,8 +98,8 @@ func.func @cmp_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_4]] : index @@ -137,8 +137,8 @@ func.func @mul_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = 
sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>) // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref @@ -202,8 +202,8 @@ func.func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xi1> to memref<32x16xi1> // CHECK-DAG: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>) // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_15]]] : memref @@ -265,8 +265,8 @@ func.func @cmp_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : 
index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref @@ -306,8 +306,8 @@ func.func @mul_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>) // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_15:.*]] = memref.load 
%[[VAL_8]]{{\[}}%[[VAL_7]]] : memref @@ -376,9 +376,9 @@ func.func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> -// CHECK-DAG: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>) +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xi1> to memref<32x16xi1> +// CHECK-DAG: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>) // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_15]], %[[VAL_19:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { @@ -444,9 +444,9 @@ func.func @cmp_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref 
%[[VAL_2]] : memref<32x16xf32> -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>) +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>) // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_5]] { @@ -488,8 +488,8 @@ func.func @mul_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref @@ -584,8 +584,8 @@ func.func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // 
CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> +// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xi1> to memref<32x16xi1> // CHECK-DAG: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_16]] : memref<32x16xi1>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_7]]] : memref @@ -679,8 +679,8 @@ func.func @cmp_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16xf32> to memref<32x16xf32> +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32x16xf32>) // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref @@ -726,7 +726,7 @@ 
func.func @mul_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref @@ -891,7 +891,7 @@ func.func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> +// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xi1> to memref<32x16xi1> // CHECK-DAG: linalg.fill ins(%[[VAL_3]] : i1) outs(%[[VAL_17]] : memref<32x16xi1>) // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref @@ -1166,7 +1166,7 @@ func.func @sub_ss_batched(%0: tensor<2x3xf64, #BatchedVector>, %1: tensor<2x3xf6 // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : 
tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref @@ -1260,7 +1260,7 @@ func.func @mul_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref @@ -1362,7 +1362,7 @@ func.func @add_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #T // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, 
#sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>) // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref @@ -1415,8 +1415,8 @@ func.func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #T // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<16x32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<16x32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16x32xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<16xf32> +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf32> to memref<32xf32> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<16xf32> to memref<16xf32> // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref // CHECK-DAG: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index @@ -1464,7 +1464,7 @@ func.func @matvec(%argA: tensor<16x32xf32, #Tds>, %argb: tensor<32xf32>, %argx: // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values 
%[[VAL_0]] : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK: %[[VAL_10:.*]] = scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_3]] iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (f32) { // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref @@ -1511,7 +1511,7 @@ func.func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor) -> // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_3]] : tensor -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref // CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_11]] : memref) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_4]] { // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref @@ -1563,9 +1563,9 @@ func.func @scale(%arga: tensor, %argx: tensor) -> tensor // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref // CHECK-DAG: %[[VAL_11:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_3]] : memref +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : tensor to memref +// 
CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_3]] : tensor to memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_15]] to %[[VAL_16]] step %[[VAL_4]] { @@ -1638,10 +1638,10 @@ func.func @sampled_dense_dense(%args: tensor, // CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor to memref // CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor to memref // CHECK-DAG: %[[VAL_19:.*]] = sparse_tensor.values %[[VAL_2]] : tensor to memref -// CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_3]] : memref -// CHECK-DAG: %[[VAL_21:.*]] = bufferization.to_memref %[[VAL_4]] : memref +// CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_3]] : tensor to memref +// CHECK-DAG: %[[VAL_21:.*]] = bufferization.to_memref %[[VAL_4]] : tensor to memref // CHECK-DAG: %[[VAL_22:.*]] = sparse_tensor.lvl %[[VAL_2]], %[[VAL_6]] : tensor +// CHECK-DAG: %[[VAL_24:.*]] = bufferization.to_memref %[[VAL_5]] : tensor to memref // CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_21]][] : memref // CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir index 427a5c3d03a730..9158ac427763b4 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir @@ -33,9 +33,9 @@ // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_11:.*]] = 
bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: %[[VAL_14:.*]] = arith.muli %[[VAL_12]], %[[VAL_4]] : index // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { @@ -75,9 +75,9 @@ func.func @add_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: %[[VAL_14:.*]] = arith.muli %[[VAL_12]], %[[VAL_4]] : index // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { @@ -120,8 +120,8 @@ func.func @mul_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : 
tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> // CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_9]] { // CHECK: %[[VAL_18:.*]] = arith.muli %[[VAL_16]], %[[VAL_5]] : index @@ -187,9 +187,9 @@ func.func @add_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : 
memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { // CHECK: %[[VAL_16:.*]] = arith.muli %[[VAL_14]], %[[VAL_5]] : index // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] { @@ -234,9 +234,9 @@ func.func @mul_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_8]] { // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_8]] : index @@ -305,9 +305,9 @@ func.func @add_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, 
#sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref // CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_6]] : index @@ -354,9 +354,9 @@ func.func @mul_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_8]] to %[[VAL_4]] step %[[VAL_9]] { // CHECK: 
%[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_9]] : index @@ -450,9 +450,9 @@ func.func @add_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_6]] : index @@ -499,9 +499,9 @@ func.func @mul_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : 
memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_15]], %[[VAL_19:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) { @@ -575,9 +575,9 @@ func.func @add_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>) // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_14:.*]] = memref.load 
%[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_6]] { @@ -625,9 +625,9 @@ func.func @mul_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_20:.*]]:2 = scf.while (%[[VAL_21:.*]] = %[[VAL_18]], %[[VAL_22:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { @@ -726,9 +726,9 @@ func.func @add_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_12:.*]] = 
bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_15]] to %[[VAL_16]] step %[[VAL_6]] { @@ -778,9 +778,9 @@ func.func @mul_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_16]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_16]] : memref<32x16x8xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref 
// CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_17]], %[[VAL_21:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) { @@ -883,9 +883,9 @@ func.func @add_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>) // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_5]] { @@ -937,9 +937,9 @@ func.func @mul_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_1]] : 
memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_19:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_19]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_19:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_19]] : memref<32x16x8xf32>) // CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_22:.*]]:2 = scf.while (%[[VAL_23:.*]] = %[[VAL_20]], %[[VAL_24:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { @@ -1067,9 +1067,9 @@ func.func @add_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>) +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16x8xf32> to memref<32x16x8xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_17:.*]] = memref.load 
%[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_16]] to %[[VAL_17]] step %[[VAL_5]] { @@ -1127,11 +1127,11 @@ func.func @mul_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 2 : index} : tensor to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.lvl %[[VAL_1]], %[[VAL_6]] : tensor -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_3]] : memref +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor to memref +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_3]] : tensor to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.lvl %[[VAL_1]], %[[VAL_5]] : tensor // CHECK-DAG: %[[VAL_14:.*]] = tensor.dim %[[VAL_2]], %[[VAL_6]] : tensor -// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_5]] to %[[VAL_13]] step %[[VAL_6]] { // CHECK: %[[VAL_19:.*]] = arith.muli %[[VAL_17]], %[[VAL_10]] : index // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_5]] to %[[VAL_10]] step %[[VAL_6]] { @@ -1191,7 +1191,7 @@ func.func @kernel_3d(%arga: tensor, // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20x30xf32, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<10x20x30xf32, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_10]][] : memref 
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref @@ -1246,10 +1246,10 @@ func.func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor) // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor // CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-DAG: %[[VAL_9:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : tensor to memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_12]][] : memref // CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_9]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (f32) { // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_15]]] : memref @@ -1305,10 +1305,10 @@ func.func @sum_reduction_inv(%arga: tensor, // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20xf32> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<30xf32> -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref<10x20x30xf32> -// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<10x20x30xf32>) +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<20xf32> to memref<20xf32> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref 
%[[VAL_2]] : tensor<30xf32> to memref<30xf32> +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : tensor<10x20x30xf32> to memref<10x20x30xf32> +// CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<10x20x30xf32>) // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_14]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { diff --git a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir index 1ec6fb586d434e..e2dbadc4db5bfe 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir @@ -25,8 +25,8 @@ // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<4xf32> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32> +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<4xf32> to memref<4xf32> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xf32> to memref<32xf32> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<4xf32> // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref @@ -112,8 +112,8 @@ func.func @mul_inv_enc_dense1d(%arga: tensor<32xf32, #EncDenseVec>, // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi32, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 
0 : index} : tensor<32xi32, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<34xi32> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi32> +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<34xi32> to memref<34xi32> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xi32> to memref<32xi32> // CHECK-DAG: linalg.fill ins(%[[ZERO]] : i32) outs(%[[VAL_11]] : memref<32xi32>) // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref @@ -163,8 +163,8 @@ func.func @and_affine_dense1d(%arga: tensor<32xi32, #SpVec>, // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf64, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<34x19xf64> -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf64> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<34x19xf64> to memref<34x19xf64> +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf64> to memref<32x16xf64> // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_3]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref // CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_14]], %[[VAL_3]] : index @@ -223,7 +223,7 @@ func.func @mul_affine_dense2d(%arga: tensor<32x16xf64, #CSR>, // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, 
#sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf64> +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf64> to memref<32x16xf64> // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_15]] to %[[VAL_16]] step %[[VAL_5]] { @@ -287,7 +287,7 @@ func.func @mul_affine_dense_dim_2d(%arga: tensor<34x16xf64, #CSR>, // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf64> +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32x16xf64> to memref<32x16xf64> // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_6]]] : memref // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_15]] to %[[VAL_16]] step %[[VAL_6]] { diff --git a/mlir/test/Dialect/SparseTensor/sparse_batch.mlir b/mlir/test/Dialect/SparseTensor/sparse_batch.mlir index f158fc6108a130..cfddef743cf28e 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_batch.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_batch.mlir @@ -14,7 +14,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<8x4x2xf32, 
#sparse{{[0-9]*}}> to memref<8x?xindex> // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<8x4x2xf32, #sparse{{[0-9]*}}> to memref<8x?xindex> // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x4x2xf32, #sparse{{[0-9]*}}> to memref<8x?xf32> -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_6]] : memref<8x4x2xf32> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_6]] : tensor<8x4x2xf32> // CHECK-DAG: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_10]] : memref<8x4x2xf32>) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_1]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_1]] { diff --git a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir index b6c7b771394b15..d1d8276f8daefd 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir @@ -38,7 +38,7 @@ // CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> +// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf64> to memref<32xf64> // CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] { @@ -70,7 +70,7 @@ func.func @abs(%arga: tensor<32xf64, #SV>, // CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref // 
CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> +// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf64> to memref<32xf64> // CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] { @@ -99,10 +99,10 @@ func.func @ceil(%arga: tensor<32xf64, #SV>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> +// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf64> to memref<32xf64> // CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref // CHECK: scf.for 
%[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] { @@ -131,10 +131,10 @@ func.func @floor(%arga: tensor<32xf64, #SV>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> +// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf64> to memref<32xf64> // CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] { @@ -169,8 +169,8 @@ func.func @neg(%arga: tensor<32xf64, #SV>, // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> -// CHECK-DAG: %[[VAL_11:.*]] = 
bufferization.to_memref %[[VAL_2]] : memref<32xf64> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf64> to memref<32xf64> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xf64> to memref<32xf64> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { @@ -229,8 +229,8 @@ func.func @add(%arga: tensor<32xf64, #SV>, // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf64> to memref<32xf64> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xf64> to memref<32xf64> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { @@ -289,8 +289,8 @@ func.func @sub(%arga: tensor<32xf64, #SV>, // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values 
%[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf64> to memref<32xf64> +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xf64> to memref<32xf64> // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] { @@ -325,7 +325,7 @@ func.func @mul(%arga: tensor<32xf64, #SV>, // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xf64> to memref<32xf64> // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] { diff --git a/mlir/test/Dialect/SparseTensor/sparse_fusion.mlir b/mlir/test/Dialect/SparseTensor/sparse_fusion.mlir index 50f21416f5a74b..d9f48afef48100 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_fusion.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fusion.mlir @@ -25,10 +25,10 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<100xf64, #sparse> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<100xf64, #sparse> to 
memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<100xf64, #sparse> to memref -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_8]] : memref<100xf64> -// CHECK-DAG: linalg.fill ins(%[[VAL_4]] : f64) outs(%[[VAL_12]] : memref<100xf64>) -// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref -// CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_2]]] : memref +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_8]] : +// CHECK-DAG: linalg.fill ins(%[[VAL_4]] : f64) outs(%[[VAL_12]] : memref<100xf64>) +// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_3]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_18:.*]] = arith.cmpi ult, %[[VAL_16]], %[[VAL_14]] : index // CHECK: scf.condition(%[[VAL_18]]) %[[VAL_16]], %[[VAL_17]] : index, index @@ -57,7 +57,7 @@ // CHECK: scf.for %[[VAL_31:.*]] = %[[VAL_32:.*]]#1 to %[[VAL_5]] step %[[VAL_2]] { // CHECK: memref.store %[[VAL_7]], %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref<100xf64> // CHECK: } -// CHECK: %[[VAL_33:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<100xf64> +// CHECK: %[[VAL_33:.*]] = bufferization.to_tensor %[[VAL_12]] : // CHECK: return %[[VAL_33]] : tensor<100xf64> // CHECK: } func.func @sparse_fusion(%argA: tensor<100xf64, #SV>) -> tensor<100xf64> { diff --git a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir index 8fa473b5a9dba5..3a33a200f8279d 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir @@ -33,8 +33,8 @@ // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates 
%[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xi64> to memref<32xi64> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xi64> to memref<32xi64> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { @@ -94,8 +94,8 @@ func.func @add(%arga: tensor<32xi64, #SV>, // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xi64> to memref<32xi64> +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xi64> to memref<32xi64> // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { @@ -154,8 +154,8 @@ func.func @sub(%arga: tensor<32xi64, #SV>, // CHECK-DAG: %[[VAL_5:.*]] = 
sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xi64> to memref<32xi64> +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xi64> to memref<32xi64> // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] { @@ -190,7 +190,7 @@ func.func @mul(%arga: tensor<32xi64, #SV>, // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xi64> to memref<32xi64> // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] { @@ -224,7 +224,7 @@ func.func @divsbyc(%arga: tensor<32xi64, #SV>, // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : 
index} : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xi64> to memref<32xi64> // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] { @@ -258,8 +258,8 @@ func.func @divubyc(%arga: tensor<32xi64, #SV>, // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xi64> to memref<32xi64> +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xi64> to memref<32xi64> // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] { @@ -296,8 +296,8 @@ func.func @and(%arga: tensor<32xi64, #SV>, // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, 
#sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xi64> to memref<32xi64> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xi64> to memref<32xi64> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { @@ -356,8 +356,8 @@ func.func @or(%arga: tensor<32xi64, #SV>, // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xi64> to memref<32xi64> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xi64> to memref<32xi64> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { @@ -414,7 +414,7 @@ func.func @xor(%arga: tensor<32xi64, #SV>, // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: 
%[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xi64> to memref<32xi64> // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] { @@ -448,7 +448,7 @@ func.func @ashrbyc(%arga: tensor<32xi64, #SV>, // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xi64> to memref<32xi64> // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] { @@ -482,7 +482,7 @@ func.func @lsrbyc(%arga: tensor<32xi64, #SV>, // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : 
memref<32xi64> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<32xi64> to memref<32xi64> // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] { diff --git a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir index 78e29979ca1acb..d215ebb1c0c6f1 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir @@ -18,8 +18,8 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20x30xf32> -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x30xf32> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<20x30xf32> to memref<20x30xf32> +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<10x30xf32> to memref<10x30xf32> // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_5]] { @@ -58,13 +58,13 @@ func.func @matmul1(%a: tensor<10x20xf32, #DCSR>, // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 10 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<10x20xf32> +// CHECK-DAG: %[[VAL_6:.*]] = 
bufferization.to_memref %[[VAL_0]] : tensor<10x20xf32> to memref<10x20xf32> // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x30xf32> +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<10x30xf32> to memref<10x30xf32> // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref @@ -203,13 +203,13 @@ func.func @matmul2(%A: tensor<4x8xf64, #DCSR>, // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 6 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<8x8xi32> +// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : tensor<8x8xi32> to memref<8x8xi32> // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: 
%[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<6x6xi32> +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<6x6xi32> to memref<6x6xi32> // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_13]], %[[VAL_14]]] : memref<6x6xi32> @@ -255,13 +255,13 @@ func.func @conv2d(%input: tensor<8x8xi32>, // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 2 : i64 -// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<5x3xi8> +// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : tensor<5x3xi8> to memref<5x3xi8> // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<5x6xi64> +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<5x6xi64> to memref<5x6xi64> // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref @@ -309,7 +309,7 @@ func.func @quantized_matmul(%input1: tensor<5x3xi8>, // CHECK-DAG: 
%[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<1024xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<1024xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<1024xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor to memref // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_11]][] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_kernels_to_iterator.mlir b/mlir/test/Dialect/SparseTensor/sparse_kernels_to_iterator.mlir index f819458e038582..836e26b51f7c18 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_kernels_to_iterator.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_kernels_to_iterator.mlir @@ -85,7 +85,7 @@ func.func @sqsum(%arg0: tensor) -> tensor { // CHECK: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32 // CHECK: %[[VAL_5:.*]] = arith.constant dense<0> : tensor<10xi32> -// CHECK: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_5]] : memref<10xi32> +// CHECK: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_5]] : tensor<10xi32> to memref<10xi32> // CHECK: linalg.fill ins(%[[VAL_4]] : i32) outs(%[[VAL_6]] : memref<10xi32>) // CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<10xi32, #sparse{{.*}}> to memref // CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<10xi32, #sparse{{.*}}> to memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir index c27df00785522a..cab57389f032e1 100644 --- 
a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir @@ -29,8 +29,8 @@ // CHECK-HIR-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> // CHECK-HIR-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> // CHECK-HIR-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse{{[0-9]*}}> -// CHECK-HIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64> -// CHECK-HIR-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> +// CHECK-HIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<64xf64> to memref<64xf64> +// CHECK-HIR-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xf64> to memref<32xf64> // CHECK-HIR: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK-HIR-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref // CHECK-HIR-DAG: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index @@ -60,8 +60,8 @@ // CHECK-MIR-DAG: %[[VAL_6:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK-MIR-DAG: %[[VAL_7:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK-MIR-DAG: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref -// CHECK-MIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64> -// CHECK-MIR-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> +// CHECK-MIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<64xf64> to memref<64xf64> +// CHECK-MIR-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xf64> to memref<32xf64> // CHECK-MIR: scf.for %[[VAL_14:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK-MIR-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_14]]] : 
memref // CHECK-MIR-DAG: %[[VAL_16:.*]] = arith.addi %[[VAL_14]], %[[VAL_5]] : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir index 9fbb9dd0a26d17..b998eeb0d39443 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir @@ -32,8 +32,8 @@ // CHECK-HIR-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[DEMAP]] {level = 1 : index} // CHECK-HIR-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[DEMAP]] {level = 1 : index} // CHECK-HIR-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[DEMAP]] -// CHECK-HIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64> -// CHECK-HIR-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> +// CHECK-HIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<64xf64> to memref<64xf64> +// CHECK-HIR-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xf64> to memref<32xf64> // CHECK-HIR: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK-HIR-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_12]]] : memref<64xf64> // CHECK-HIR-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref @@ -62,8 +62,8 @@ // CHECK-MIR-DAG: %[[VAL_7:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref // CHECK-MIR-DAG: %[[VAL_8:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref // CHECK-MIR-DAG: %[[VAL_9:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref -// CHECK-MIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64> -// CHECK-MIR-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> +// CHECK-MIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<64xf64> to memref<64xf64> +// CHECK-MIR-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xf64> to 
memref<32xf64> // CHECK-MIR: scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK-MIR: %[[VAL_16:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_15]]] : memref<64xf64> // CHECK-MIR: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir index 773c5677eea550..e1e1953779fa88 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir @@ -29,8 +29,8 @@ // CHECK-HIR-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> // CHECK-HIR-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> // CHECK-HIR-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse{{[0-9]*}}> -// CHECK-HIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64> -// CHECK-HIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> +// CHECK-HIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<64xf64> to memref<64xf64> +// CHECK-HIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xf64> to memref<32xf64> // CHECK-HIR: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK-HIR-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref // CHECK-HIR-DAG: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_5]] : index @@ -60,8 +60,8 @@ // CHECK-MIR-DAG: %[[VAL_6:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK-MIR-DAG: %[[VAL_7:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK-MIR-DAG: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref -// CHECK-MIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : 
memref<64xf64> -// CHECK-MIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> +// CHECK-MIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<64xf64> to memref<64xf64> +// CHECK-MIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<32xf64> to memref<32xf64> // CHECK-MIR: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK-MIR-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref // CHECK-MIR-DAG: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_5]] : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir index 2ac36fa6d8996b..b80a48363773f7 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir @@ -35,13 +35,13 @@ // CHECK-DAG: %[[VAL_10:.*]] = arith.constant 80 : index // CHECK-DAG: %[[VAL_11:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_12:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_0]] : memref<10x20x30x40x50x60x70x80xf32> +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_0]] : tensor<10x20x30x40x50x60x70x80xf32> to memref<10x20x30x40x50x60x70x80xf32> // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 3 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 3 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref 
-// CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x20x30x40x50x60x70x80xf32> +// CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<10x20x30x40x50x60x70x80xf32> to memref<10x20x30x40x50x60x70x80xf32> // CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32> // CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_12]] { // CHECK: %[[VAL_23:.*]] = arith.muli %[[VAL_21]], %[[VAL_9]] : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir b/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir index 5b453e9a736a24..ab7a30e2f96a59 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir @@ -19,7 +19,7 @@ // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<10xi32, #{{.*}}> to memref // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<10xi32, #{{.*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xi32, #{{.*}}> to memref -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<10xf32> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<10xf32> to memref<10xf32> // CHECK-DAG: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_8]] : memref<10xf32>) // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref @@ -53,7 +53,7 @@ func.func @allout_inplace(%arga: tensor<10xi32, #SV>, // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<10xi32, #{{.*}}> to memref // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<10xi32, #{{.*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xi32, #{{.*}}> to memref -// CHECK-DAG: %[[VAL_8:.*]] = 
bufferization.to_memref %[[VAL_4]] : memref<10xf32> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_4]] : tensor<10xf32> to memref<10xf32> // CHECK-DAG: linalg.fill ins(%[[VAL_2]] : f32) outs(%[[VAL_8]] : memref<10xf32>) // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_1]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref @@ -86,7 +86,7 @@ func.func @allout_materialize(%arga: tensor<10xi32, #SV>) -> tensor<10xf32> { // CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<10xf32, #{{.*}}> to memref // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<10xf32, #{{.*}}> to memref // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xf32, #{{.*}}> to memref -// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<10xf32> +// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<10xf32> to memref<10xf32> // CHECK-DAG: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] { diff --git a/mlir/test/Dialect/SparseTensor/sparse_pack.mlir b/mlir/test/Dialect/SparseTensor/sparse_pack.mlir index a90194a74ee4a3..91e3842bdd367f 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_pack.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_pack.mlir @@ -12,12 +12,12 @@ // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 2 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 100 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : memref<2xindex> +// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<2xindex> to memref<2xindex> // CHECK-DAG: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref -// CHECK-DAG: %[[VAL_8:.*]] = 
bufferization.to_memref %[[VAL_2]] : memref<6x2xi32> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<6x2xi32> to memref<6x2xi32> // CHECK-DAG: %[[VAL_9:.*]] = memref.collapse_shape %[[VAL_8]] {{\[\[}}0, 1]] : memref<6x2xi32> into memref<12xi32> // CHECK-DAG: %[[VAL_10:.*]] = memref.cast %[[VAL_9]] : memref<12xi32> to memref -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_0]] : memref<6xf64> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_0]] : tensor<6xf64> to memref<6xf64> // CHECK-DAG: %[[VAL_12:.*]] = memref.cast %[[VAL_11]] : memref<6xf64> to memref // CHECK: %[[VAL_13:.*]] = sparse_tensor.storage_specifier.init // CHECK: %[[VAL_14:.*]] = sparse_tensor.storage_specifier.set %[[VAL_13]] lvl_sz at 0 with %[[VAL_4]] @@ -45,18 +45,18 @@ func.func @sparse_pack(%values: tensor<6xf64>, %pos:tensor<2xindex>, %coordinate // CHECK-SAME: %[[VAL_5:.*]]: tensor<2xindex>, // CHECK-SAME: %[[VAL_6:.*]]: tensor<6x2xi32>) -> (tensor<6xf64>, tensor<2xindex>, tensor<6x2xi32>) { // CHECK: %[[VAL_7:.*]] = sparse_tensor.storage_specifier.get %[[VAL_3]] pos_mem_sz at 0 -// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_5]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_5]] : tensor<2xindex> to memref<2xindex> // CHECK: %[[VAL_9:.*]] = memref.subview %[[VAL_8]][0] {{\[}}%[[VAL_7]]] [1] : memref<2xindex> to memref // CHECK: %[[VAL_10:.*]] = memref.subview %[[VAL_0]][0] {{\[}}%[[VAL_7]]] [1] : memref to memref // CHECK: memref.copy %[[VAL_10]], %[[VAL_9]] : memref to memref // CHECK: %[[VAL_11:.*]] = sparse_tensor.storage_specifier.get %[[VAL_3]] crd_mem_sz at 0 -// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_6]] : memref<6x2xi32> +// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_6]] : tensor<6x2xi32> to memref<6x2xi32> // CHECK: %[[VAL_13:.*]] = memref.collapse_shape %[[VAL_12]] {{\[\[}}0, 1]] : memref<6x2xi32> into memref<12xi32> // CHECK: %[[VAL_14:.*]] = memref.subview 
%[[VAL_13]][0] {{\[}}%[[VAL_11]]] [1] : memref<12xi32> to memref // CHECK: %[[VAL_15:.*]] = memref.subview %[[VAL_1]][0] {{\[}}%[[VAL_11]]] [1] : memref to memref // CHECK: memref.copy %[[VAL_15]], %[[VAL_14]] : memref to memref // CHECK: %[[VAL_16:.*]] = sparse_tensor.storage_specifier.get %[[VAL_3]] val_mem_sz -// CHECK: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_4]] : memref<6xf64> +// CHECK: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_4]] : tensor<6xf64> to memref<6xf64> // CHECK: %[[VAL_18:.*]] = memref.subview %[[VAL_17]][0] {{\[}}%[[VAL_16]]] [1] : memref<6xf64> to memref // CHECK: %[[VAL_19:.*]] = memref.subview %[[VAL_2]][0] {{\[}}%[[VAL_16]]] [1] : memref to memref // CHECK: memref.copy %[[VAL_19]], %[[VAL_18]] : memref to memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir index 44a551464c8609..c2cabd4351112b 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir @@ -24,8 +24,8 @@ // CHECK-DAG: %[[TMP_0:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} // CHECK-DAG: %[[TMP_1:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} // CHECK-DAG: %[[TMP_2:.*]] = sparse_tensor.values %[[TMP_arg0]] -// CHECK-DAG: %[[TMP_3:.*]] = bufferization.to_memref %[[TMP_arg1]] : memref<32xf32> -// CHECK-DAG: %[[TMP_4:.*]] = bufferization.to_memref %[[TMP_arg2]] : memref<16xf32> +// CHECK-DAG: %[[TMP_3:.*]] = bufferization.to_memref %[[TMP_arg1]] : tensor<32xf32> to memref<32xf32> +// CHECK-DAG: %[[TMP_4:.*]] = bufferization.to_memref %[[TMP_arg2]] : tensor<16xf32> to memref<16xf32> // CHECK: scf.parallel (%[[TMP_arg3:.*]]) = (%[[TMP_c0]]) to (%[[TMP_c16]]) step (%[[TMP_c1]]) { // CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32> // CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_0]][%[[TMP_arg3]]] : memref diff --git 
a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir index 07c273fcddc3bf..5f8002b5b6d31f 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir @@ -24,7 +24,7 @@ // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK: %[[DEMAP:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]] // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<30x10x20xf32, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20x30x10xf32> +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<20x30x10xf32> to memref<20x30x10xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_9]] : memref<20x30x10xf32>) // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_10]], %[[VAL_4]] : index @@ -64,7 +64,7 @@ func.func @sparse_static_dims(%arga: tensor<10x20x30xf32, #X>, // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_2]] : tensor // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_3]] : tensor // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_4]] : tensor -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref // CHECK-DAG: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_10]] : memref) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_4]] { // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_8]] : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir index 9bf10345f4ea55..93b5da41fc7f98 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir @@ -26,7 +26,7 @@ // CHECK-HIR-DAG: %[[VAL_6:.*]] = sparse_tensor.lvl 
%[[DEMAP]], %[[VAL_2]] : tensor // CHECK-HIR-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_4]] : tensor // CHECK-HIR-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[DEMAP]] : tensor -// CHECK-HIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref +// CHECK-HIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref // CHECK-HIR: %[[VAL_11:.*]] = tensor.extract %[[VAL_1]][] : tensor // CHECK-HIR: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_14:.*]] = %[[VAL_11]]) -> (f32) { // CHECK-HIR: %[[VAL_18:.*]] = arith.muli %[[VAL_13]], %[[VAL_6]] : index @@ -58,7 +58,7 @@ // CHECK-MIR-DAG: %[[DimSize1:.*]] = call @sparseLvlSize(%[[ARGA]], %[[I1]]) // CHECK-MIR-DAG: %[[DimSize2:.*]] = call @sparseLvlSize(%[[ARGA]], %[[I2]]) // CHECK-MIR-DAG: %[[VAL_8:.*]] = call @sparseValuesF32(%[[ARGA]]) : (!llvm.ptr) -> memref -// CHECK-MIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[ARGX]] : memref +// CHECK-MIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[ARGX]] : tensor to memref // CHECK-MIR: %[[VAL_11:.*]] = tensor.extract %[[ARGX]][] : tensor // CHECK-MIR: %[[VAL_12:.*]] = scf.for %[[D2:.*]] = %[[I0]] to %[[DimSize0]] step %[[I1]] iter_args(%[[VAL_14:.*]] = %[[VAL_11]]) -> (f32) { // CHECK-MIR: %[[VAL_18:.*]] = arith.muli %[[D2]], %[[DimSize1]] : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir index 666882edcbab38..e5df646851d43e 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir @@ -33,8 +33,8 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : 
tensor<32x16xf32, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref -// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_4]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref +// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_4]] : tensor<32x16xf32> to memref<32x16xf32> // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_14]][] : memref // CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_6]]] : memref // CHECK-DAG: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir index a03b97684a7a4a..e769534641ec85 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir @@ -64,14 +64,14 @@ func.func @fold_yield_direct_zero() -> tensor<32xf64> { // CHECK-DAG: %[[VAL_6:.*]] = arith.constant dense<0.000000e+00> : tensor<8x8xf64> // CHECK-DAG: %[[VAL_7:.*]] = bufferization.alloc_tensor() copy(%[[VAL_6]]) : tensor<8x8xf64> // CHECK-DAG: %[[VAL_8:.*]] = bufferization.alloc_tensor() copy(%[[VAL_6]]) : tensor<8x8xf64> -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64> -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64> +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<8x8xf64> to memref<8x8xf64> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<8x8xf64> to memref<8x8xf64> // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, 
#sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_8]] : memref<8x8xf64> +// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_8]] : tensor<8x8xf64> to memref<8x8xf64> // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_19:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_5]] { @@ -132,8 +132,8 @@ func.func @sampled_dd_unfused(%args: tensor<8x8xf64, #SM>, // CHECK-DAG: %[[VAL_8:.*]] = arith.constant dense<0.000000e+00> : tensor<8x8xf64> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.alloc_tensor() copy(%[[VAL_8]]) : tensor<8x8xf64> // CHECK-DAG: %[[VAL_10:.*]] = tensor.empty() : tensor<8x8xf64, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64> -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<8x8xf64> to memref<8x8xf64> +// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<8x8xf64> to memref<8x8xf64> // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir index a66028c61b22f4..3cc0aa26c8bc29 100644 --- 
a/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir @@ -30,8 +30,8 @@ // CHECK-DAG: %[[VAL_6:.*]] = arith.constant false // CHECK-DAG: %[[VAL_7:.*]] = arith.constant true // CHECK-DAG: %[[VAL_8:.*]] = tensor.empty() : tensor<8x8xf64, #sparse{{[0-9]*}}> -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64> -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64> +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<8x8xf64> to memref<8x8xf64> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<8x8xf64> to memref<8x8xf64> // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir index 281e7858ce25ec..c99d5d25f7b4a7 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir @@ -31,7 +31,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK: %[[VAL_15:.*]] = memref.load 
%[[VAL_14]][] : memref // CHECK: %[[VAL_16:.*]] = scf.for %[[VAL_17:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] iter_args(%[[VAL_18:.*]] = %[[VAL_15]]) -> (f64) { // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir index ac357d9dc24855..d88372276989df 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir @@ -28,7 +28,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : memref<8xi64> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : tensor<8xi64> to memref<8xi64> // CHECK-DAG: linalg.fill ins(%[[VAL_4]] : i64) outs(%[[VAL_11]] : memref<8xi64>) // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref @@ -70,7 +70,7 @@ func.func @sparse_index_1d_conj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8 // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse{{[0-9]*}}> to memref -// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : memref<8xi64> +// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : tensor<8xi64> to memref<8xi64> // 
CHECK-DAG: linalg.fill ins(%[[VAL_3]] : i64) outs(%[[VAL_11]] : memref<8xi64>) // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/spy_sddmm.mlir b/mlir/test/Dialect/SparseTensor/spy_sddmm.mlir index 287b62ef44c65f..6c3acf43f241e1 100644 --- a/mlir/test/Dialect/SparseTensor/spy_sddmm.mlir +++ b/mlir/test/Dialect/SparseTensor/spy_sddmm.mlir @@ -24,8 +24,8 @@ // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<8x8xf64> -// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64> +// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : tensor<8x8xf64> to memref<8x8xf64> +// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : tensor<8x8xf64> to memref<8x8xf64> // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref diff --git a/mlir/test/Dialect/SparseTensor/spy_sddmm_bsr.mlir b/mlir/test/Dialect/SparseTensor/spy_sddmm_bsr.mlir index 10a7ac5802ec98..df1e564c06231a 100755 --- a/mlir/test/Dialect/SparseTensor/spy_sddmm_bsr.mlir +++ b/mlir/test/Dialect/SparseTensor/spy_sddmm_bsr.mlir @@ -37,8 +37,8 @@ // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]] : tensor to tensor // CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref 
%[[VAL_1]] : memref -// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : tensor to memref +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : tensor to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.lvl %[[VAL_7]], %[[VAL_4]] : tensor // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_7]] {level = 1 : index} : tensor to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_7]] {level = 1 : index} : tensor to memref diff --git a/mlir/test/Dialect/SparseTensor/unused-tensor.mlir b/mlir/test/Dialect/SparseTensor/unused-tensor.mlir index f85acb9c6969a8..7e8b9f83fac795 100644 --- a/mlir/test/Dialect/SparseTensor/unused-tensor.mlir +++ b/mlir/test/Dialect/SparseTensor/unused-tensor.mlir @@ -28,8 +28,8 @@ // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 4 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<2x4xf64> -// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<2x4xf64> +// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : tensor<2x4xf64> to memref<2x4xf64> +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : tensor<2x4xf64> to memref<2x4xf64> // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] { diff --git a/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir b/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir index 578e86a793f906..15228c6a5f79ad 100644 --- a/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir +++ b/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir @@ -16,7 +16,7 @@ // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-ON-DAG: %[[VAL_6:.*]] =
sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-ON-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-ON: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref @@ -42,7 +42,7 @@ // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-OFF-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-OFF-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK-OFF: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref @@ -93,7 +93,7 @@ func.func @sparse_reduction_ori(%argx: tensor, // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-ON-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-ON-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-ON: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref @@ -119,7 +119,7 @@ func.func @sparse_reduction_ori(%argx: tensor, // CHECK-OFF-DAG: 
%[[VAL_3:.*]] = arith.constant 1 : index // CHECK-OFF-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-OFF-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK-OFF: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref @@ -168,7 +168,7 @@ func.func @sparse_reduction_ori_accumulator_on_rhs(%argx: tensor, // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-ON-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-ON-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref // CHECK-ON: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref @@ -194,7 +194,7 @@ func.func @sparse_reduction_ori_accumulator_on_rhs(%argx: tensor, // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-OFF-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-OFF-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // 
CHECK-OFF: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref @@ -243,7 +243,7 @@ func.func @sparse_reduction_subi(%argx: tensor, // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-ON-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-ON-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-ON: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref @@ -269,7 +269,7 @@ func.func @sparse_reduction_subi(%argx: tensor, // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-OFF-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-OFF-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK-OFF: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref @@ -319,7 +319,7 @@ func.func @sparse_reduction_xor(%argx: tensor, // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-ON-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-ON-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-ON: %[[VAL_9:.*]] 
= memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-ON: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref @@ -345,7 +345,7 @@ func.func @sparse_reduction_xor(%argx: tensor, // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-OFF-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-OFF-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK-OFF: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref @@ -395,7 +395,7 @@ func.func @sparse_reduction_addi(%argx: tensor, // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-ON-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-ON-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-ON: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref @@ -421,7 +421,7 @@ func.func @sparse_reduction_addi(%argx: tensor, // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-OFF-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-OFF-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : 
memref +// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK-OFF: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_reduction_subf(%argx: tensor, // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-ON-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-ON-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-ON-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-ON: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref @@ -497,7 +497,7 @@ func.func @sparse_reduction_subf(%argx: tensor, // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-OFF-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref // CHECK-OFF-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref -// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref +// CHECK-OFF-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : tensor to memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK-OFF: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir index 3a3c8af15e6e41..ecd285be461947 100644 --- a/mlir/test/Dialect/Tensor/bufferize.mlir +++ b/mlir/test/Dialect/Tensor/bufferize.mlir @@ -3,7 +3,7 @@ // CHECK-LABEL: func 
@dim( // CHECK-SAME: %[[TENSOR:.*]]: tensor<*xf32>, // CHECK-SAME: %[[INDEX:.*]]: index) -> index { -// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<*xf32> +// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : tensor<*xf32> to memref<*xf32> // CHECK: %[[EXTENT:.*]] = memref.dim %[[MEMREF]], %[[INDEX]] : memref<*xf32> // CHECK: return %[[EXTENT]] : index func.func @dim(%arg0: tensor<*xf32>, %arg1: index) -> index { @@ -39,7 +39,7 @@ func.func @tensor.cast(%arg0: tensor) -> tensor<2xindex> { // CHECK-LABEL: func @tensor.cast_from_unranked( // CHECK-SAME: %[[TENSOR:.*]]: tensor<*xf32>) -> tensor<2xf32> { -// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<*xf32> +// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : tensor<*xf32> to memref<*xf32> // CHECK: %[[CASTED_MEMREF:.*]] = memref.cast %[[MEMREF]] : memref<*xf32> to memref<2xf32, strided<[?], offset: ?>> // CHECK: %[[RET:.*]] = bufferization.to_tensor %[[CASTED_MEMREF]] : memref<2xf32, strided<[?], offset: ?>> // CHECK: return %[[RET]] : tensor<2xf32> @@ -52,7 +52,7 @@ func.func @tensor.cast_from_unranked(%arg0: tensor<*xf32>) -> tensor<2xf32> { // CHECK-LABEL: func @tensor.cast_to_unranked( // CHECK-SAME: %[[TENSOR:.*]]: tensor<2xf32>) -> tensor<*xf32> { -// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<2xf32> +// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : tensor<2xf32> to memref<2xf32> // CHECK: %[[CASTED_MEMREF:.*]] = memref.cast %[[MEMREF]] : memref<2xf32> to memref<*xf32> // CHECK: %[[RET:.*]] = bufferization.to_tensor %[[CASTED_MEMREF]] : memref<*xf32> // CHECK: return %[[RET]] : tensor<*xf32> @@ -77,7 +77,7 @@ func.func @tensor.empty() -> tensor<5xf32> { // CHECK-LABEL: func @tensor.extract( // CHECK-SAME: %[[TENSOR:.*]]: tensor, // CHECK-SAME: %[[IDX:.*]]: index) -> f32 { -// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref +// CHECK: %[[MEMREF:.*]] = bufferization.to_memref 
%[[TENSOR]] : tensor to memref // CHECK: %[[RET:.*]] = memref.load %[[MEMREF]][%[[IDX]]] : memref // CHECK: return %[[RET]] : f32 // CHECK: } @@ -199,7 +199,7 @@ func.func @tensor.from_elements_3d(%f0 : f32) -> tensor<3x2x2xf32> { // CHECK-LABEL: func @tensor.generate( // CHECK-SAME: %[[ARG:.*]]: tensor<*xf32>, // CHECK-SAME: %[[DYNAMIC_EXTENT:.*]]: index) -> tensor { -// CHECK-DAG: %[[ARG_M:.*]] = bufferization.to_memref %[[ARG]] : memref<*xf32> +// CHECK-DAG: %[[ARG_M:.*]] = bufferization.to_memref %[[ARG]] : tensor<*xf32> to memref<*xf32> // CHECK-DAG: %[[ALLOC:.*]] = memref.alloc(%[[DYNAMIC_EXTENT]]) {{.*}} : memref // CHECK: %[[ALLOC_T:.*]] = bufferization.to_tensor %[[ALLOC]] // CHECK: %[[MAPPED:.*]] = linalg.map @@ -266,7 +266,7 @@ func.func @tensor.generate_unknown_ops_in_body(%arg0: index) -> tensor // CHECK-SAME: %[[t1:.*]]: tensor, %[[idx1:.*]]: index, %[[idx2:.*]]: index func.func @tensor.extract_slice( %t1: tensor, %idx1: index, %idx2: index) -> tensor { - // CHECK: %[[m:.*]] = bufferization.to_memref %[[t1]] : memref + // CHECK: %[[m:.*]] = bufferization.to_memref %[[t1]] : tensor to memref // CHECK: %[[r:.*]] = memref.subview %[[m]][5, %[[idx2]]] [%[[idx1]], 10] [1, 1] : memref to memref> %0 = tensor.extract_slice %t1[5, %idx2][%idx1, 10][1, 1] : tensor to tensor @@ -282,7 +282,7 @@ func.func @tensor.extract_slice( // CHECK-SAME: %[[idx2:.*]]: index func.func @tensor.extract_slice_rank_reducing( %t1: tensor, %idx1: index, %idx2: index) -> tensor { - // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref + // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : tensor to memref // CHECK: %[[r:.*]] = memref.subview %[[m1]][5, %[[idx1]], 10] [%[[idx2]], 1, 15] [1, 1, 1] : memref to memref> %0 = tensor.extract_slice %t1[5, %idx1, 10][%idx2, 1, 15][1, 1, 1] : tensor to tensor @@ -300,8 +300,8 @@ func.func @tensor.insert_slice(%t1: tensor, %t2: tensor, %idx1: index, %idx2: index) -> tensor { // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // 
CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index - // CHECK-DAG: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref - // CHECK-DAG: %[[m2:.*]] = bufferization.to_memref %[[t2]] : memref + // CHECK-DAG: %[[m1:.*]] = bufferization.to_memref %[[t1]] : tensor to memref + // CHECK-DAG: %[[m2:.*]] = bufferization.to_memref %[[t2]] : tensor to memref // CHECK-DAG: %[[dim0:.*]] = memref.dim %[[m1]], %[[c0]] // CHECK-DAG: %[[dim1:.*]] = memref.dim %[[m1]], %[[c1]] // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim0]], %[[dim1]]) @@ -353,7 +353,7 @@ func.func @tensor.insert_slice_rank_reducing_2( // CHECK-SAME: %[[f:.*]]: f32 func.func @tensor.insert(%t1: tensor<5xf32>, %idx1: index, %f: f32) -> tensor<5xf32> { // CHECK-DAG: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32> - // CHECK-DAG: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref<5xf32> + // CHECK-DAG: %[[m1:.*]] = bufferization.to_memref %[[t1]] : tensor<5xf32> to memref<5xf32> // CHECK: memref.copy %[[m1]], %[[alloc]] // CHECK: memref.store %[[f]], %[[alloc]][%[[idx1]]] %0 = tensor.insert %f into %t1[%idx1] : tensor<5xf32> @@ -368,7 +368,7 @@ func.func @tensor.insert(%t1: tensor<5xf32>, %idx1: index, %f: f32) -> tensor<5x // CHECK-LABEL: func @tensor.expand_shape( // CHECK-SAME: %[[t1:.*]]: tensor func.func @tensor.expand_shape(%t1: tensor, %sz0: index) -> tensor<2x?x10xf32> { - // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref + // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[DIM:.*]] = memref.dim %[[m1]], %[[C0]] : memref // CHECK: %[[C2:.*]] = arith.constant 2 : index @@ -388,7 +388,7 @@ func.func @tensor.expand_shape(%t1: tensor, %sz0: index) -> tensor<2x? 
// CHECK-SAME: %[[t1:.*]]: tensor func.func @tensor.expand_shape_of_slice( %t1: tensor, %o1: index, %s1: index, %sz0: index) -> tensor { - // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref + // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : // CHECK: %[[subview:.*]] = memref.subview %[[m1]][%{{.*}}, 5] [%{{.*}}, 10] [1, 1] : memref to memref> %0 = tensor.extract_slice %t1[%o1, 5][%s1, 10][1, 1] : tensor to tensor @@ -408,7 +408,7 @@ func.func @tensor.expand_shape_of_slice( // CHECK-SAME: %[[t1:.*]]: tensor func.func @tensor.expand_shape_of_scalar_slice( %t1: tensor, %o1: index, %s1: index) -> tensor<1xf32> { - // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref + // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : tensor to memref // CHECK: %[[subview:.*]] = memref.subview %[[m1]][%{{.*}}] [1] [1] : memref to memref> %0 = tensor.extract_slice %t1[%o1][1][1] : tensor to tensor // CHECK: %[[expanded:.*]] = memref.expand_shape %[[subview]] [] output_shape [1] : memref into memref<1xf32, strided<[1], offset: ?>> @@ -423,7 +423,7 @@ func.func @tensor.expand_shape_of_scalar_slice( // CHECK-LABEL: func @tensor.collapse_shape( // CHECK-SAME: %[[t1:.*]]: tensor<2x?x?xf32> func.func @tensor.collapse_shape(%t1: tensor<2x?x?xf32>) -> tensor { - // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref<2x?x?xf32> + // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : tensor<2x?x?xf32> to memref<2x?x?xf32> // CHECK: %[[collapsed:.*]] = memref.collapse_shape %[[m1]] [ // CHECK-SAME: [0, 1], [2]] : memref<2x?x?xf32> into memref %0 = tensor.collapse_shape %t1 [[0, 1], [2]] @@ -439,7 +439,7 @@ func.func @tensor.collapse_shape(%t1: tensor<2x?x?xf32>) -> tensor { // CHECK-LABEL: func @tensor.collapse_shape_to_scalar( // CHECK-SAME: %[[t1:.*]]: tensor<1x1x1xf32> func.func @tensor.collapse_shape_to_scalar(%t1: tensor<1x1x1xf32>) -> tensor { - // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref<1x1x1xf32> + // CHECK: %[[m1:.*]] 
= bufferization.to_memref %[[t1]] : tensor<1x1x1xf32> to memref<1x1x1xf32> // CHECK: %[[collapsed:.*]] = memref.collapse_shape %[[m1]] [] : memref<1x1x1xf32> into memref %0 = tensor.collapse_shape %t1 [] : tensor<1x1x1xf32> into tensor @@ -528,7 +528,7 @@ func.func @tensor.collapse_shape_of_slice5(%arg0: tensor<2x2x2xi64>) -> tensor<4 // CHECK-LABEL: func @tensor.reshape( // CHECK-SAME: %[[t1:.*]]: tensor func.func @tensor.reshape(%t1: tensor) -> tensor<2x2x5xf32> { - // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref + // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : tensor to memref // CHECK: %[[two:.*]] = arith.constant 2 : i64 %two = arith.constant 2 : i64 @@ -560,7 +560,7 @@ func.func @tensor.reshape(%t1: tensor) -> tensor<2x2x5xf32> { // CHECK-SAME: %[[t1:.*]]: tensor, %[[l2:.*]]: index, %[[h1:.*]]: index, %[[h2:.*]]: index func.func @tensor.pad(%t1: tensor, %l2: index, %h1: index, %h2: index) -> tensor { - // CHECK-DAG: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref + // CHECK-DAG: %[[m1:.*]] = bufferization.to_memref %[[t1]] : tensor to memref // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[dim0:.*]] = memref.dim %[[m1]], %[[c0]] diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize-encodings.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize-encodings.mlir new file mode 100644 index 00000000000000..7398fdf614e1a7 --- /dev/null +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize-encodings.mlir @@ -0,0 +1,20 @@ +// RUN: mlir-opt %s -one-shot-bufferize="use-encoding-for-memory-space" -split-input-file | FileCheck %s + +func.func @from_elements(%fill: f32, %f: f32, %idx: index) -> tensor<3xf32, 1> { + %t = tensor.from_elements %fill, %fill, %fill : tensor<3xf32, 1> + %i = tensor.insert %f into %t[%idx] : tensor<3xf32, 1> + return %i : tensor<3xf32, 1> +} + +// CHECK-LABEL: @from_elements +// CHECK-SAME: (%[[arg0:.+]]: f32, %[[arg1:.+]]: f32, %[[arg2:.+]]: 
index) -> tensor<3xf32, 1 : i64> +// CHECK: %[[alloc:.+]] = memref.alloc() {{.*}} : memref<3xf32, 1> +// CHECK-DAG: %[[c0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[c1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[c2:.+]] = arith.constant 2 : index +// CHECK: memref.store %[[arg0]], %[[alloc]][%[[c0]]] : memref<3xf32, 1> +// CHECK: memref.store %[[arg0]], %[[alloc]][%[[c1]]] : memref<3xf32, 1> +// CHECK: memref.store %[[arg0]], %[[alloc]][%[[c2]]] : memref<3xf32, 1> +// CHECK: memref.store %[[arg1]], %[[alloc]][%[[arg2]]] : memref<3xf32, 1> +// CHECK: %[[v0:.+]] = bufferization.to_tensor %[[alloc]] : memref<3xf32, 1> to tensor<3xf32, 1 : i64> +// CHECK: return %[[v0]] : tensor<3xf32, 1 : i64> diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir index dc4306b8316ab7..af4f84640890bb 100644 --- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir @@ -402,7 +402,7 @@ func.func @tensor.reshape() -> tensor<2x2x5xf32> { // CHECK-SAME: %[[INPUT:[a-zA-Z0-9]*]]: memref<2x2xf32, strided<[?, ?], offset: ?>, 3>, // CHECK-SAME: %[[LAYOUT:[a-zA-Z0-9]*]]: memref<2xi32, strided<[?], offset: ?>>, func.func @reshape_with_non_identity_layout(%arg0: memref<2x2xf32, strided<[?, ?], offset: ?>, 3>, %arg1: tensor<2xi32>, %idx: index) -> f32 { - %t = bufferization.to_tensor %arg0 restrict : memref<2x2xf32, strided<[?, ?], offset: ?>, 3> + %t = bufferization.to_tensor %arg0 restrict : memref<2x2xf32, strided<[?, ?], offset: ?>, 3> to tensor<2x2xf32> // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[INPUT]][1, 0] [1, 2] [1, 1] : memref<2x2xf32, strided<[?, ?], offset: ?>, 3> to memref<2xf32, strided<[?], offset: ?>, 3> %extracted_slice = tensor.extract_slice %t[1, 0] [1, 2] [1, 1] : tensor<2x2xf32> to tensor<2xf32> diff --git a/mlir/test/Dialect/Vector/bufferize.mlir b/mlir/test/Dialect/Vector/bufferize.mlir index 3399f60a2c3bf3..c2abebe706ac0e 100644 --- 
a/mlir/test/Dialect/Vector/bufferize.mlir +++ b/mlir/test/Dialect/Vector/bufferize.mlir @@ -2,7 +2,7 @@ // CHECK-LABEL: func @transfer_read( // CHECK-SAME: %[[t:.*]]: tensor, %[[o1:.*]]: index, %[[o2:.*]]: index, %[[pad:.*]]: f32) -// CHECK: %[[m:.*]] = bufferization.to_memref %[[t]] : memref +// CHECK: %[[m:.*]] = bufferization.to_memref %[[t]] : tensor to memref // CHECK: %[[r:.*]] = vector.transfer_read %[[m]][%[[o1]], %[[o2]]], %[[pad]] {in_bounds = [true, false]} : memref, vector<5x6xf32> // CHECK: return %[[r]] func.func @transfer_read(%t: tensor, %o1: index, @@ -16,7 +16,7 @@ func.func @transfer_read(%t: tensor, %o1: index, // CHECK-LABEL: func @transfer_write( // CHECK-SAME: %[[t:.*]]: tensor, %[[o1:.*]]: index, %[[o2:.*]]: index, %[[vec:.*]]: vector<5x6xf32>, %[[mask:.*]]: vector<5x6xi1>) -// CHECK: %[[m:.*]] = bufferization.to_memref %[[t]] : memref +// CHECK: %[[m:.*]] = bufferization.to_memref %[[t]] : tensor to memref // CHECK: %[[alloc:.*]] = memref.alloc(%{{.*}}, %{{.*}}) {{.*}} : memref // CHECK: memref.copy %[[m]], %[[alloc]] // CHECK: vector.transfer_write %[[vec]], %[[alloc]][%[[o1]], %[[o2]]], %[[mask]] {in_bounds = [true, false]} : vector<5x6xf32>, memref @@ -35,7 +35,7 @@ func.func @transfer_write(%t: tensor, %o1: index, // CHECK-LABEL: func @gather( // CHECK-SAME: %[[base:.*]]: tensor, %[[v:.*]]: vector<16xi32>, // CHECK-SAME: %[[mask:.*]]: vector<16xi1>, %[[pass_thru:.*]]: vector<16xf32>) -// CHECK: %[[m:.*]] = bufferization.to_memref %[[base]] : memref +// CHECK: %[[m:.*]] = bufferization.to_memref %[[base]] : tensor to memref // CHECK: %[[c0:.*]] = arith.constant 0 : index // CHECK: %[[out:.*]] = vector.gather %[[m]][%[[c0]], %[[c0]]] [%[[v]]], %[[mask]], %[[pass_thru]] : memref, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32> func.func @gather(%base: tensor, %v: vector<16xi32>, %mask: vector<16xi1>, %pass_thru: vector<16xf32>) -> vector<16xf32> { diff --git 
a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-matvec-const.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-matvec-const.mlir index a2afc4d13943e1..415697aaaba5d8 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-matvec-const.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-matvec-const.mlir @@ -47,7 +47,7 @@ module { // Call the kernel with an vector taken from global memory. %xbuf = memref.get_global @__constant_64xf64 : memref<64xf64> - %x = bufferization.to_tensor %xbuf restrict : memref<64xf64> + %x = bufferization.to_tensor %xbuf restrict : memref<64xf64> to tensor<64xf64> %0 = call @matvec(%A, %x, %y) : (tensor<1024x64xf64, #CSR>, tensor<64xf64>, tensor<1024xf64>) -> tensor<1024xf64> // diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir index 5a624e64342974..6ae54d65ea37b5 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir @@ -2,7 +2,7 @@ // NOTE: this test requires gpu-sm80 // // RUN: mlir-opt \ -// RUN: --pass-pipeline="builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-nvgpu-to-nvvm,affine-expand-index-ops,lower-affine,convert-arith-to-llvm),convert-vector-to-llvm,canonicalize,cse)" \ +// RUN: --pass-pipeline="builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-nvgpu-to-nvvm,affine-expand-index-ops,lower-affine,arith-expand,convert-arith-to-llvm),convert-vector-to-llvm,canonicalize,cse)" \ // RUN: %s \ // RUN: | mlir-opt --gpu-lower-to-nvvm-pipeline="cubin-chip=sm_80 cubin-features=+ptx71 cubin-format=%gpu_compilation_format" \ // RUN: | mlir-cpu-runner \ diff --git a/mlir/test/Integration/Dialect/Tosa/CPU/test-maxpool-dynamic.mlir b/mlir/test/Integration/Dialect/Tosa/CPU/test-maxpool-dynamic.mlir index 
05a78e32b9e115..1b5fc9070ef6cc 100644 --- a/mlir/test/Integration/Dialect/Tosa/CPU/test-maxpool-dynamic.mlir +++ b/mlir/test/Integration/Dialect/Tosa/CPU/test-maxpool-dynamic.mlir @@ -54,7 +54,7 @@ func.func @main() { %result_static = func.call @max_pool_static(%A) : (!tensor_type) -> !tensor_type %result_dynamic = func.call @max_pool_dynamic(%A_dynamic) : (tensor) -> tensor - %static_buffer = bufferization.to_memref %result_static : !memref_type + %static_buffer = bufferization.to_memref %result_static : !tensor_type to !memref_type %unranked_static_buffer = memref.cast %static_buffer : !memref_type to memref<*xf32> // CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 4, 4, 1] strides = [16, 4, 1, 1] data = @@ -81,7 +81,7 @@ func.func @main() { func.call @printMemrefF32(%unranked_static_buffer) : (memref<*xf32>) -> () - %dynamic_buffer = bufferization.to_memref %result_dynamic : memref + %dynamic_buffer = bufferization.to_memref %result_dynamic : tensor to memref %unranked_dynamic_buffer = memref.cast %dynamic_buffer : memref to memref<*xf32> // CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 4, 4, 1] strides = [16, 4, 1, 1] data = diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/mulf-full.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/mulf-full.mlir index a7c5b91273423b..8cf15cd6978682 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/mulf-full.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/mulf-full.mlir @@ -100,8 +100,8 @@ func.func @entry() -> i32 { ]> : tensor<16x32xbf16> // Set up memory. - %a = bufferization.to_memref %0 : memref<16x32xbf16> - %b = bufferization.to_memref %1 : memref<16x32xbf16> + %a = bufferization.to_memref %0 : tensor<16x32xbf16> to memref<16x32xbf16> + %b = bufferization.to_memref %1 : tensor<16x32xbf16> to memref<16x32xbf16> %c = memref.alloc() : memref<16x16xf32> // Call kernel. 
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/muli-full.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/muli-full.mlir index 7b7ee54db8c348..652ba0698c4c9c 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/muli-full.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/muli-full.mlir @@ -100,8 +100,8 @@ func.func @entry() -> i32 { ]> : tensor<16x64xi8> // Set up memory. - %a = bufferization.to_memref %0 : memref<16x64xi8> - %b = bufferization.to_memref %1 : memref<16x64xi8> + %a = bufferization.to_memref %0 : tensor<16x64xi8> to memref<16x64xi8> + %b = bufferization.to_memref %1 : tensor<16x64xi8> to memref<16x64xi8> %c = memref.alloc() : memref<16x16xi32> // Call kernel. diff --git a/mlir/test/Target/SPIRV/intel-ext-ops.mlir b/mlir/test/Target/SPIRV/intel-ext-ops.mlir index 8c50501cf7409d..6d2fd324363c62 100644 --- a/mlir/test/Target/SPIRV/intel-ext-ops.mlir +++ b/mlir/test/Target/SPIRV/intel-ext-ops.mlir @@ -40,10 +40,10 @@ spirv.module Logical GLSL450 requires #spirv.vce { // CHECK-LABEL: @split_barrier spirv.func @split_barrier() "None" { - // CHECK: spirv.INTEL.ControlBarrierArrive , , - spirv.INTEL.ControlBarrierArrive , , - // CHECK: spirv.INTEL.ControlBarrierWait , , - spirv.INTEL.ControlBarrierWait , , + // CHECK: spirv.INTEL.ControlBarrierArrive + spirv.INTEL.ControlBarrierArrive + // CHECK: spirv.INTEL.ControlBarrierWait + spirv.INTEL.ControlBarrierWait spirv.Return } } diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 6752113cab8d41..239d5292180269 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -2781,7 +2781,7 @@ def TestGraphLoopOp : TEST_Op<"graph_loop", //===----------------------------------------------------------------------===// // Test InferIntRangeInterface //===----------------------------------------------------------------------===// -def InferIntRangeType : AnyTypeOf<[AnyInteger, Index, VectorOf<[AnyInteger, Index]>]>; 
+def InferIntRangeType : AnyTypeOf<[AnyInteger, Index, VectorOfNonZeroRankOf<[AnyInteger, Index]>]>; def TestWithBoundsOp : TEST_Op<"with_bounds", [DeclareOpInterfaceMethods, diff --git a/mlir/tools/mlir-tblgen/BytecodeDialectGen.cpp b/mlir/tools/mlir-tblgen/BytecodeDialectGen.cpp index d7967c7a77534d..da28ca3a7eba97 100644 --- a/mlir/tools/mlir-tblgen/BytecodeDialectGen.cpp +++ b/mlir/tools/mlir-tblgen/BytecodeDialectGen.cpp @@ -10,6 +10,7 @@ #include "mlir/TableGen/GenInfo.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVectorExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/TableGen/Error.h" @@ -161,13 +162,12 @@ void printParseConditional(mlir::raw_indented_ostream &ios, return formatv("read{0}", capitalize(name)); }; - auto parsedArgs = - llvm::to_vector(make_filter_range(args, [](const Init *const attr) { - const Record *def = cast(attr)->getDef(); - if (def->isSubClassOf("Array")) - return true; - return !def->getValueAsString("cParser").empty(); - })); + auto parsedArgs = llvm::filter_to_vector(args, [](const Init *const attr) { + const Record *def = cast(attr)->getDef(); + if (def->isSubClassOf("Array")) + return true; + return !def->getValueAsString("cParser").empty(); + }); interleave( zip(parsedArgs, argNames), @@ -277,8 +277,8 @@ void Generator::emitParseHelper(StringRef kind, StringRef returnType, printParseConditional(ios, args, argNames); // Compute args to pass to create method. 
- auto passedArgs = llvm::to_vector(make_filter_range( - argNames, [](StringRef str) { return !str.starts_with("_"); })); + auto passedArgs = llvm::filter_to_vector( + argNames, [](StringRef str) { return !str.starts_with("_"); }); std::string argStr; raw_string_ostream argStream(argStr); interleaveComma(passedArgs, argStream, diff --git a/mlir/unittests/IR/AffineMapTest.cpp b/mlir/unittests/IR/AffineMapTest.cpp index 166692f731d1cf..ff1f28235d4093 100644 --- a/mlir/unittests/IR/AffineMapTest.cpp +++ b/mlir/unittests/IR/AffineMapTest.cpp @@ -97,17 +97,12 @@ TEST(AffineMapTest, getInversePermutation) { auto resultsInv1 = inverseMap1.getResults(); EXPECT_EQ(resultsInv1.size(), 3UL); - // 1.1 Expect d2 - AffineDimExpr expr = llvm::dyn_cast(resultsInv1[0]); - EXPECT_TRUE(expr && expr.getPosition() == 2); - - // 1.2 Expect d0 - expr = llvm::dyn_cast(resultsInv1[1]); - EXPECT_TRUE(expr && expr.getPosition() == 0); - - // 1.3 Expect d3 - expr = llvm::dyn_cast(resultsInv1[2]); - EXPECT_TRUE(expr && expr.getPosition() == 3); + // Expect (d2, d0, d3) + SmallVector expected = {2, 0, 3}; + for (auto [idx, res] : llvm::enumerate(resultsInv1)) { + AffineDimExpr expr = llvm::dyn_cast(res); + EXPECT_TRUE(expr && expr.getPosition() == expected[idx]); + } // 2. 
(d0, d1, d2) -> (d1, d0 + d1, d0, d2, d1, d2, d1, d0) auto sum = d0 + d1; @@ -118,15 +113,10 @@ TEST(AffineMapTest, getInversePermutation) { auto resultsInv2 = inverseMap2.getResults(); EXPECT_EQ(resultsInv2.size(), 3UL); - // 2.1 Expect d2 - expr = llvm::dyn_cast(resultsInv2[0]); - EXPECT_TRUE(expr && expr.getPosition() == 2); - - // 2.2 Expect d0 - expr = llvm::dyn_cast(resultsInv2[1]); - EXPECT_TRUE(expr && expr.getPosition() == 0); - - // 2.3 Expect d3 - expr = llvm::dyn_cast(resultsInv2[2]); - EXPECT_TRUE(expr && expr.getPosition() == 3); + // Expect (d2, d0, d3) + expected = {2, 0, 3}; + for (auto [idx, res] : llvm::enumerate(resultsInv2)) { + AffineDimExpr expr = llvm::dyn_cast(res); + EXPECT_TRUE(expr && expr.getPosition() == expected[idx]); + } }