diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index dcfe68e020fc93..8c9fedf4b4406c 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -228,6 +228,10 @@ Changes in existing checks ` check to avoid false positive for C++23 deducing this. +- Improved :doc:`misc-use-internal-linkage + ` check to insert ``static`` keyword + before type qualifiers such as ``const`` and ``volatile``. + - Improved :doc:`modernize-avoid-c-arrays ` check to suggest using ``std::span`` as a replacement for parameters of incomplete C array type in @@ -237,10 +241,6 @@ Changes in existing checks ` check to fix false positive when using loop variable in initializer of lambda capture. -- Improved :doc:`misc-use-internal-linkage - ` check to insert ``static`` keyword - before type qualifiers such as ``const`` and ``volatile``. - - Improved :doc:`modernize-min-max-use-initializer-list ` check by fixing a false positive when only an implicit conversion happened inside an diff --git a/clang/lib/AST/ASTConcept.cpp b/clang/lib/AST/ASTConcept.cpp index bdc713ca3e791e..f7ee0fb3ee92da 100644 --- a/clang/lib/AST/ASTConcept.cpp +++ b/clang/lib/AST/ASTConcept.cpp @@ -22,11 +22,11 @@ static void CreateUnsatisfiedConstraintRecord(const ASTContext &C, const UnsatisfiedConstraintRecord &Detail, UnsatisfiedConstraintRecord *TrailingObject) { - if (Detail.is()) - new (TrailingObject) UnsatisfiedConstraintRecord(Detail.get()); + if (auto *E = dyn_cast(Detail)) + new (TrailingObject) UnsatisfiedConstraintRecord(E); else { auto &SubstitutionDiagnostic = - *Detail.get *>(); + *cast *>(Detail); StringRef Message = C.backupStr(SubstitutionDiagnostic.second); auto *NewSubstDiag = new (C) std::pair( SubstitutionDiagnostic.first, Message); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 23df7878a3bf29..3e6f0d628ca926 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp 
@@ -374,10 +374,10 @@ static const Decl &adjustDeclToTemplate(const Decl &D) { llvm::PointerUnion PU = CTSD->getSpecializedTemplateOrPartial(); - return PU.is() - ? *static_cast(PU.get()) + return isa(PU) + ? *static_cast(cast(PU)) : *static_cast( - PU.get()); + cast(PU)); } // Class is instantiated from a member definition of a class template? diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 7cf2519d6a71fb..3d8215ffc8c228 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -4033,7 +4033,7 @@ unsigned Compiler::allocateLocalPrimitive(DeclTy &&Src, PrimType Ty, // (int){12} in C. Consider using Expr::isTemporaryObject() instead // or isa(). Descriptor *D = P.createDescriptor(Src, Ty, Descriptor::InlineDescMD, IsConst, - Src.is()); + isa(Src)); Scope::Local Local = this->createLocal(D); if (auto *VD = dyn_cast_if_present(Src.dyn_cast())) Locals.insert({VD, Local}); diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp index 590ee19de6d2f0..c98a3506b0a90b 100644 --- a/clang/lib/AST/ByteCode/Program.cpp +++ b/clang/lib/AST/ByteCode/Program.cpp @@ -158,7 +158,7 @@ unsigned Program::getOrCreateDummy(const DeclTy &D) { if (const auto *E = D.dyn_cast()) { QT = E->getType(); } else { - const ValueDecl *VD = cast(D.get()); + const ValueDecl *VD = cast(cast(D)); IsWeak = VD->isWeak(); QT = VD->getType(); if (const auto *RT = QT->getAs()) diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index bfeb4827f79587..741e908cf9bc56 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1991,7 +1991,7 @@ void DeclaratorDecl::setQualifierInfo(NestedNameSpecifierLoc QualifierLoc) { // Make sure the extended decl info is allocated. if (!hasExtInfo()) { // Save (non-extended) type source info pointer. - auto *savedTInfo = DeclInfo.get(); + auto *savedTInfo = cast(DeclInfo); // Allocate external info struct. 
DeclInfo = new (getASTContext()) ExtInfo; // Restore savedTInfo into (extended) decl info. @@ -2010,7 +2010,7 @@ void DeclaratorDecl::setTrailingRequiresClause(Expr *TrailingRequiresClause) { // Make sure the extended decl info is allocated. if (!hasExtInfo()) { // Save (non-extended) type source info pointer. - auto *savedTInfo = DeclInfo.get(); + auto *savedTInfo = cast(DeclInfo); // Allocate external info struct. DeclInfo = new (getASTContext()) ExtInfo; // Restore savedTInfo into (extended) decl info. @@ -2026,7 +2026,7 @@ void DeclaratorDecl::setTemplateParameterListsInfo( // Make sure the extended decl info is allocated. if (!hasExtInfo()) { // Save (non-extended) type source info pointer. - auto *savedTInfo = DeclInfo.get(); + auto *savedTInfo = cast(DeclInfo); // Allocate external info struct. DeclInfo = new (getASTContext()) ExtInfo; // Restore savedTInfo into (extended) decl info. @@ -2534,7 +2534,7 @@ EvaluatedStmt *VarDecl::ensureEvaluatedStmt() const { // work to avoid leaking those, but we do so in VarDecl::evaluateValue // where we can detect whether there's anything to clean up or not. 
Eval = new (getASTContext()) EvaluatedStmt; - Eval->Value = Init.get(); + Eval->Value = cast(Init); Init = Eval; } return Eval; @@ -3017,7 +3017,7 @@ void ParmVarDecl::setUninstantiatedDefaultArg(Expr *arg) { Expr *ParmVarDecl::getUninstantiatedDefaultArg() { assert(hasUninstantiatedDefaultArg() && "Wrong kind of initialization expression!"); - return cast_if_present(Init.get()); + return cast_if_present(cast(Init)); } bool ParmVarDecl::hasDefaultArg() const { @@ -4010,12 +4010,12 @@ FunctionDecl::TemplatedKind FunctionDecl::getTemplatedKind() const { "No other valid types in NamedDecl"); return TK_FunctionTemplate; } - if (TemplateOrSpecialization.is()) + if (isa(TemplateOrSpecialization)) return TK_MemberSpecialization; - if (TemplateOrSpecialization.is()) + if (isa(TemplateOrSpecialization)) return TK_FunctionTemplateSpecialization; - if (TemplateOrSpecialization.is - ()) + if (isa( + TemplateOrSpecialization)) return TK_DependentFunctionTemplateSpecialization; llvm_unreachable("Did we miss a TemplateOrSpecialization type?"); @@ -4062,9 +4062,9 @@ void FunctionDecl::setDescribedFunctionTemplate( } bool FunctionDecl::isFunctionTemplateSpecialization() const { - return TemplateOrSpecialization.is() || - TemplateOrSpecialization - .is(); + return isa(TemplateOrSpecialization) || + isa( + TemplateOrSpecialization); } void FunctionDecl::setInstantiatedFromDecl(FunctionDecl *FD) { @@ -4216,7 +4216,7 @@ void FunctionDecl::setFunctionTemplateSpecialization( const TemplateArgumentListInfo *TemplateArgsAsWritten, SourceLocation PointOfInstantiation) { assert((TemplateOrSpecialization.isNull() || - TemplateOrSpecialization.is()) && + isa(TemplateOrSpecialization)) && "Member function is already a specialization"); assert(TSK != TSK_Undeclared && "Must specify the type of function template specialization"); @@ -4287,8 +4287,8 @@ TemplateSpecializationKind FunctionDecl::getTemplateSpecializationKind() const { // A dependent function template specialization is an explicit 
specialization, // except when it's a friend declaration. - if (TemplateOrSpecialization - .is() && + if (isa( + TemplateOrSpecialization) && getFriendObjectKind() == FOK_None) return TSK_ExplicitSpecialization; @@ -4331,8 +4331,8 @@ FunctionDecl::getTemplateSpecializationKindForInstantiation() const { TemplateOrSpecialization.dyn_cast()) return MSInfo->getTemplateSpecializationKind(); - if (TemplateOrSpecialization - .is() && + if (isa( + TemplateOrSpecialization) && getFriendObjectKind() == FOK_None) return TSK_ExplicitSpecialization; diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index 39c548e9c22539..25560faae8672b 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -2733,14 +2733,14 @@ int64_t CXXCtorInitializer::getID(const ASTContext &Context) const { TypeLoc CXXCtorInitializer::getBaseClassLoc() const { if (isBaseInitializer()) - return Initializee.get()->getTypeLoc(); + return cast(Initializee)->getTypeLoc(); else return {}; } const Type *CXXCtorInitializer::getBaseClass() const { if (isBaseInitializer()) - return Initializee.get()->getType().getTypePtr(); + return cast(Initializee)->getType().getTypePtr(); else return nullptr; } @@ -2752,7 +2752,7 @@ SourceLocation CXXCtorInitializer::getSourceLocation() const { if (isAnyMemberInitializer()) return getMemberLocation(); - if (const auto *TSInfo = Initializee.get()) + if (const auto *TSInfo = cast(Initializee)) return TSInfo->getTypeLoc().getBeginLoc(); return {}; diff --git a/clang/lib/AST/DeclFriend.cpp b/clang/lib/AST/DeclFriend.cpp index d003842bfb7c74..6bfc2eb62b2843 100644 --- a/clang/lib/AST/DeclFriend.cpp +++ b/clang/lib/AST/DeclFriend.cpp @@ -36,8 +36,7 @@ FriendDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L, SourceLocation EllipsisLoc, ArrayRef FriendTypeTPLists) { #ifndef NDEBUG - if (Friend.is()) { - const auto *D = Friend.get(); + if (const auto *D = dyn_cast(Friend)) { assert(isa(D) || isa(D) || isa(D) || diff --git 
a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index f487032a37ab73..1da3f26bf23cd5 100644 --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -992,7 +992,7 @@ ClassTemplateSpecializationDecl::getSpecializedTemplate() const { if (const auto *PartialSpec = SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization->getSpecializedTemplate(); - return SpecializedTemplate.get(); + return cast(SpecializedTemplate); } SourceRange @@ -1008,7 +1008,7 @@ ClassTemplateSpecializationDecl::getSourceRange() const { if (const auto *CTPSD = Pattern.dyn_cast()) return CTPSD->getSourceRange(); - return Pattern.get()->getSourceRange(); + return cast(Pattern)->getSourceRange(); } case TSK_ExplicitSpecialization: { SourceRange Range = CXXRecordDecl::getSourceRange(); @@ -1404,7 +1404,7 @@ VarTemplateDecl *VarTemplateSpecializationDecl::getSpecializedTemplate() const { if (const auto *PartialSpec = SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization->getSpecializedTemplate(); - return SpecializedTemplate.get(); + return cast(SpecializedTemplate); } SourceRange VarTemplateSpecializationDecl::getSourceRange() const { @@ -1419,7 +1419,7 @@ SourceRange VarTemplateSpecializationDecl::getSourceRange() const { if (const auto *VTPSD = Pattern.dyn_cast()) return VTPSD->getSourceRange(); - VarTemplateDecl *VTD = Pattern.get(); + VarTemplateDecl *VTD = cast(Pattern); if (hasInit()) { if (VarTemplateDecl *Definition = VTD->getDefinition()) return Definition->getSourceRange(); diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 0ce129de85f03f..678c9245ab46e6 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -162,7 +162,7 @@ QualType CXXTypeidExpr::getTypeOperand(const ASTContext &Context) const { assert(isTypeOperand() && "Cannot call getTypeOperand for typeid(expr)"); Qualifiers Quals; return Context.getUnqualifiedArrayType( - Operand.get()->getType().getNonReferenceType(), 
Quals); + cast(Operand)->getType().getNonReferenceType(), Quals); } static bool isGLValueFromPointerDeref(const Expr *E) { @@ -216,7 +216,7 @@ QualType CXXUuidofExpr::getTypeOperand(ASTContext &Context) const { assert(isTypeOperand() && "Cannot call getTypeOperand for __uuidof(expr)"); Qualifiers Quals; return Context.getUnqualifiedArrayType( - Operand.get()->getType().getNonReferenceType(), Quals); + cast(Operand)->getType().getNonReferenceType(), Quals); } // CXXScalarValueInitExpr @@ -1829,11 +1829,11 @@ void MaterializeTemporaryExpr::setExtendingDecl(ValueDecl *ExtendedBy, // We may need to allocate extra storage for the mangling number and the // extended-by ValueDecl. - if (!State.is()) + if (!isa(State)) State = LifetimeExtendedTemporaryDecl::Create( - cast(State.get()), ExtendedBy, ManglingNumber); + cast(cast(State)), ExtendedBy, ManglingNumber); - auto ES = State.get(); + auto ES = cast(State); ES->ExtendingDecl = ExtendedBy; ES->ManglingNumber = ManglingNumber; } diff --git a/clang/lib/AST/ExprConcepts.cpp b/clang/lib/AST/ExprConcepts.cpp index 6efe73ea085a79..e6afcdd5dc3e86 100644 --- a/clang/lib/AST/ExprConcepts.cpp +++ b/clang/lib/AST/ExprConcepts.cpp @@ -94,8 +94,7 @@ ConceptSpecializationExpr::Create(const ASTContext &C, ConceptReference *Loc, const TypeConstraint * concepts::ExprRequirement::ReturnTypeRequirement::getTypeConstraint() const { assert(isTypeConstraint()); - auto TPL = - TypeConstraintInfo.getPointer().get(); + auto TPL = cast(TypeConstraintInfo.getPointer()); return cast(TPL->getParam(0)) ->getTypeConstraint(); } diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp index 9723c0cfa83bbe..919dd2320abcc0 100644 --- a/clang/lib/AST/ParentMapContext.cpp +++ b/clang/lib/AST/ParentMapContext.cpp @@ -107,7 +107,7 @@ class ParentMapContext::ParentMap { return DynTypedNode::create(*D); if (const auto *S = U.dyn_cast()) return DynTypedNode::create(*S); - return *U.get(); + return *cast(U); } template @@ -127,17 
+127,17 @@ class ParentMapContext::ParentMap { ParentMap(ASTContext &Ctx); ~ParentMap() { for (const auto &Entry : PointerParents) { - if (Entry.second.is()) { - delete Entry.second.get(); - } else if (Entry.second.is()) { - delete Entry.second.get(); + if (auto *DTN = dyn_cast(Entry.second)) { + delete DTN; + } else if (auto *PV = dyn_cast(Entry.second)) { + delete PV; } } for (const auto &Entry : OtherParents) { - if (Entry.second.is()) { - delete Entry.second.get(); - } else if (Entry.second.is()) { - delete Entry.second.get(); + if (auto *DTN = dyn_cast(Entry.second)) { + delete DTN; + } else if (auto *PV = dyn_cast(Entry.second)) { + delete PV; } } } @@ -392,14 +392,14 @@ class ParentMapContext::ParentMap::ASTVisitor else NodeOrVector = new DynTypedNode(ParentStack.back()); } else { - if (!NodeOrVector.template is()) { + if (!isa(NodeOrVector)) { auto *Vector = new ParentVector( 1, getSingleDynTypedNodeFromParentMap(NodeOrVector)); delete NodeOrVector.template dyn_cast(); NodeOrVector = Vector; } - auto *Vector = NodeOrVector.template get(); + auto *Vector = cast(NodeOrVector); // Skip duplicates for types that have memoization data. 
// We must check that the type has memoization data before calling // llvm::is_contained() because DynTypedNode::operator== can't compare all diff --git a/clang/lib/AST/TemplateName.cpp b/clang/lib/AST/TemplateName.cpp index c500507fecdf59..7d6275caedc4f5 100644 --- a/clang/lib/AST/TemplateName.cpp +++ b/clang/lib/AST/TemplateName.cpp @@ -151,13 +151,13 @@ TemplateName::NameKind TemplateName::getKind() const { return Template; } - if (Storage.is()) + if (isa(Storage)) return DependentTemplate; - if (Storage.is()) + if (isa(Storage)) return QualifiedTemplate; - UncommonTemplateNameStorage *uncommon - = Storage.get(); + UncommonTemplateNameStorage *uncommon = + cast(Storage); if (uncommon->getAsOverloadedStorage()) return OverloadedTemplate; if (uncommon->getAsAssumedTemplateName()) diff --git a/clang/tools/clang-shlib/CMakeLists.txt b/clang/tools/clang-shlib/CMakeLists.txt index 2d97347ea7f828..31484ec49c7739 100644 --- a/clang/tools/clang-shlib/CMakeLists.txt +++ b/clang/tools/clang-shlib/CMakeLists.txt @@ -48,13 +48,11 @@ add_clang_library(clang-cpp ${_OBJECTS} LINK_LIBS ${_DEPS}) -# AIX linker does not support version script -if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX") - configure_file(simple_version_script.map.in simple_version_script.map) - if (CMAKE_SYSTEM_NAME STREQUAL "Linux") - target_link_options(clang-cpp PRIVATE LINKER:--version-script,${CMAKE_CURRENT_BINARY_DIR}/simple_version_script.map) - endif() +configure_file(simple_version_script.map.in simple_version_script.map) + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_options(clang-cpp PRIVATE LINKER:--version-script,${CMAKE_CURRENT_BINARY_DIR}/simple_version_script.map) endif() # Optimize function calls for default visibility definitions to avoid PLT and diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 3d416e6985d02c..57cb443798cd8f 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -114,6 +114,7 @@ struct Configuration { bool is64() const { return llvm::COFF::is64Bit(machine); } 
llvm::COFF::MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN; + bool machineInferred = false; size_t wordsize; bool verbose = false; WindowsSubsystem subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN; diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index df3c5a176b52e0..0c6df701284b7d 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -46,6 +46,8 @@ static bool compatibleMachineType(COFFLinkerContext &ctx, MachineTypes mt) { return COFF::isArm64EC(mt) || mt == AMD64; case ARM64X: return COFF::isAnyArm64(mt) || mt == AMD64; + case IMAGE_FILE_MACHINE_UNKNOWN: + return true; default: return ctx.config.machine == mt; } @@ -74,14 +76,26 @@ void SymbolTable::addFile(InputFile *file) { } MachineTypes mt = file->getMachineType(); - if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN) { - ctx.config.machine = mt; - ctx.driver.addWinSysRootLibSearchPaths(); - } else if (!compatibleMachineType(ctx, mt)) { + // The ARM64EC target must be explicitly specified and cannot be inferred. 
+ if (mt == ARM64EC && + (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN || + (ctx.config.machineInferred && + (ctx.config.machine == ARM64 || ctx.config.machine == AMD64)))) { + error(toString(file) + ": machine type arm64ec is ambiguous and cannot be " + "inferred, use /machine:arm64ec or /machine:arm64x"); + return; + } + if (!compatibleMachineType(ctx, mt)) { error(toString(file) + ": machine type " + machineToStr(mt) + " conflicts with " + machineToStr(ctx.config.machine)); return; } + if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN && + mt != IMAGE_FILE_MACHINE_UNKNOWN) { + ctx.config.machineInferred = true; + ctx.config.machine = mt; + ctx.driver.addWinSysRootLibSearchPaths(); + } ctx.driver.parseDirectives(file); } diff --git a/lld/Common/ErrorHandler.cpp b/lld/Common/ErrorHandler.cpp index 0ec79bb5423e5d..6b60ebb18e8212 100644 --- a/lld/Common/ErrorHandler.cpp +++ b/lld/Common/ErrorHandler.cpp @@ -337,7 +337,6 @@ void ErrorHandler::fatal(const Twine &msg) { } SyncStream::~SyncStream() { - os.flush(); switch (level) { case DiagLevel::None: break; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 14131dd9f765ce..07c8f1d1a679d8 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -284,7 +284,7 @@ static bool isCompatible(Ctx &ctx, InputFile *file) { StringRef target = !ctx.arg.bfdname.empty() ? 
ctx.arg.bfdname : ctx.arg.emulation; if (!target.empty()) { - ErrAlways(ctx) << file << " is incompatible with " << target; + Err(ctx) << file << " is incompatible with " << target; return false; } @@ -295,10 +295,10 @@ static bool isCompatible(Ctx &ctx, InputFile *file) { existing = ctx.sharedFiles[0]; else if (!ctx.bitcodeFiles.empty()) existing = ctx.bitcodeFiles[0]; - std::string with; + auto diag = Err(ctx); + diag << file << " is incompatible"; if (existing) - with = " with " + toStr(ctx, existing); - ErrAlways(ctx) << file << " is incompatible" << with; + diag << " with " << existing; return false; } diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index e110adead5ad01..d311dba41741c0 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -74,13 +74,12 @@ static std::optional getLinkerScriptLocation(Ctx &ctx, return std::nullopt; } -static std::string getDefinedLocation(Ctx &ctx, const Symbol &sym) { - const char msg[] = "\n>>> defined in "; +static void printDefinedLocation(ELFSyncStream &s, const Symbol &sym) { + s << "\n>>> defined in "; if (sym.file) - return msg + toStr(ctx, sym.file); - if (std::optional loc = getLinkerScriptLocation(ctx, sym)) - return msg + *loc; - return ""; + return void(s << sym.file); + if (std::optional loc = getLinkerScriptLocation(s.ctx, sym)) + return void(s << *loc); } // Construct a message in the following format. 
@@ -88,13 +87,14 @@ static std::string getDefinedLocation(Ctx &ctx, const Symbol &sym) { // >>> defined in /home/alice/src/foo.o // >>> referenced by bar.c:12 (/home/alice/src/bar.c:12) // >>> /home/alice/src/bar.o:(.text+0x1) -static std::string getLocation(Ctx &ctx, InputSectionBase &s, const Symbol &sym, - uint64_t off) { - std::string msg = getDefinedLocation(ctx, sym) + "\n>>> referenced by "; - std::string src = s.getSrcMsg(sym, off); +static void printLocation(ELFSyncStream &s, InputSectionBase &sec, + const Symbol &sym, uint64_t off) { + printDefinedLocation(s, sym); + s << "\n>>> referenced by "; + std::string src = sec.getSrcMsg(sym, off); if (!src.empty()) - msg += src + "\n>>> "; - return msg + s.getObjMsg(off); + s << src << "\n>>> "; + s << sec.getObjMsg(off); } void elf::reportRangeError(Ctx &ctx, uint8_t *loc, const Relocation &rel, @@ -121,7 +121,7 @@ void elf::reportRangeError(Ctx &ctx, uint8_t *loc, const Relocation &rel, if (!errPlace.srcLoc.empty()) diag << "\n>>> referenced by " << errPlace.srcLoc; if (rel.sym && !rel.sym->isSection()) - diag << getDefinedLocation(ctx, *rel.sym); + printDefinedLocation(diag, *rel.sym); if (errPlace.isec && errPlace.isec->name.starts_with(".debug")) diag << "; consider recompiling with -fdebug-types-section to reduce size " @@ -133,8 +133,10 @@ void elf::reportRangeError(Ctx &ctx, uint8_t *loc, int64_t v, int n, auto diag = Err(ctx); diag << getErrorPlace(ctx, loc).loc << msg << " is out of range: " << v << " is not in [" << llvm::minIntN(n) << ", " << llvm::maxIntN(n) << "]"; - if (!sym.getName().empty()) - diag << "; references '" << &sym << '\'' << getDefinedLocation(ctx, sym); + if (!sym.getName().empty()) { + diag << "; references '" << &sym << '\''; + printDefinedLocation(diag, sym); + } } // Build a bitmask with one bit set for each 64 subset of RelExpr. 
@@ -522,42 +524,39 @@ int64_t RelocationScanner::computeMipsAddend(const RelTy &rel, RelExpr expr, // Custom error message if Sym is defined in a discarded section. template -static std::string maybeReportDiscarded(Ctx &ctx, Undefined &sym) { +static void maybeReportDiscarded(Ctx &ctx, ELFSyncStream &msg, Undefined &sym) { auto *file = dyn_cast_or_null>(sym.file); if (!file || !sym.discardedSecIdx) - return ""; + return; ArrayRef objSections = file->template getELFShdrs(); - std::string msg; if (sym.type == ELF::STT_SECTION) { - msg = "relocation refers to a discarded section: "; - msg += CHECK2( + msg << "relocation refers to a discarded section: "; + msg << CHECK2( file->getObj().getSectionName(objSections[sym.discardedSecIdx]), file); } else { - msg = "relocation refers to a symbol in a discarded section: " + - toStr(ctx, sym); + msg << "relocation refers to a symbol in a discarded section: " << &sym; } - msg += "\n>>> defined in " + toStr(ctx, file); + msg << "\n>>> defined in " << file; Elf_Shdr_Impl elfSec = objSections[sym.discardedSecIdx - 1]; if (elfSec.sh_type != SHT_GROUP) - return msg; + return; // If the discarded section is a COMDAT. StringRef signature = file->getShtGroupSignature(objSections, elfSec); if (const InputFile *prevailing = ctx.symtab->comdatGroups.lookup(CachedHashStringRef(signature))) { - msg += "\n>>> section group signature: " + signature.str() + - "\n>>> prevailing definition is in " + toStr(ctx, prevailing); + msg << "\n>>> section group signature: " << signature + << "\n>>> prevailing definition is in " << prevailing; if (sym.nonPrevailing) { - msg += "\n>>> or the symbol in the prevailing group had STB_WEAK " + msg << "\n>>> or the symbol in the prevailing group had STB_WEAK " "binding and the symbol in a non-prevailing group had STB_GLOBAL " "binding. 
Mixing groups with STB_WEAK and STB_GLOBAL binding " "signature is not supported"; } } - return msg; } // Check whether the definition name def is a mangled function name that matches @@ -695,8 +694,9 @@ static const Symbol *getAlternativeSpelling(Ctx &ctx, const Undefined &sym, static void reportUndefinedSymbol(Ctx &ctx, const UndefinedDiag &undef, bool correctSpelling) { Undefined &sym = *undef.sym; + ELFSyncStream msg(ctx, DiagLevel::None); - auto visibility = [&]() -> std::string { + auto visibility = [&]() { switch (sym.visibility()) { case STV_INTERNAL: return "internal "; @@ -709,75 +709,70 @@ static void reportUndefinedSymbol(Ctx &ctx, const UndefinedDiag &undef, } }; - std::string msg; switch (ctx.arg.ekind) { case ELF32LEKind: - msg = maybeReportDiscarded(ctx, sym); + maybeReportDiscarded(ctx, msg, sym); break; case ELF32BEKind: - msg = maybeReportDiscarded(ctx, sym); + maybeReportDiscarded(ctx, msg, sym); break; case ELF64LEKind: - msg = maybeReportDiscarded(ctx, sym); + maybeReportDiscarded(ctx, msg, sym); break; case ELF64BEKind: - msg = maybeReportDiscarded(ctx, sym); + maybeReportDiscarded(ctx, msg, sym); break; default: llvm_unreachable(""); } - if (msg.empty()) - msg = "undefined " + visibility() + "symbol: " + toStr(ctx, sym); + if (msg.str().empty()) + msg << "undefined " << visibility() << "symbol: " << &sym; const size_t maxUndefReferences = 3; - size_t i = 0; - for (UndefinedDiag::Loc l : undef.locs) { - if (i >= maxUndefReferences) - break; + for (UndefinedDiag::Loc l : + ArrayRef(undef.locs).take_front(maxUndefReferences)) { InputSectionBase &sec = *l.sec; uint64_t offset = l.offset; - msg += "\n>>> referenced by "; + msg << "\n>>> referenced by "; // In the absence of line number information, utilize DW_TAG_variable (if // present) for the enclosing symbol (e.g. var in `int *a[] = {&undef};`). Symbol *enclosing = sec.getEnclosingSymbol(offset); std::string src = sec.getSrcMsg(enclosing ? 
*enclosing : sym, offset); if (!src.empty()) - msg += src + "\n>>> "; - msg += sec.getObjMsg(offset); - i++; + msg << src << "\n>>> "; + msg << sec.getObjMsg(offset); } - if (i < undef.locs.size()) - msg += ("\n>>> referenced " + Twine(undef.locs.size() - i) + " more times") - .str(); + if (maxUndefReferences < undef.locs.size()) + msg << "\n>>> referenced " << (undef.locs.size() - maxUndefReferences) + << " more times"; if (correctSpelling) { std::string pre_hint = ": ", post_hint; if (const Symbol *corrected = getAlternativeSpelling(ctx, sym, pre_hint, post_hint)) { - msg += - "\n>>> did you mean" + pre_hint + toStr(ctx, *corrected) + post_hint; + msg << "\n>>> did you mean" << pre_hint << corrected << post_hint; if (corrected->file) - msg += "\n>>> defined in: " + toStr(ctx, corrected->file); + msg << "\n>>> defined in: " << corrected->file; } } if (sym.getName().starts_with("_ZTV")) - msg += - "\n>>> the vtable symbol may be undefined because the class is missing " - "its key function (see https://lld.llvm.org/missingkeyfunction)"; + msg << "\n>>> the vtable symbol may be undefined because the class is " + "missing its key function " + "(see https://lld.llvm.org/missingkeyfunction)"; if (ctx.arg.gcSections && ctx.arg.zStartStopGC && sym.getName().starts_with("__start_")) { - msg += "\n>>> the encapsulation symbol needs to be retained under " + msg << "\n>>> the encapsulation symbol needs to be retained under " "--gc-sections properly; consider -z nostart-stop-gc " "(see https://lld.llvm.org/ELF/start-stop-gc)"; } if (undef.isWarning) - Warn(ctx) << msg; + Warn(ctx) << msg.str(); else - ctx.e.error(msg, ErrorTag::SymbolNotFound, {sym.getName()}); + ctx.e.error(msg.str(), ErrorTag::SymbolNotFound, {sym.getName()}); } void elf::reportUndefinedSymbols(Ctx &ctx) { @@ -1020,9 +1015,9 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, if (sym.scriptDefined) return true; - Err(ctx) << "relocation " << type - << " cannot refer to absolute 
symbol: " << &sym - << getLocation(ctx, *sec, sym, relOff); + auto diag = Err(ctx); + diag << "relocation " << type << " cannot refer to absolute symbol: " << &sym; + printLocation(diag, *sec, sym, relOff); return true; } @@ -1188,18 +1183,21 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, if (!ctx.arg.shared && sym.isShared() && !(ctx.arg.emachine == EM_AARCH64 && type == R_AARCH64_AUTH_ABS64)) { if (!canDefineSymbolInExecutable(ctx, sym)) { - Err(ctx) << "cannot preempt symbol: " << &sym - << getLocation(ctx, *sec, sym, offset); + auto diag = Err(ctx); + diag << "cannot preempt symbol: " << &sym; + printLocation(diag, *sec, sym, offset); return; } if (sym.isObject()) { // Produce a copy relocation. if (auto *ss = dyn_cast(&sym)) { - if (!ctx.arg.zCopyreloc) - Err(ctx) << "unresolvable relocation " << type << " against symbol '" - << ss << "'; recompile with -fPIC or remove '-z nocopyreloc'" - << getLocation(ctx, *sec, sym, offset); + if (!ctx.arg.zCopyreloc) { + auto diag = Err(ctx); + diag << "unresolvable relocation " << type << " against symbol '" + << ss << "'; recompile with -fPIC or remove '-z nocopyreloc'"; + printLocation(diag, *sec, sym, offset); + } sym.setFlags(NEEDS_COPY); } sec->addReloc({expr, type, offset, addend, &sym}); @@ -1234,20 +1232,26 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, // * If a library definition gets preempted to the executable, it will have // the wrong ebx value. 
if (sym.isFunc()) { - if (ctx.arg.pie && ctx.arg.emachine == EM_386) - Err(ctx) << "symbol '" << &sym - << "' cannot be preempted; recompile with -fPIE" - << getLocation(ctx, *sec, sym, offset); + if (ctx.arg.pie && ctx.arg.emachine == EM_386) { + auto diag = Err(ctx); + diag << "symbol '" << &sym + << "' cannot be preempted; recompile with -fPIE"; + printLocation(diag, *sec, sym, offset); + } sym.setFlags(NEEDS_COPY | NEEDS_PLT); sec->addReloc({expr, type, offset, addend, &sym}); return; } } - Err(ctx) << "relocation " << type << " cannot be used against " - << (sym.getName().empty() ? "local symbol" - : ("symbol '" + toStr(ctx, sym) + "'")) - << "; recompile with -fPIC" << getLocation(ctx, *sec, sym, offset); + auto diag = Err(ctx); + diag << "relocation " << type << " cannot be used against "; + if (sym.getName().empty()) + diag << "local symbol"; + else + diag << "symbol '" << &sym << "'"; + diag << "; recompile with -fPIC"; + printLocation(diag, *sec, sym, offset); } // This function is similar to the `handleTlsRelocation`. MIPS does not @@ -1284,9 +1288,10 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, int64_t addend) { if (expr == R_TPREL || expr == R_TPREL_NEG) { if (ctx.arg.shared) { - Err(ctx) << "relocation " << type << " against " << &sym - << " cannot be used with -shared" - << getLocation(ctx, *sec, sym, offset); + auto diag = Err(ctx); + diag << "relocation " << type << " against " << &sym + << " cannot be used with -shared"; + printLocation(diag, *sec, sym, offset); return 1; } return 0; @@ -1493,9 +1498,10 @@ void RelocationScanner::scanOne(typename Relocs::const_iterator &i) { // Skip the error check for CREL, which does not set `end`. 
if constexpr (!RelTy::IsCrel) { if (i == end) { - Err(ctx) << "R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last " - "relocation" - << getLocation(ctx, *sec, sym, offset); + auto diag = Err(ctx); + diag << "R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last " + "relocation"; + printLocation(diag, *sec, sym, offset); return; } } diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h index e70afbd87d1556..79e20be2bb6be8 100644 --- a/lld/include/lld/Common/ErrorHandler.h +++ b/lld/include/lld/Common/ErrorHandler.h @@ -71,6 +71,7 @@ #include "lld/Common/LLVM.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -158,13 +159,14 @@ enum class DiagLevel { None, Log, Msg, Warn, Err, Fatal }; class SyncStream { ErrorHandler &e; DiagLevel level; - std::string buf; + llvm::SmallString<0> buf; public: - mutable llvm::raw_string_ostream os{buf}; + mutable llvm::raw_svector_ostream os{buf}; SyncStream(ErrorHandler &e, DiagLevel level) : e(e), level(level) {} SyncStream(SyncStream &&o) : e(o.e), level(o.level), buf(std::move(o.buf)) {} ~SyncStream(); + StringRef str() { return os.str(); } }; [[noreturn]] void exitLld(int val); diff --git a/lld/test/COFF/arm64ec.test b/lld/test/COFF/arm64ec.test index e50b14ce0184c8..75288e97e598dd 100644 --- a/lld/test/COFF/arm64ec.test +++ b/lld/test/COFF/arm64ec.test @@ -4,6 +4,7 @@ RUN: split-file %s %t.dir && cd %t.dir RUN: llvm-mc -filetype=obj -triple=aarch64-windows arm64-data-sym.s -o arm64-data-sym.obj RUN: llvm-mc -filetype=obj -triple=arm64ec-windows arm64ec-data-sym.s -o arm64ec-data-sym.obj RUN: llvm-mc -filetype=obj -triple=x86_64-windows x86_64-data-sym.s -o x86_64-data-sym.obj +RUN: llvm-mc -filetype=obj -triple=i686-windows x86_64-data-sym.s -o i686-data-sym.obj RUN: llvm-cvtres -machine:arm64x -out:arm64x-resource.obj %S/Inputs/resource.res RUN: lld-link -out:test.dll 
-machine:arm64ec arm64ec-data-sym.obj -dll -noentry @@ -46,6 +47,26 @@ RUN: not lld-link -out:test.dll -machine:arm64 arm64-data-sym.obj x86_64-data-sy RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT3 %s INCOMPAT3: lld-link: error: x86_64-data-sym.obj: machine type x64 conflicts with arm64 +arm64ec machine type can't be inferred, it must be specified explicitly. +RUN: not lld-link -out:test.dll arm64ec-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT4 %s +INCOMPAT4: lld-link: error: arm64ec-data-sym.obj: machine type arm64ec is ambiguous and cannot be inferred, use /machine:arm64ec or /machine:arm64x + +RUN: not lld-link -out:test.dll x86_64-data-sym.obj arm64ec-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT4 %s + +RUN: not lld-link -out:test.dll arm64-data-sym.obj arm64ec-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT4 %s + +RUN: not lld-link -out:test.dll i686-data-sym.obj arm64ec-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT5 %s +INCOMPAT5: lld-link: error: arm64ec-data-sym.obj: machine type arm64ec conflicts with x86 + +arm64x can be inferred and when mixed with ARM64, the first one wins +RUN: lld-link -out:test.dll -dll -noentry arm64x-resource.obj arm64-data-sym.obj x86_64-data-sym.obj arm64ec-data-sym.obj +RUN: not lld-link -out:test.dll -dll -noentry arm64-data-sym.obj arm64x-resource.obj x86_64-data-sym.obj 2>&1 | FileCheck -check-prefix=INCOMPAT3 %s +RUN: not lld-link -out:test.dll -dll -noentry arm64-data-sym.obj arm64x-resource.obj arm64ec-data-sym.obj 2>&1 | FileCheck -check-prefix=INCOMPAT4 %s + #--- arm64ec-data-sym.s .data .globl arm64ec_data_sym diff --git a/lld/test/ELF/incompatible.s b/lld/test/ELF/incompatible.s index 39c25106f4d721..0d25acd857610b 100644 --- a/lld/test/ELF/incompatible.s +++ b/lld/test/ELF/incompatible.s @@ -6,11 +6,11 @@ // RUN: not ld.lld %ta.o %tb.o -o /dev/null 2>&1 | \ // RUN: FileCheck 
--check-prefix=A-AND-B %s -// A-AND-B: b.o is incompatible with {{.*}}a.o +// A-AND-B: error: {{.*}}b.o is incompatible with {{.*}}a.o -// RUN: not ld.lld %tb.o %tc.o -o /dev/null 2>&1 | \ +// RUN: ld.lld --noinhibit-exec %tb.o %tc.o -o /dev/null 2>&1 | \ // RUN: FileCheck --check-prefix=B-AND-C %s -// B-AND-C: c.o is incompatible with {{.*}}b.o +// B-AND-C: warning: {{.*}}c.o is incompatible with {{.*}}b.o // RUN: not ld.lld %ta.o %ti686.so -o /dev/null 2>&1 | \ // RUN: FileCheck --check-prefix=A-AND-SO %s @@ -69,8 +69,8 @@ // RUN: rm -f %t.a // RUN: llvm-ar rc %t.a %tc.o // RUN: llvm-mc -filetype=obj -triple=i686-linux %s -o %td.o -// RUN: not ld.lld %t.a %td.o 2>&1 -o /dev/null | FileCheck --check-prefix=ARCHIVE %s -// ARCHIVE: {{.*}}d.o is incompatible +// RUN: ld.lld --noinhibit-exec %t.a %td.o 2>&1 -o /dev/null | FileCheck --check-prefix=ARCHIVE %s +// ARCHIVE: warning: {{.*}}d.o is incompatible{{$}} .global _start _start: .data diff --git a/lld/test/ELF/linkerscript/symbol-location.s b/lld/test/ELF/linkerscript/symbol-location.s index 4620982bf3f206..fd5cc9de048f1d 100644 --- a/lld/test/ELF/linkerscript/symbol-location.s +++ b/lld/test/ELF/linkerscript/symbol-location.s @@ -2,6 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o # RUN: echo 'foo = _start;' > %t.script # RUN: not ld.lld -shared -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -shared --defsym 'foo = _start' %t.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK2 ## Here we check that symbol 'foo' location is reported properly. 
@@ -9,6 +10,10 @@ # CHECK: >>> defined in {{.*}}.script:1 # CHECK: >>> referenced by {{.*}}.o:(.text+0x1) +# CHECK2: error: relocation R_X86_64_PC32 cannot be used against symbol 'foo' +# CHECK2: >>> defined in --defsym{{$}} +# CHECK2: >>> referenced by {{.*}}.o:(.text+0x1) + .text .globl _start _start: diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index b8c53a474ba6b9..a184e6dd891aff 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -1380,6 +1380,8 @@ class Process : public std::enable_shared_from_this, virtual bool GetProcessInfo(ProcessInstanceInfo &info); + virtual lldb_private::UUID FindModuleUUID(const llvm::StringRef path); + /// Get the exit status for a process. /// /// \return diff --git a/lldb/source/Core/DynamicLoader.cpp b/lldb/source/Core/DynamicLoader.cpp index 3c6c6bd365706e..acc84dbf016fbe 100644 --- a/lldb/source/Core/DynamicLoader.cpp +++ b/lldb/source/Core/DynamicLoader.cpp @@ -157,11 +157,9 @@ DynamicLoader::GetSectionListFromModule(const ModuleSP module) const { ModuleSP DynamicLoader::FindModuleViaTarget(const FileSpec &file) { Target &target = m_process->GetTarget(); ModuleSpec module_spec(file, target.GetArchitecture()); - ModuleSpec module_spec_from_process; - // Process may be able to augment the module_spec with UUID, e.g. ELF core. - if (m_process->GetModuleSpec(file, target.GetArchitecture(), - module_spec_from_process)) { - module_spec = module_spec_from_process; + if (UUID uuid = m_process->FindModuleUUID(file.GetPath())) { + // Process may be able to augment the module_spec with UUID, e.g. ELF core. 
+ module_spec.GetUUID() = uuid; } if (ModuleSP module_sp = target.GetImages().FindFirstModule(module_spec)) diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp index 57b12f07b5e0be..b3916cc913f7db 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp @@ -286,20 +286,12 @@ void ProcessElfCore::UpdateBuildIdForNTFileEntries() { } } -bool ProcessElfCore::GetModuleSpec(const FileSpec &module_file_spec, - const ArchSpec &arch, - ModuleSpec &module_spec) { - module_spec.Clear(); - for (NT_FILE_Entry &entry : m_nt_file_entries) { - if (module_file_spec.GetPath() == entry.path) { - module_spec.GetFileSpec() = module_file_spec; - module_spec.GetArchitecture() = arch; - module_spec.GetUUID() = entry.uuid; - return true; - } - } - - return false; +UUID ProcessElfCore::FindModuleUUID(const llvm::StringRef path) { + // Returns the gnu uuid from matched NT_FILE entry + for (NT_FILE_Entry &entry : m_nt_file_entries) + if (path == entry.path) + return entry.uuid; + return UUID(); } lldb_private::DynamicLoader *ProcessElfCore::GetDynamicLoader() { diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h index a7b1822ccf01ff..a91c04a277f601 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h @@ -163,9 +163,7 @@ class ProcessElfCore : public lldb_private::PostMortemProcess { // Populate gnu uuid for each NT_FILE entry void UpdateBuildIdForNTFileEntries(); - bool GetModuleSpec(const lldb_private::FileSpec &module_file_spec, - const lldb_private::ArchSpec &arch, - lldb_private::ModuleSpec &module_spec) override; + lldb_private::UUID FindModuleUUID(const llvm::StringRef path) override; // Returns the value of certain type of note of a given start address lldb_private::UUID 
FindBuidIdInCoreMemory(lldb::addr_t address); diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 9125ceca74a003..db33525978a16a 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -6080,6 +6080,10 @@ bool Process::GetProcessInfo(ProcessInstanceInfo &info) { return platform_sp->GetProcessInfo(GetID(), info); } +lldb_private::UUID Process::FindModuleUUID(const llvm::StringRef path) { + return lldb_private::UUID(); +} + ThreadCollectionSP Process::GetHistoryThreads(lldb::addr_t addr) { ThreadCollectionSP threads; diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 699ca91cd1f8f4..5b50e1943e3db1 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -367,32 +367,37 @@ defm : BWWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : BWWriteResPair; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : BWWriteResPair; defm : BWWriteResPair; defm : BWWriteResPair; defm : X86WriteResPairUnsupported; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResUnsupported; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResUnsupported; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index b820418bb55191..d06e8a99370976 
100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -364,33 +364,41 @@ defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; -defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; defm : X86WriteRes; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; @@ -983,7 +991,6 @@ def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> { let NumMicroOps = 2; let ReleaseAtCycles = [1,1]; } -def: InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSrm)>; def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>; def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> { @@ -1349,13 +1356,6 @@ def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> { } def: InstRW<[HWWriteResGroup75], (instregex 
"FICOM(P?)(16|32)m")>; -def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ReleaseAtCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDrm)>; - def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> { let Latency = 9; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 7be9f51bcd46bd..775ad6b1078a53 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -348,28 +348,33 @@ defm : X86WriteRes defm : X86WriteRes; defm : X86WriteRes; // Unsupported = 1 -defm : SBWriteResPair; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; // Unsupported = 1 -defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; // Unsupported = 1 - -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 +// F16C Instructions (IvyBridge+) +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 // Vector integer operations. 
defm : X86WriteRes; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index a24a86b4201c31..529108a5aaa97f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -478,32 +478,23 @@ void VPIRBasicBlock::execute(VPTransformState *State) { void VPBasicBlock::execute(VPTransformState *State) { bool Replica = bool(State->Lane); - VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; - VPBlockBase *SingleHPred = nullptr; BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. - auto IsLoopRegion = [](VPBlockBase *BB) { - auto *R = dyn_cast(BB); - return R && !R->isReplicator(); + auto IsReplicateRegion = [](VPBlockBase *BB) { + auto *R = dyn_cast_or_null(BB); + return R && R->isReplicator(); }; // 1. Create an IR basic block. - if (PrevVPBB && /* A */ - !((SingleHPred = getSingleHierarchicalPredecessor()) && - SingleHPred->getExitingBasicBlock() == PrevVPBB && - PrevVPBB->getSingleHierarchicalSuccessor() && - (SingleHPred->getParent() == getEnclosingLoopRegion() && - !IsLoopRegion(SingleHPred))) && /* B */ - !(Replica && getPredecessors().empty())) { /* C */ - // The last IR basic block is reused, as an optimization, in three cases: - // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null; - // B. when the current VPBB has a single (hierarchical) predecessor which - // is PrevVPBB and the latter has a single (hierarchical) successor which - // both are in the same non-replicator region; and - // C. when the current VPBB is an entry of a region replica - where PrevVPBB - // is the exiting VPBB of this region from a previous instance, or the - // predecessor of this region. 
- + if (this == getPlan()->getVectorPreheader() || + (Replica && this == getParent()->getEntry()) || + IsReplicateRegion(getSingleHierarchicalPredecessor())) { + // Reuse the previous basic block if the current VPBB is either + // * the vector preheader, + // * the entry to a replicate region, or + // * the exit of a replicate region. + State->CFG.VPBB2IRBB[this] = NewBB; + } else { NewBB = createEmptyBasicBlock(State->CFG); State->Builder.SetInsertPoint(NewBB); @@ -518,8 +509,6 @@ void VPBasicBlock::execute(VPTransformState *State) { State->CFG.PrevBB = NewBB; State->CFG.VPBB2IRBB[this] = NewBB; connectToPredecessors(State->CFG); - } else { - State->CFG.VPBB2IRBB[this] = NewBB; } // 2. Fill the IR basic block with IR instructions. diff --git a/llvm/test/CodeGen/Hexagon/widen-not-load.ll b/llvm/test/CodeGen/Hexagon/widen-not-load.ll index d8d658342616c3..5bf8b57054a915 100644 --- a/llvm/test/CodeGen/Hexagon/widen-not-load.ll +++ b/llvm/test/CodeGen/Hexagon/widen-not-load.ll @@ -1,7 +1,9 @@ ; Test that double word post increment load is not generated. 
; REQUIRES: asserts -; RUN: llc -march=hexagon -O2 -debug-only=hexagon-load-store-widening %s -o 2>&1 - | FileCheck %s +; REQUIRES: asserts +; RUN: llc -march=hexagon -O2 -debug-only=hexagon-load-store-widening \ +; RUN: %s -o 2>&1 - | FileCheck %s ; Loads with positive invalid postinc is not widened define ptr @test1() { diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s index df0053a1dcb9b5..25f79397fa071d 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s @@ -448,7 +448,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: 2 1 1.00 cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvttpd2dq (%rax), %xmm2 @@ -687,7 +687,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 172.00 75.83 117.33 17.00 101.83 67.00 67.00 +# CHECK-NEXT: - 172.00 75.83 117.33 17.00 102.83 67.00 67.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -732,7 +732,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtss2sd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2dq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s 
b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s index 1b196b4355a6d4..028625013a85cc 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s @@ -1115,9 +1115,9 @@ vzeroupper # CHECK-NEXT: 1 3 1.00 vcomiss %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * vcomiss (%rax), %xmm1 # CHECK-NEXT: 2 4 1.00 vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 3 9 1.00 * vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 * vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: 3 11 1.00 * vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: 2 11 1.00 * vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2 @@ -1137,7 +1137,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 6 1.00 * vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 3 9 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 2 9 1.00 * vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %rcx # CHECK-NEXT: 3 9 1.00 * vcvtsd2si (%rax), %ecx @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 257.00 216.25 247.25 173.17 173.17 38.00 424.25 3.25 12.67 +# CHECK-NEXT: - 257.00 216.25 247.25 173.17 173.17 38.00 421.25 3.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1825,9 +1825,9 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - - - - - - vcomiss %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcomiss (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd 
%xmm0, %ymm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %ymm0, %ymm2 @@ -1847,7 +1847,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s index 9fcd03bfb2fd45..07870d92dac555 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s @@ -45,14 +45,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 4.00 - 0.67 +# CHECK-NEXT: - - - 8.00 1.67 1.67 2.00 4.00 - 0.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 - - - 1.00 - - 
vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.33 0.33 1.00 - - 0.33 vcvtps2ph $0, %xmm0, (%rax) # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s index e76d90521afa9c..8851be4679a1e9 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s @@ -423,7 +423,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 comisd %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * comisd (%rax), %xmm1 # CHECK-NEXT: 2 4 1.00 cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 3 9 1.00 * cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 * cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpd2dq %xmm0, %xmm2 @@ -433,7 +433,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 3 9 1.00 * cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: 3 9 1.00 * cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 * cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 cvtps2pd %xmm0, %xmm2 @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 119.25 2.25 4.67 +# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 117.25 2.25 4.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -709,7 +709,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - comisd %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - comisd (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - - - 
1.00 - - cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2 @@ -719,7 +719,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtps2pd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s index 49db25cb0bdfb1..7f07fd56fe60dc 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s @@ -1137,7 +1137,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %rcx # CHECK-NEXT: 3 10 1.00 * vcvtsd2si (%rax), %ecx @@ -1152,7 +1152,7 @@ vzeroupper # CHECK-NEXT: 3 5 2.00 vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 1 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 7 1.00 * vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 5 1.00 
vcvtss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %rcx @@ -1734,7 +1734,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 572.00 248.50 319.00 39.00 369.50 179.50 179.50 +# CHECK-NEXT: - 572.00 248.50 319.00 39.00 371.50 179.50 179.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1845,7 +1845,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtsd2si (%rax), %ecx @@ -1860,7 +1860,7 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - 2.00 - - vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s index 8736c1c6234af7..7cd1d3fc35ee67 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s @@ -1263,14 +1263,14 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax){1to16}, 
%zmm19 {%k1} {z} # CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 2 5 1.00 vcvtsd2usi %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtsd2usi %xmm0, %rcx # CHECK-NEXT: 3 10 1.00 * vcvtsd2usi (%rax), %ecx @@ -2053,7 +2053,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1506.00 198.00 335.00 25.00 523.00 304.50 304.50 +# CHECK-NEXT: - 1506.00 198.00 335.00 25.00 529.00 304.50 304.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2230,14 +2230,14 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 
# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2usi %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2usi %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtsd2usi (%rax), %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s index 8bf3c21891f7f8..9587c40ede68be 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s @@ -1970,14 +1970,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} 
# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vcvtps2udq %xmm16, %xmm19 # CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax), %xmm19 # CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax){1to4}, %xmm19 @@ -3269,7 +3269,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1935.00 278.00 579.50 48.00 738.50 495.50 495.50 +# CHECK-NEXT: - 1935.00 278.00 579.50 48.00 744.50 495.50 495.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -3511,14 +3511,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd 
(%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %xmm16, %xmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %xmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to4}, %xmm19 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s index 7dea75f8f8fec0..4abcd6fc516b79 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s @@ -22,14 +22,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: 3 8 1.00 * vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - SBDivider @@ -43,15 +43,15 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00 +# CHECK-NEXT: - - 8.00 4.00 2.00 5.00 2.00 2.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - 1.00 - - - - 
vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s index df0053a1dcb9b5..25f79397fa071d 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s @@ -448,7 +448,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: 2 1 1.00 cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvttpd2dq (%rax), %xmm2 @@ -687,7 +687,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 172.00 75.83 117.33 17.00 101.83 67.00 67.00 +# CHECK-NEXT: - 172.00 75.83 117.33 17.00 102.83 67.00 67.00 # CHECK: Resource pressure by 
instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -732,7 +732,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtss2sd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2dq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s index 05c476079c0f9d..179393abb08d47 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ -1115,9 +1115,9 @@ vzeroupper # CHECK-NEXT: 1 3 1.00 vcomiss %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * vcomiss (%rax), %xmm1 # CHECK-NEXT: 2 4 1.00 vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 3 10 1.00 * vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 10 1.00 * vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: 3 12 1.00 * vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: 2 12 1.00 * vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2 @@ -1137,7 +1137,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 6 1.00 * vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 3 10 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 2 10 1.00 * vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %rcx # CHECK-NEXT: 3 9 1.00 * vcvtsd2si (%rax), %ecx @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# 
CHECK-NEXT: - 336.00 215.58 248.58 173.17 173.17 38.00 427.58 3.25 12.67 +# CHECK-NEXT: - 336.00 215.58 248.58 173.17 173.17 38.00 424.58 3.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1825,9 +1825,9 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - - - - - - vcomiss %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcomiss (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %ymm0, %ymm2 @@ -1847,7 +1847,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s index 538ecf99074eda..d1fb824fee23db 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s @@ -45,14 +45,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# 
CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67 +# CHECK-NEXT: - - - 8.00 1.67 1.67 2.00 6.00 - 0.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax) # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s index 907db6f44a9e29..37a28a66fd3506 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s @@ -209,7 +209,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 comiss %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * comiss (%rax), %xmm1 # CHECK-NEXT: 1 3 1.00 cvtpi2ps %mm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * cvtpi2ps (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 * cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 1.00 * cvtps2pi (%rax), %mm2 # CHECK-NEXT: 2 4 1.00 cvtsi2ss %ecx, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s index 3b4aeb37968fd5..c9c3e20eeadedd 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s @@ -423,7 +423,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 3 
1.00 comisd %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * comisd (%rax), %xmm1 # CHECK-NEXT: 2 4 1.00 cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 3 10 1.00 * cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 10 1.00 * cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpd2dq %xmm0, %xmm2 @@ -433,7 +433,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: 3 9 1.00 * cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 2 10 1.00 * cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 cvtps2pd %xmm0, %xmm2 @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 112.00 70.75 95.75 63.17 63.17 14.00 119.25 2.25 4.67 +# CHECK-NEXT: - 112.00 70.75 95.75 63.17 63.17 14.00 117.25 2.25 4.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -709,7 +709,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - comisd %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - comisd (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2 @@ -719,7 +719,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpi2pd 
(%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtps2pd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s index d6d157827b3141..781676d70763c0 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s @@ -1137,7 +1137,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %rcx # CHECK-NEXT: 3 10 1.00 * vcvtsd2si (%rax), %ecx @@ -1152,7 +1152,7 @@ vzeroupper # CHECK-NEXT: 3 5 2.00 vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 1 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 7 1.00 * vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %rcx @@ -1734,7 +1734,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 572.00 248.50 319.00 39.00 369.50 179.50 179.50 +# CHECK-NEXT: - 572.00 248.50 319.00 39.00 371.50 179.50 179.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1845,7 +1845,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - 1.00 - - 
vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtsd2si (%rax), %ecx @@ -1860,7 +1860,7 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - 2.00 - - vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s index a2ec86e8724faa..9284810b9e73be 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s @@ -22,14 +22,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: 3 8 1.00 * vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %xmm0, (%rax) +# 
CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - SBDivider @@ -43,15 +43,15 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00 +# CHECK-NEXT: - - 8.00 4.00 2.00 5.00 2.00 2.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s index e2cfd02bc76c84..ff0f22bec1402c 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s @@ -448,7 +448,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * 
cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: 2 1 1.00 cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvttpd2dq (%rax), %xmm2 @@ -687,7 +687,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 172.00 75.83 117.33 17.00 101.83 67.00 67.00 +# CHECK-NEXT: - 172.00 75.83 117.33 17.00 102.83 67.00 67.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -732,7 +732,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtss2sd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2dq (%rax), %xmm2 diff --git a/mlir/include/mlir/Conversion/CMakeLists.txt b/mlir/include/mlir/Conversion/CMakeLists.txt index d212bf3e395e71..9f76ab659215ea 100644 --- a/mlir/include/mlir/Conversion/CMakeLists.txt +++ b/mlir/include/mlir/Conversion/CMakeLists.txt @@ -6,3 +6,5 @@ mlir_tablegen(Passes.capi.cpp.inc -gen-pass-capi-impl --prefix Conversion) add_public_tablegen_target(MLIRConversionPassIncGen) add_mlir_doc(Passes ConversionPasses ./ -gen-pass-doc) + +add_subdirectory(ConvertToLLVM) diff --git a/mlir/include/mlir/Conversion/ConvertToLLVM/CMakeLists.txt b/mlir/include/mlir/Conversion/ConvertToLLVM/CMakeLists.txt new file mode 100644 index 00000000000000..54d7a03fc22dff --- /dev/null +++ b/mlir/include/mlir/Conversion/ConvertToLLVM/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_TARGET_DEFINITIONS ToLLVMInterface.td) 
+mlir_tablegen(ToLLVMAttrInterface.h.inc -gen-attr-interface-decls) +mlir_tablegen(ToLLVMAttrInterface.cpp.inc -gen-attr-interface-defs) +mlir_tablegen(ToLLVMOpInterface.h.inc -gen-op-interface-decls) +mlir_tablegen(ToLLVMOpInterface.cpp.inc -gen-op-interface-defs) +add_public_tablegen_target(MLIRConvertToLLVMInterfaceIncGen) +add_dependencies(mlir-generic-headers MLIRConvertToLLVMInterfaceIncGen) diff --git a/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h b/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h index 00aeed9bf29dc2..6fd043646acd31 100644 --- a/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h +++ b/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h @@ -11,6 +11,7 @@ #include "mlir/IR/DialectInterface.h" #include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OpDefinition.h" namespace mlir { class ConversionTarget; @@ -18,6 +19,7 @@ class LLVMTypeConverter; class MLIRContext; class Operation; class RewritePatternSet; +class AnalysisManager; /// Base class for dialect interfaces providing translation to LLVM IR. /// Dialects that can be translated should provide an implementation of this @@ -50,6 +52,18 @@ void populateConversionTargetFromOperation(Operation *op, LLVMTypeConverter &typeConverter, RewritePatternSet &patterns); +/// Helper function for populating LLVM conversion patterns. If `op` implements +/// the `ConvertToLLVMOpInterface` interface, then the LLVM conversion pattern +/// attributes provided by the interface will be used to configure the +/// conversion target, type converter, and the pattern set. 
+void populateOpConvertToLLVMConversionPatterns(Operation *op, + ConversionTarget &target, + LLVMTypeConverter &typeConverter, + RewritePatternSet &patterns); } // namespace mlir +#include "mlir/Conversion/ConvertToLLVM/ToLLVMAttrInterface.h.inc" + +#include "mlir/Conversion/ConvertToLLVM/ToLLVMOpInterface.h.inc" + #endif // MLIR_CONVERSION_CONVERTTOLLVM_TOLLVMINTERFACE_H diff --git a/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.td b/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.td new file mode 100644 index 00000000000000..1331a9802c570f --- /dev/null +++ b/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.td @@ -0,0 +1,76 @@ + +//===- ToLLVMInterface.td - Conversion to LLVM interfaces -----*- tablegen -*-===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines interfaces for managing transformations, including populating +// pattern rewrites. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_CONVERTTOLLVM_TOLLVMINTERFACE_TD +#define MLIR_CONVERSION_CONVERTTOLLVM_TOLLVMINTERFACE_TD + +include "mlir/IR/OpBase.td" + +//===----------------------------------------------------------------------===// +// Attribute interface +//===----------------------------------------------------------------------===// + +def ConvertToLLVMAttrInterface : + AttrInterface<"ConvertToLLVMAttrInterface"> { + let description = [{ + The `ConvertToLLVMAttrInterface` attribute interface allows using + attributes to configure the convert to LLVM infrastructure, this includes: + - The conversion target. + - The LLVM type converter. + - The pattern set. + + This interface permits fine-grained configuration of the `convert-to-llvm` + process.
For example, attributes with target information like + `#nvvm.target` or `#rocdl.target` can leverage this interface for populating + patterns specific to a particular target. + }]; + let cppNamespace = "::mlir"; + let methods = [ + InterfaceMethod< + /*desc=*/[{ + Populate the dialect conversion target, type converter and pattern set. + }], + /*retTy=*/"void", + /*methodName=*/"populateConvertToLLVMConversionPatterns", + /*args=*/(ins "::mlir::ConversionTarget&":$target, + "::mlir::LLVMTypeConverter&":$typeConverter, + "::mlir::RewritePatternSet&":$patternSet)> + ]; +} + +//===----------------------------------------------------------------------===// +// Op interface +//===----------------------------------------------------------------------===// + +def ConvertToLLVMOpInterface : OpInterface<"ConvertToLLVMOpInterface"> { + let description = [{ + Interface for collecting all convert to LLVM attributes stored in an + operation. See `ConvertToLLVMAttrInterface` for more information on these + attributes. + }]; + let cppNamespace = "::mlir"; + let methods = [ + InterfaceMethod< + /*desc=*/[{ + Populate the provided vector with a list of convert to LLVM attributes + to apply. + }], + /*retTy=*/"void", + /*methodName=*/"getConvertToLLVMConversionAttrs", + /*args=*/(ins + "::llvm::SmallVectorImpl<::mlir::ConvertToLLVMAttrInterface>&":$attrs) + > + ]; +} + +#endif // MLIR_CONVERSION_CONVERTTOLLVM_TOLLVMINTERFACE_TD diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUToLLVM.h b/mlir/include/mlir/Conversion/GPUCommon/GPUToLLVM.h new file mode 100644 index 00000000000000..ad8c39fe676618 --- /dev/null +++ b/mlir/include/mlir/Conversion/GPUCommon/GPUToLLVM.h @@ -0,0 +1,25 @@ +//===- GPUToLLVM.h - Convert GPU to LLVM dialect ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares registration functions for converting GPU to LLVM. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_GPUCOMMON_GPUTOLLVM_H +#define MLIR_CONVERSION_GPUCOMMON_GPUTOLLVM_H + +namespace mlir { +class DialectRegistry; +namespace gpu { +/// Registers the `ConvertToLLVMOpInterface` interface on the `gpu::GPUModuleOp` +/// operation. +void registerConvertGpuToLLVMInterface(DialectRegistry &registry); +} // namespace gpu +} // namespace mlir + +#endif // MLIR_CONVERSION_GPUCOMMON_GPUTOLLVM_H diff --git a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVM.h b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVM.h new file mode 100644 index 00000000000000..6311630a23c8f6 --- /dev/null +++ b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVM.h @@ -0,0 +1,27 @@ +//===- GPUToNVVM.h - Convert GPU to NVVM dialect ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares registration functions for converting GPU to NVVM. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_GPUTONVVM_GPUTONVVM_H +#define MLIR_CONVERSION_GPUTONVVM_GPUTONVVM_H + +namespace mlir { +class DialectRegistry; +namespace NVVM { +/// Registers the `ConvertToLLVMAttrInterface` interface on the +/// `NVVM::NVVMTargetAttr` attribute. This interface populates the conversion +/// target, LLVM type converter, and pattern set for converting GPU operations +/// to NVVM.
+void registerConvertGpuToNVVMInterface(DialectRegistry &registry); +} // namespace NVVM +} // namespace mlir + +#endif // MLIR_CONVERSION_GPUTONVVM_GPUTONVVM_H diff --git a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h index 645e86a4309621..fc7c967f1b62cf 100644 --- a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h +++ b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h @@ -31,6 +31,10 @@ LLVM::LLVMStructType convertMMAToLLVMType(gpu::MMAMatrixType type); /// Configure target to convert from the GPU dialect to NVVM. void configureGpuToNVVMConversionLegality(ConversionTarget &target); +/// Configure the LLVM type converter to convert types and address spaces from the +/// GPU dialect to NVVM. +void configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter); + /// Collect a set of patterns to convert from the GPU dialect to NVVM. void populateGpuToNVVMConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns); diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 4d272ba219c6f1..e394bae64e0918 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -22,12 +22,20 @@ def ConvertToLLVMPass : Pass<"convert-to-llvm"> { This is a generic pass to convert to LLVM, it uses the `ConvertToLLVMPatternInterface` dialect interface to delegate to dialects the injection of conversion patterns. + + If `dynamic` is set to `true`, the pass will look for + `ConvertToLLVMAttrInterface` attributes and use them to further configure + the conversion process. This option also uses the `DataLayoutAnalysis` + analysis to configure the type converter. Enabling this option incurs + extra overhead.
}]; let constructor = "mlir::createConvertToLLVMPass()"; let options = [ ListOption<"filterDialects", "filter-dialects", "std::string", "Test conversion patterns of only the specified dialects">, + Option<"useDynamic", "dynamic", "bool", "false", + "Use op conversion attributes to configure the conversion">, ]; } diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h index 1f2ef26b450701..14a6a2787b3a5d 100644 --- a/mlir/include/mlir/InitAllExtensions.h +++ b/mlir/include/mlir/InitAllExtensions.h @@ -18,6 +18,8 @@ #include "mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h" #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" +#include "mlir/Conversion/GPUCommon/GPUToLLVM.h" +#include "mlir/Conversion/GPUToNVVM/GPUToNVVM.h" #include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h" #include "mlir/Conversion/MathToLLVM/MathToLLVM.h" #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" @@ -72,6 +74,8 @@ inline void registerAllExtensions(DialectRegistry &registry) { registerConvertOpenMPToLLVMInterface(registry); ub::registerConvertUBToLLVMInterface(registry); registerConvertAMXToLLVMInterface(registry); + gpu::registerConvertGpuToLLVMInterface(registry); + NVVM::registerConvertGpuToNVVMInterface(registry); // Register all transform dialect extensions.
affine::registerTransformDialectExtension(registry); diff --git a/mlir/lib/Conversion/ConvertToLLVM/CMakeLists.txt b/mlir/lib/Conversion/ConvertToLLVM/CMakeLists.txt index de3d850d520c0f..c71711ba2ebedb 100644 --- a/mlir/lib/Conversion/ConvertToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/ConvertToLLVM/CMakeLists.txt @@ -7,6 +7,7 @@ add_mlir_conversion_library(MLIRConvertToLLVMInterface ToLLVMInterface.cpp DEPENDS + MLIRConvertToLLVMInterfaceIncGen LINK_LIBS PUBLIC MLIRIR @@ -21,6 +22,7 @@ add_mlir_conversion_library(MLIRConvertToLLVMPass LINK_LIBS PUBLIC MLIRIR + MLIRConvertToLLVMInterface MLIRLLVMCommonConversion MLIRLLVMDialect MLIRPass diff --git a/mlir/lib/Conversion/ConvertToLLVM/ConvertToLLVMPass.cpp b/mlir/lib/Conversion/ConvertToLLVM/ConvertToLLVMPass.cpp index b2407a258c2719..673ba814d338f4 100644 --- a/mlir/lib/Conversion/ConvertToLLVM/ConvertToLLVMPass.cpp +++ b/mlir/lib/Conversion/ConvertToLLVM/ConvertToLLVMPass.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Analysis/DataLayoutAnalysis.h" #include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h" #include "mlir/Conversion/ConvertToLLVM/ToLLVMPass.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" @@ -27,6 +28,41 @@ namespace mlir { using namespace mlir; namespace { +/// Base class for creating the internal implementation of `convert-to-llvm` +/// passes. +class ConvertToLLVMPassInterface { +public: + ConvertToLLVMPassInterface(MLIRContext *context, + ArrayRef filterDialects); + virtual ~ConvertToLLVMPassInterface() = default; + + /// Get the dependent dialects used by `convert-to-llvm`. + static void getDependentDialects(DialectRegistry ®istry); + + /// Initialize the internal state of the `convert-to-llvm` pass + /// implementation. This method is invoked by `ConvertToLLVMPass::initialize`. + /// This method returns whether the initialization process failed. 
+ virtual LogicalResult initialize() = 0; + + /// Transform `op` to LLVM with the conversions available in the pass. The + /// analysis manager can be used to query analyzes like `DataLayoutAnalysis` + /// to further configure the conversion process. This method is invoked by + /// `ConvertToLLVMPass::runOnOperation`. This method returns whether the + /// transformation process failed. + virtual LogicalResult transform(Operation *op, + AnalysisManager manager) const = 0; + +protected: + /// Visit the `ConvertToLLVMPatternInterface` dialect interfaces and call + /// `visitor` with each of the interfaces. If `filterDialects` is non-empty, + /// then `visitor` is invoked only with the dialects in the `filterDialects` + /// list. + LogicalResult visitInterfaces( + llvm::function_ref visitor); + MLIRContext *context; + /// List of dialects names to use as filters. + ArrayRef filterDialects; +}; /// This DialectExtension can be attached to the context, which will invoke the /// `apply()` method for every loaded dialect. If a dialect implements the @@ -58,74 +94,188 @@ class LoadDependentDialectExtension : public DialectExtensionBase { } }; +//===----------------------------------------------------------------------===// +// StaticConvertToLLVM +//===----------------------------------------------------------------------===// + +/// Static implementation of the `convert-to-llvm` pass. This version only looks +/// at dialect interfaces to configure the conversion process. +struct StaticConvertToLLVM : public ConvertToLLVMPassInterface { + /// Pattern set with conversions to LLVM. + std::shared_ptr patterns; + /// The conversion target. + std::shared_ptr target; + /// The LLVM type converter. + std::shared_ptr typeConverter; + using ConvertToLLVMPassInterface::ConvertToLLVMPassInterface; + + /// Configure the conversion to LLVM at pass initialization. 
+ LogicalResult initialize() final { + auto target = std::make_shared(*context); + auto typeConverter = std::make_shared(context); + RewritePatternSet tempPatterns(context); + target->addLegalDialect(); + // Populate the patterns with the dialect interface. + if (failed(visitInterfaces([&](ConvertToLLVMPatternInterface *iface) { + iface->populateConvertToLLVMConversionPatterns( + *target, *typeConverter, tempPatterns); + }))) + return failure(); + this->patterns = + std::make_unique(std::move(tempPatterns)); + this->target = target; + this->typeConverter = typeConverter; + return success(); + } + + /// Apply the conversion driver. + LogicalResult transform(Operation *op, AnalysisManager manager) const final { + if (failed(applyPartialConversion(op, *target, *patterns))) + return failure(); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// DynamicConvertToLLVM +//===----------------------------------------------------------------------===// + +/// Dynamic implementation of the `convert-to-llvm` pass. This version inspects +/// the IR to configure the conversion to LLVM. +struct DynamicConvertToLLVM : public ConvertToLLVMPassInterface { + /// A list of all the `ConvertToLLVMPatternInterface` dialect interfaces used + /// to partially configure the conversion process. + std::shared_ptr> + interfaces; + using ConvertToLLVMPassInterface::ConvertToLLVMPassInterface; + + /// Collect the dialect interfaces used to configure the conversion process. + LogicalResult initialize() final { + auto interfaces = + std::make_shared>(); + // Collect the interfaces. + if (failed(visitInterfaces([&](ConvertToLLVMPatternInterface *iface) { + interfaces->push_back(iface); + }))) + return failure(); + this->interfaces = interfaces; + return success(); + } + + /// Configure the conversion process and apply the conversion driver. 
+ LogicalResult transform(Operation *op, AnalysisManager manager) const final { + RewritePatternSet patterns(context); + ConversionTarget target(*context); + target.addLegalDialect(); + // Get the data layout analysis. + const auto &dlAnalysis = manager.getAnalysis(); + LLVMTypeConverter typeConverter(context, &dlAnalysis); + + // Configure the conversion with dialect level interfaces. + for (ConvertToLLVMPatternInterface *iface : *interfaces) + iface->populateConvertToLLVMConversionPatterns(target, typeConverter, + patterns); + + // Configure the conversion attribute interfaces. + populateOpConvertToLLVMConversionPatterns(op, target, typeConverter, + patterns); + + // Apply the conversion. + if (failed(applyPartialConversion(op, target, std::move(patterns)))) + return failure(); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ConvertToLLVMPass +//===----------------------------------------------------------------------===// + /// This is a generic pass to convert to LLVM, it uses the /// `ConvertToLLVMPatternInterface` dialect interface to delegate to dialects /// the injection of conversion patterns. class ConvertToLLVMPass : public impl::ConvertToLLVMPassBase { - std::shared_ptr patterns; - std::shared_ptr target; - std::shared_ptr typeConverter; + std::shared_ptr impl; public: using impl::ConvertToLLVMPassBase::ConvertToLLVMPassBase; void getDependentDialects(DialectRegistry ®istry) const final { - registry.insert(); - registry.addExtensions(); + ConvertToLLVMPassInterface::getDependentDialects(registry); } LogicalResult initialize(MLIRContext *context) final { - RewritePatternSet tempPatterns(context); - auto target = std::make_shared(*context); - target->addLegalDialect(); - auto typeConverter = std::make_shared(context); - - if (!filterDialects.empty()) { - // Test mode: Populate only patterns from the specified dialects. 
Produce - // an error if the dialect is not loaded or does not implement the - // interface. - for (std::string &dialectName : filterDialects) { - Dialect *dialect = context->getLoadedDialect(dialectName); - if (!dialect) - return emitError(UnknownLoc::get(context)) - << "dialect not loaded: " << dialectName << "\n"; - auto *iface = dyn_cast(dialect); - if (!iface) - return emitError(UnknownLoc::get(context)) - << "dialect does not implement ConvertToLLVMPatternInterface: " - << dialectName << "\n"; - iface->populateConvertToLLVMConversionPatterns(*target, *typeConverter, - tempPatterns); - } - } else { - // Normal mode: Populate all patterns from all dialects that implement the - // interface. - for (Dialect *dialect : context->getLoadedDialects()) { - // First time we encounter this dialect: if it implements the interface, - // let's populate patterns ! - auto *iface = dyn_cast(dialect); - if (!iface) - continue; - iface->populateConvertToLLVMConversionPatterns(*target, *typeConverter, - tempPatterns); - } - } - - this->patterns = - std::make_unique(std::move(tempPatterns)); - this->target = target; - this->typeConverter = typeConverter; + std::shared_ptr impl; + // Choose the pass implementation. 
+ if (useDynamic) + impl = std::make_shared(context, filterDialects); + else + impl = std::make_shared(context, filterDialects); + if (failed(impl->initialize())) + return failure(); + this->impl = impl; return success(); } void runOnOperation() final { - if (failed(applyPartialConversion(getOperation(), *target, *patterns))) - signalPassFailure(); + if (failed(impl->transform(getOperation(), getAnalysisManager()))) + return signalPassFailure(); } }; } // namespace +//===----------------------------------------------------------------------===// +// ConvertToLLVMPassInterface +//===----------------------------------------------------------------------===// + +ConvertToLLVMPassInterface::ConvertToLLVMPassInterface( + MLIRContext *context, ArrayRef filterDialects) + : context(context), filterDialects(filterDialects) {} + +void ConvertToLLVMPassInterface::getDependentDialects( + DialectRegistry ®istry) { + registry.insert(); + registry.addExtensions(); +} + +LogicalResult ConvertToLLVMPassInterface::visitInterfaces( + llvm::function_ref visitor) { + if (!filterDialects.empty()) { + // Test mode: Populate only patterns from the specified dialects. Produce + // an error if the dialect is not loaded or does not implement the + // interface. + for (StringRef dialectName : filterDialects) { + Dialect *dialect = context->getLoadedDialect(dialectName); + if (!dialect) + return emitError(UnknownLoc::get(context)) + << "dialect not loaded: " << dialectName << "\n"; + auto *iface = dyn_cast(dialect); + if (!iface) + return emitError(UnknownLoc::get(context)) + << "dialect does not implement ConvertToLLVMPatternInterface: " + << dialectName << "\n"; + visitor(iface); + } + } else { + // Normal mode: Populate all patterns from all dialects that implement the + // interface. + for (Dialect *dialect : context->getLoadedDialects()) { + // First time we encounter this dialect: if it implements the interface, + // let's populate patterns ! 
+ auto *iface = dyn_cast(dialect); + if (!iface) + continue; + visitor(iface); + } + } + return success(); +} + +//===----------------------------------------------------------------------===// +// API +//===----------------------------------------------------------------------===// + void mlir::registerConvertToLLVMDependentDialectLoading( DialectRegistry ®istry) { registry.addExtensions(); diff --git a/mlir/lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp b/mlir/lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp index 3a4e83b2a8838f..252245dfbf5417 100644 --- a/mlir/lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp +++ b/mlir/lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp @@ -30,3 +30,22 @@ void mlir::populateConversionTargetFromOperation( patterns); }); } + +void mlir::populateOpConvertToLLVMConversionPatterns( + Operation *op, ConversionTarget &target, LLVMTypeConverter &typeConverter, + RewritePatternSet &patterns) { + auto iface = dyn_cast(op); + if (!iface) + iface = op->getParentOfType(); + if (!iface) + return; + SmallVector attrs; + iface.getConvertToLLVMConversionAttrs(attrs); + for (ConvertToLLVMAttrInterface attr : attrs) + attr.populateConvertToLLVMConversionPatterns(target, typeConverter, + patterns); +} + +#include "mlir/Conversion/ConvertToLLVM/ToLLVMAttrInterface.cpp.inc" + +#include "mlir/Conversion/ConvertToLLVM/ToLLVMOpInterface.cpp.inc" diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp index 92b28ff9c58737..1497d662dcdbdd 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp @@ -22,6 +22,7 @@ #include "mlir/Conversion/ConvertToLLVM/ToLLVMPass.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h" +#include "mlir/Conversion/GPUCommon/GPUToLLVM.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include 
"mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" @@ -1762,3 +1763,34 @@ void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter, ConvertSetCsrPointersOpToGpuRuntimeCallPattern>(converter); patterns.add(converter, kernelBarePtrCallConv); } + +//===----------------------------------------------------------------------===// +// GPUModuleOp convert to LLVM op interface +//===----------------------------------------------------------------------===// + +namespace { +struct GPUModuleOpConvertToLLVMInterface + : public ConvertToLLVMOpInterface::ExternalModel< + GPUModuleOpConvertToLLVMInterface, gpu::GPUModuleOp> { + /// Get the conversion patterns from the target attribute. + void getConvertToLLVMConversionAttrs( + Operation *op, SmallVectorImpl &attrs) const; +}; +} // namespace + +void GPUModuleOpConvertToLLVMInterface::getConvertToLLVMConversionAttrs( + Operation *op, SmallVectorImpl &attrs) const { + auto module = cast(op); + ArrayAttr targetsAttr = module.getTargetsAttr(); + // Fail if there are no target attributes or there is more than one target. 
+ if (!targetsAttr || targetsAttr.size() != 1) + return; + if (auto patternAttr = dyn_cast(targetsAttr[0])) + attrs.push_back(patternAttr); +} + +void mlir::gpu::registerConvertGpuToLLVMInterface(DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, gpu::GPUDialect *dialect) { + gpu::GPUModuleOp::attachInterface(*ctx); + }); +} diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 04e85c2b337dec..b343cf71e3a2e7 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -15,8 +15,10 @@ #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" +#include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" +#include "mlir/Conversion/GPUToNVVM/GPUToNVVM.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" @@ -274,29 +276,7 @@ struct LowerGpuOpsToNVVMOpsPass } LLVMTypeConverter converter(m.getContext(), options); - // NVVM uses alloca in the default address space to represent private - // memory allocations, so drop private annotations. NVVM uses address - // space 3 for shared memory. NVVM uses the default address space to - // represent global memory. 
- populateGpuMemorySpaceAttributeConversions( - converter, [](gpu::AddressSpace space) -> unsigned { - switch (space) { - case gpu::AddressSpace::Global: - return static_cast( - NVVM::NVVMMemorySpace::kGlobalMemorySpace); - case gpu::AddressSpace::Workgroup: - return static_cast( - NVVM::NVVMMemorySpace::kSharedMemorySpace); - case gpu::AddressSpace::Private: - return 0; - } - llvm_unreachable("unknown address space enum value"); - return 0; - }); - // Lowering for MMAMatrixType. - converter.addConversion([&](gpu::MMAMatrixType type) -> Type { - return convertMMAToLLVMType(type); - }); + configureGpuToNVVMTypeConverter(converter); RewritePatternSet llvmPatterns(m.getContext()); arith::populateArithToLLVMConversionPatterns(converter, llvmPatterns); @@ -332,6 +312,32 @@ void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) { target.addLegalOp(); } +void mlir::configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter) { + // NVVM uses alloca in the default address space to represent private + // memory allocations, so drop private annotations. NVVM uses address + // space 3 for shared memory. NVVM uses the default address space to + // represent global memory. + populateGpuMemorySpaceAttributeConversions( + converter, [](gpu::AddressSpace space) -> unsigned { + switch (space) { + case gpu::AddressSpace::Global: + return static_cast( + NVVM::NVVMMemorySpace::kGlobalMemorySpace); + case gpu::AddressSpace::Workgroup: + return static_cast( + NVVM::NVVMMemorySpace::kSharedMemorySpace); + case gpu::AddressSpace::Private: + return 0; + } + llvm_unreachable("unknown address space enum value"); + return 0; + }); + // Lowering for MMAMatrixType. 
+ converter.addConversion([&](gpu::MMAMatrixType type) -> Type { + return convertMMAToLLVMType(type); + }); +} + template static void populateOpPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, StringRef f32Func, @@ -467,3 +473,34 @@ void mlir::populateGpuToNVVMConversionPatterns( populateOpPatterns(converter, patterns, "__nv_tanhf", "__nv_tanh"); } + +//===----------------------------------------------------------------------===// +// NVVMTargetAttr convert to LLVM attr interface +//===----------------------------------------------------------------------===// + +namespace { +struct NVVMTargetConvertToLLVMAttrInterface + : public ConvertToLLVMAttrInterface::ExternalModel< + NVVMTargetConvertToLLVMAttrInterface, NVVM::NVVMTargetAttr> { + /// Configure GPU to NVVM. + void populateConvertToLLVMConversionPatterns( + Attribute attr, ConversionTarget &target, + LLVMTypeConverter &typeConverter, RewritePatternSet &patterns) const; +}; +} // namespace + +void NVVMTargetConvertToLLVMAttrInterface:: + populateConvertToLLVMConversionPatterns(Attribute attr, + ConversionTarget &target, + LLVMTypeConverter &typeConverter, + RewritePatternSet &patterns) const { + configureGpuToNVVMConversionLegality(target); + configureGpuToNVVMTypeConverter(typeConverter); + populateGpuToNVVMConversionPatterns(typeConverter, patterns); +} + +void mlir::NVVM::registerConvertGpuToNVVMInterface(DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, NVVMDialect *dialect) { + NVVMTargetAttr::attachInterface(*ctx); + }); +} diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-target-attr.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-target-attr.mlir new file mode 100644 index 00000000000000..ed7fa6508d5ade --- /dev/null +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-target-attr.mlir @@ -0,0 +1,42 @@ +// RUN: mlir-opt %s --pass-pipeline="builtin.module(gpu.module(convert-to-llvm{dynamic=true}))" | FileCheck %s + +// CHECK-LABEL: gpu.module 
@nvvm_module +gpu.module @nvvm_module [#nvvm.target] { + // CHECK-LABEL: llvm.func @kernel_0() + func.func @kernel_0() -> index { + // CHECK: = nvvm.read.ptx.sreg.tid.x : i32 + // CHECK: = llvm.sext %{{.*}} : i32 to i64 + %tIdX = gpu.thread_id x + // CHECK: = nvvm.read.ptx.sreg.laneid range : i32 + // CHECK: = llvm.sext %{{.*}} : i32 to i64 + %laneId = gpu.lane_id + %sum = index.add %tIdX, %laneId + func.return %sum : index + } + +// CHECK-LABEL: llvm.func @kernel_1 +// CHECK: (%{{.*}}: !llvm.ptr<1>, %arg1: !llvm.ptr<1>, %arg2: i64) +// CHECK: attributes {gpu.kernel, gpu.known_block_size = array, nvvm.kernel, nvvm.maxntid = array} + gpu.func @kernel_1(%arg0 : memref>) kernel attributes {known_block_size = array} { + gpu.return + } +} + +// CHECK-LABEL: gpu.module @nvvm_module_2 +gpu.module @nvvm_module_2 { + // CHECK-LABEL: llvm.func @kernel_0() + func.func @kernel_0() -> index { + // CHECK: = gpu.thread_id x + %tIdX = gpu.thread_id x + // CHECK: = gpu.lane_id + %laneId = gpu.lane_id + %sum = index.add %tIdX, %laneId + func.return %sum : index + } + +// CHECK-LABEL: gpu.func @kernel_1 +// CHECK: (%{{.*}}: memref>) kernel attributes {known_block_size = array} + gpu.func @kernel_1(%arg0 : memref>) kernel attributes {known_block_size = array} { + gpu.return + } +}