From 79a845ce49b05bf0b7c3e088872f4c0d65a2b16a Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Thu, 23 Nov 2023 16:04:29 -0300 Subject: [PATCH] Don't reuse passmanagers across compilations (#52054) --- doc/src/devdocs/locks.md | 1 + src/aotcompile.cpp | 3 +++ src/codegen.cpp | 7 +++++ src/jitlayers.cpp | 20 +++++++++------ src/jitlayers.h | 11 +++----- src/pipeline.cpp | 55 ++++++++++++++++++---------------------- 6 files changed, 52 insertions(+), 45 deletions(-) diff --git a/doc/src/devdocs/locks.md b/doc/src/devdocs/locks.md index 26de9351e40cd..33947035f51a0 100644 --- a/doc/src/devdocs/locks.md +++ b/doc/src/devdocs/locks.md @@ -30,6 +30,7 @@ The following are definitely leaf locks (level 1), and must not try to acquire a > * jl_in_stackwalk (Win32) > * ResourcePool::mutex > * RLST_mutex +> * llvm_printing_mutex > * jl_locked_stream::mutex > * debuginfo_asyncsafe > * inference_timing_mutex diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index b65d412b6bcda..b844d22337d31 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -1414,12 +1414,15 @@ static SmallVector add_output(Module &M, TargetMachine &TM, Stri return outputs; } +extern int jl_is_timing_passes; static unsigned compute_image_thread_count(const ModuleInfo &info) { // 32-bit systems are very memory-constrained #ifdef _P32 LLVM_DEBUG(dbgs() << "32-bit systems are restricted to a single thread\n"); return 1; #endif + if (jl_is_timing_passes) // LLVM isn't thread safe when timing the passes https://github.com/llvm/llvm-project/issues/44417 + return 1; // COFF has limits on external symbols (even hidden) up to 65536. We reserve the last few // for any of our other symbols that we insert during compilation. if (info.triple.isOSBinFormatCOFF() && info.globals > 64000) { diff --git a/src/codegen.cpp b/src/codegen.cpp index 16f0b6903aa28..42a62bae4bd80 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -9483,6 +9483,8 @@ char jl_using_oprofile_jitevents = 0; // Non-zero if running under OProfile char jl_using_perf_jitevents = 0; #endif +int jl_is_timing_passes = 0; + extern "C" void jl_init_llvm(void) { jl_page_size = jl_getpagesize(); @@ -9541,6 +9543,11 @@ extern "C" void jl_init_llvm(void) if (clopt && clopt->getNumOccurrences() == 0) { clopt->addOccurrence(1, clopt->ArgStr, "false", true); } + + clopt = llvmopts.lookup("time-passes"); + if (clopt && clopt->getNumOccurrences() > 0) + jl_is_timing_passes = 1; + jl_ExecutionEngine = new JuliaOJIT(); bool jl_using_gdb_jitevents = false; diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 2de5455903375..3ed86c688f6dd 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -1249,26 +1249,30 @@ namespace { orc::JITTargetMachineBuilder JTMB; OptimizationLevel O; SmallVector, 0> &printers; - PMCreator(TargetMachine &TM, int optlevel, SmallVector, 0> &printers) JL_NOTSAFEPOINT - : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)), printers(printers) {} + std::mutex &llvm_printing_mutex; + PMCreator(TargetMachine &TM, int optlevel, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT + : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)), printers(printers), llvm_printing_mutex(llvm_printing_mutex) {} auto operator()() JL_NOTSAFEPOINT { auto TM = cantFail(JTMB.createTargetMachine()); fixupTM(*TM); auto NPM = std::make_unique(std::move(TM), O); // TODO this needs to be locked, as different resource pools may add to the printer vector at the same time - printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT { - NPM->printTimers(); - }); + { + std::lock_guard lock(llvm_printing_mutex); + printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT { + NPM->printTimers(); + }); + } return NPM; } }; template struct OptimizerT { - OptimizerT(TargetMachine &TM, SmallVector, 0> &printers) JL_NOTSAFEPOINT { + OptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT { for (size_t i = 0; i < N; i++) { - PMs[i] = std::make_unique>>(PMCreator(TM, i, printers)); + PMs[i] = std::make_unique>>(PMCreator(TM, i, printers, llvm_printing_mutex)); } } @@ -1706,7 +1710,7 @@ JuliaOJIT::JuliaOJIT() LockLayer(ObjectLayer), CompileLayer(ES, LockLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(SharedBytes, RLST_mutex))), - OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT(*TM, PrintLLVMTimers))), + OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT(*TM, PrintLLVMTimers, llvm_printing_mutex))), OptSelLayer(ES, OptimizeLayer, orc::IRTransformLayer::TransformFunction(selectOptLevel)), DepsVerifyLayer(ES, OptSelLayer, orc::IRTransformLayer::TransformFunction(validateExternRelocations)), ExternalCompileLayer(ES, LockLayer, diff --git a/src/jitlayers.h b/src/jitlayers.h index ba2e81879a44b..622fe9131861d 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -101,14 +102,9 @@ struct OptimizationOptions { struct NewPM { std::unique_ptr TM; -#if JL_LLVM_VERSION < 160000 - StandardInstrumentations SI; -#endif - std::unique_ptr PIC; - PassBuilder PB; - ModulePassManager MPM; OptimizationLevel O; - + OptimizationOptions options; + TimePassesHandler TimePasses; NewPM(std::unique_ptr TM, OptimizationLevel O, OptimizationOptions options = OptimizationOptions::defaults()) JL_NOTSAFEPOINT; ~NewPM() JL_NOTSAFEPOINT; @@ -582,6 +578,7 @@ class JuliaOJIT { jl_locked_stream dump_compiles_stream; jl_locked_stream dump_llvm_opt_stream; + std::mutex llvm_printing_mutex{}; SmallVector, 0> PrintLLVMTimers; ResourcePool> ContextPool; diff --git a/src/pipeline.cpp b/src/pipeline.cpp index e73b49710bdc3..4b099521d33f9 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -701,21 +701,6 @@ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); PIC.addClassToPassName("AfterOptimizationMarkerPass", "AfterOptimization"); } -#if JL_LLVM_VERSION >= 160000 - auto createPIC() JL_NOTSAFEPOINT { - auto PIC = std::make_unique(); - adjustPIC(*PIC); - return PIC; - } -#else - auto createPIC(StandardInstrumentations &SI) JL_NOTSAFEPOINT { - auto PIC = std::make_unique(); - adjustPIC(*PIC); - SI.registerCallbacks(*PIC); - return PIC; - } -#endif - FunctionAnalysisManager createFAM(OptimizationLevel O, TargetMachine &TM) JL_NOTSAFEPOINT { FunctionAnalysisManager FAM; @@ -744,15 +729,8 @@ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); } NewPM::NewPM(std::unique_ptr TM, OptimizationLevel O, OptimizationOptions options) : - TM(std::move(TM)), -#if JL_LLVM_VERSION < 160000 - SI(false), - PIC(createPIC(SI)), -#else - PIC(createPIC()), -#endif - PB(this->TM.get(), PipelineTuningOptions(), None, PIC.get()), - MPM(createMPM(PB, O, options)), O(O) {} + TM(std::move(TM)), O(O), options(options), TimePasses() {} + NewPM::~NewPM() = default; @@ -778,17 +756,34 @@ void NewPM::run(Module &M) { //We must recreate the analysis managers every time //so that analyses from previous runs of the pass manager //do not hang around for the next run - AnalysisManagers AM{*TM, PB, O}; - +#if JL_LLVM_VERSION >= 160000 + StandardInstrumentations SI(M.getContext(),false); +#else + StandardInstrumentations SI(false); +#endif + FunctionAnalysisManager FAM(createFAM(O, *TM.get())); + PassInstrumentationCallbacks PIC; + adjustPIC(PIC); + TimePasses.registerCallbacks(PIC); + SI.registerCallbacks(PIC, &FAM); + SI.getTimePasses().setOutStream(nulls()); //TODO: figure out a better way of doing this + LoopAnalysisManager LAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + PassBuilder PB(TM.get(), PipelineTuningOptions(), None, &PIC); + PB.registerLoopAnalyses(LAM); + PB.registerFunctionAnalyses(FAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerModuleAnalyses(MAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + ModulePassManager MPM = createMPM(PB, O, options); #ifndef __clang_gcanalyzer__ /* the analyzer cannot prove we have not added instrumentation callbacks with safepoints */ - MPM.run(M, AM.MAM); + MPM.run(M, MAM); #endif } void NewPM::printTimers() { -#if JL_LLVM_VERSION < 160000 - SI.getTimePasses().print(); -#endif + TimePasses.print(); } OptimizationLevel getOptLevel(int optlevel) {