diff --git a/xla/stream_executor/cuda/BUILD b/xla/stream_executor/cuda/BUILD
index e059c81f99ff9..8deb0436d478e 100644
--- a/xla/stream_executor/cuda/BUILD
+++ b/xla/stream_executor/cuda/BUILD
@@ -1761,3 +1761,67 @@ xla_cc_test(
         "@tsl//tsl/platform:test",
     ],
 )
+
+cc_library(
+    name = "assemble_compilation_provider",
+    srcs = ["assemble_compilation_provider.cc"],
+    hdrs = ["assemble_compilation_provider.h"],
+    tags = [
+        "cuda-only",
+        "gpu",
+    ],
+    deps = [
+        ":compilation_provider",
+        ":composite_compilation_provider",
+        ":defer_relocatable_compilation_compilation_provider",
+        ":driver_compilation_provider",
+        ":nvjitlink_compilation_provider",
+        ":nvjitlink_known_issues",
+        ":nvjitlink_support",
+        ":nvptxcompiler_compilation_provider",
+        ":ptx_compiler_support",
+        ":subprocess_compilation",
+        ":subprocess_compilation_provider",
+        "//xla:xla_proto_cc",
+        "//xla/stream_executor:semantic_version",
+        "//xla/stream_executor:stream_executor_h",
+        "@com_google_absl//absl/log",
+        "@com_google_absl//absl/log:check",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
+        "@com_google_absl//absl/strings",
+        "@tsl//tsl/platform:errors",
+        "@tsl//tsl/platform:statusor",
+    ],
+)
+
+xla_cc_test(
+    name = "assemble_compilation_provider_test",
+    srcs = ["assemble_compilation_provider_test.cc"],
+    data = [
+        ":nvlink",
+        ":ptxas",
+    ],
+    tags = [
+        "cuda-only",
+        "gpu",
+        "requires-gpu-nvidia",
+    ],
+    deps = [
+        ":assemble_compilation_provider",
+        ":compilation_provider",
+        ":cuda_platform",
+        ":nvjitlink_support",
+        ":ptx_compiler_support",
+        "//xla/stream_executor:platform",
+        "//xla/stream_executor:platform_manager",
+        "//xla/stream_executor:stream_executor_h",
+        "@com_google_absl//absl/status",
+        "@com_google_googletest//:gtest_main",
+        "@tsl//tsl/platform:cuda_root_path",
+        "@tsl//tsl/platform:path",
+        "@tsl//tsl/platform:status_matchers",
+        "@tsl//tsl/platform:statusor",
+        "@tsl//tsl/platform:test",
+    ],
+)
diff --git a/xla/stream_executor/cuda/assemble_compilation_provider.cc b/xla/stream_executor/cuda/assemble_compilation_provider.cc
new file mode 100644
index 0000000000000..9ed38dacb4f51
--- /dev/null
+++ b/xla/stream_executor/cuda/assemble_compilation_provider.cc
@@ -0,0 +1,285 @@
+/* Copyright 2024 The OpenXLA Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "xla/stream_executor/cuda/assemble_compilation_provider.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "absl/log/check.h"
+#include "absl/log/log.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/str_cat.h"
+#include "xla/stream_executor/cuda/compilation_provider.h"
+#include "xla/stream_executor/cuda/composite_compilation_provider.h"
+#include "xla/stream_executor/cuda/defer_relocatable_compilation_compilation_provider.h"
+#include "xla/stream_executor/cuda/driver_compilation_provider.h"
+#include "xla/stream_executor/cuda/nvjitlink_compilation_provider.h"
+#include "xla/stream_executor/cuda/nvjitlink_known_issues.h"
+#include "xla/stream_executor/cuda/nvjitlink_support.h"
+#include "xla/stream_executor/cuda/nvptxcompiler_compilation_provider.h"
+#include "xla/stream_executor/cuda/ptx_compiler_support.h"
+#include "xla/stream_executor/cuda/subprocess_compilation.h"
+#include "xla/stream_executor/cuda/subprocess_compilation_provider.h"
+#include "xla/stream_executor/semantic_version.h"
+#include "tsl/platform/errors.h"
+
+namespace stream_executor::cuda {
+namespace {
+
+// Returns true if NvJitLink is supported and should be used.
+absl::Status HasNvJitLinkSupport(const xla::DebugOptions& debug_options) {
+  if (!IsLibNvJitLinkSupported()) {
+    return absl::UnavailableError(
+        "LibNvJitLink is not supported (disabled during compilation).");
+  }
+
+  if (debug_options.xla_gpu_libnvjitlink_mode() ==
+      xla::DebugOptions::LIB_NV_JIT_LINK_MODE_DISABLED) {
+    return absl::UnavailableError(
+        "LibNvJitLink is disabled (explicitly disabled via flag).");
+  }
+
+  if (debug_options.xla_gpu_libnvjitlink_mode() ==
+      xla::DebugOptions::LIB_NV_JIT_LINK_MODE_ENABLED) {
+    VLOG(4) << "Considering NvJitLink since it was explicitly enabled.";
+    return absl::OkStatus();
+  }
+
+  if (LoadedNvJitLinkHasKnownIssues()) {
+    return absl::UnavailableError(
+        "LibNvJitLink is disabled since the loaded library version has known "
+        "issues.");
+  }
+
+  VLOG(4)
+      << "Considering NvJitLink since the loaded library version has no known "
+         "issues.";
+  return absl::OkStatus();
+}
+
+// Returns true if LibNvPtxCompiler is supported and should be used.
+absl::Status HasNvptxcompilerSupport(const xla::DebugOptions& debug_options) {
+  if (!IsLibNvPtxCompilerSupported()) {
+    return absl::UnavailableError(
+        "LibNvPtxCompiler is not supported (disabled during compilation).");
+  }
+
+  if (!debug_options.xla_gpu_enable_libnvptxcompiler()) {
+    return absl::UnavailableError(
+        "LibNvPtxCompiler is disabled (explicitly disabled via flag).");
+  }
+
+  VLOG(4) << "Considering NvPtxCompiler since it was supported and enabled.";
+  return absl::OkStatus();
+}
+
+// Returns an error if the user-set flags are not compatible with each other and
+// the build of XLA.
+absl::Status CheckIncompatibleFlagSettings(
+    const xla::DebugOptions& debug_options) {
+  if (debug_options.xla_gpu_libnvjitlink_mode() ==
+          xla::DebugOptions::LIB_NV_JIT_LINK_MODE_ENABLED &&
+      !IsLibNvJitLinkSupported()) {
+    return absl::UnavailableError("LibNvJitLink is not supported.");
+  }
+
+  if (debug_options.xla_gpu_enable_libnvptxcompiler() &&
+      !IsLibNvPtxCompilerSupported()) {
+    return absl::UnavailableError("LibNvPtxCompiler is not supported.");
+  }
+
+  return absl::OkStatus();
+}
+
+// Calls `GetToolVersion` on the given path if it's OK. Otherwise returns the
+// error status.
+absl::StatusOr<SemanticVersion> GetToolVersionIfToolAvailable(
+    const absl::StatusOr<std::string>& path) {
+  if (!path.ok()) {
+    return path.status();
+  }
+
+  return GetToolVersion(path.value());
+}
+
+// Returns the given non-OK status or the value as a string.
+template <typename T>
+std::string ToDebugString(const absl::StatusOr<T>& status_or) {
+  if (status_or.ok()) {
+    return absl::StrCat(status_or.value());
+  }
+  return std::string{status_or.status().message()};
+}
+
+}  // namespace
+
+absl::StatusOr<std::unique_ptr<CompilationProvider>>
+AssembleCompilationProvider(const xla::DebugOptions& debug_options) {
+  // TODO(b/381059098): Simplify this logic
+
+  TF_RETURN_IF_ERROR(CheckIncompatibleFlagSettings(debug_options));
+
+  std::string decision_log;
+  const auto append_to_decision_log = [&](std::string_view decision) {
+    VLOG(4) << decision;
+    absl::StrAppend(&decision_log, " - ", decision, "\n");
+  };
+
+  const absl::Status has_nvjitlink = HasNvJitLinkSupport(debug_options);
+  append_to_decision_log(
+      absl::StrCat("Has NvJitLink support: ", has_nvjitlink.message()));
+
+  const absl::Status has_nvptxcompiler = HasNvptxcompilerSupport(debug_options);
+  append_to_decision_log(
+      absl::StrCat("Has NvPtxCompiler support: ", has_nvptxcompiler.message()));
+
+  const bool parallel_compilation_support_is_desired =
+      debug_options.xla_gpu_enable_llvm_module_compilation_parallelism();
+  append_to_decision_log(
+      absl::StrCat("Parallel compilation support is desired: ",
+                   parallel_compilation_support_is_desired));
+
+  if (has_nvjitlink.ok() && has_nvptxcompiler.ok()) {
+    // If both libraries are supported, we will use them together. This setup
+    // supports parallel compilation and we have the most control over the
+    // versions being used.
+    VLOG(3) << "Using libnvptxcompiler for compilation and libnvjitlink for "
+               "linking.";
+    std::vector<std::unique_ptr<CompilationProvider>> providers;
+    providers.push_back(std::make_unique<NvptxcompilerCompilationProvider>());
+    providers.push_back(std::make_unique<NvJitLinkCompilationProvider>());
+    return CompositeCompilationProvider::Create(std::move(providers));
+  }
+
+  if (has_nvjitlink.ok() && !has_nvptxcompiler.ok()) {
+    // If we only have libnvjitlink, we use it for both compilation and
+    // linking. To support parallel compilation we defer compilation into
+    // relocatable modules to the linking step by using the
+    // DeferRelocatableCompilationCompilationProvider.
+    VLOG(3) << "Using libnvjitlink for compilation and linking.";
+    return DeferRelocatableCompilationCompilationProvider::Create(
+        std::make_unique<NvJitLinkCompilationProvider>());
+  }
+
+  if (has_nvptxcompiler.ok() && !parallel_compilation_support_is_desired) {
+    // If we only have libnvptxcompiler, but don't need parallel compilation, we
+    // can just use the library on its own - no linking required.
+    VLOG(3) << "Using only libnvptxcompiler for compilation - no parallel "
+               "compilation support needed.";
+    return std::make_unique<NvptxcompilerCompilationProvider>();
+  }
+
+  absl::StatusOr<std::string> ptxas_path =
+      FindPtxAsExecutable(debug_options.xla_gpu_cuda_data_dir());
+  absl::StatusOr<SemanticVersion> ptxas_version =
+      GetToolVersionIfToolAvailable(ptxas_path);
+
+  absl::StatusOr<std::string> nvlink_path =
+      FindNvlinkExecutable(debug_options.xla_gpu_cuda_data_dir());
+  absl::StatusOr<SemanticVersion> nvlink_version =
+      GetToolVersionIfToolAvailable(nvlink_path);
+
+  append_to_decision_log(
+      absl::StrCat("ptxas_path: ", ToDebugString(ptxas_path)));
+  append_to_decision_log(
+      absl::StrCat("ptxas_version: ", ToDebugString(ptxas_version)));
+  append_to_decision_log(
+      absl::StrCat("nvlink_path: ", ToDebugString(nvlink_path)));
+  append_to_decision_log(
+      absl::StrCat("nvlink_version: ", ToDebugString(nvlink_version)));
+
+  const bool has_subprocess_compilation_support =
+      ptxas_path.ok() && nvlink_path.ok();
+
+  if (has_subprocess_compilation_support) {
+    VLOG(3) << "Using ptxas(path=" << ptxas_path.value()
+            << ", version=" << ptxas_version.value() << ") and "
+            << "nvlink(path=" << nvlink_path.value()
+            << ", version=" << nvlink_version.value()
+            << ") for compilation and linking.";
+    return std::make_unique<SubprocessCompilationProvider>(ptxas_path.value(),
+                                                           nvlink_path.value());
+  }
+
+  const bool has_driver_compilation_support =
+      debug_options.xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found();
+  append_to_decision_log(absl::StrCat("Driver compilation is enabled: ",
+                                      has_driver_compilation_support));
+
+  if (parallel_compilation_support_is_desired && has_nvptxcompiler.ok() &&
+      has_driver_compilation_support) {
+    // It's possible to use libnvptxcompiler for compilation and the driver for
+    // linking. This setup supports parallel compilation but is less desired
+    // because we don't control the driver version. A too old driver might lead
+    // to linking errors.
+    VLOG(3) << "Using libnvptxcompiler for compilation and the driver for "
+               "linking.";
+    std::vector<std::unique_ptr<CompilationProvider>> providers;
+    providers.push_back(std::make_unique<NvptxcompilerCompilationProvider>());
+    providers.push_back(std::make_unique<DriverCompilationProvider>());
+    return CompositeCompilationProvider::Create(std::move(providers));
+  }
+
+  if (ptxas_path.ok() && has_driver_compilation_support) {
+    // It's possible to use ptxas for compilation and the driver for linking.
+    // This setup supports parallel compilation but is less desired because we
+    // don't control the driver version. A too old driver might lead to linking
+    // errors.
+    VLOG(3) << "Using libnvptxcompiler for compilation and the driver for "
+               "linking.";
+    std::vector<std::unique_ptr<CompilationProvider>> providers;
+    auto ptxas_provider = std::make_unique<SubprocessCompilationProvider>(
+        ptxas_path.value(), std::string{});
+    providers.push_back(std::move(ptxas_provider));
+    providers.push_back(std::make_unique<DriverCompilationProvider>());
+    return CompositeCompilationProvider::Create(std::move(providers));
+  }
+
+  // Passed this point we won't be able to support parallel compilation, so we
+  // error out if it was requested.
+  if (parallel_compilation_support_is_desired) {
+    return absl::UnavailableError(
+        absl::StrCat("Parallel compilation was requested, but no available "
+                     "compilation provider supports it. Details: \n",
+                     decision_log));
+  }
+
+  if (ptxas_path.ok()) {
+    VLOG(3) << "Using ptxas(path=" << ptxas_path.value()
+            << ", version=" << ptxas_version.value()
+            << ") for compilation. nvlink is not available.";
+    return std::make_unique<SubprocessCompilationProvider>(ptxas_path.value(),
+                                                           std::string{});
+  }
+
+  if (has_driver_compilation_support) {
+    VLOG(3) << "Using the driver for compilation.";
+    return std::make_unique<DriverCompilationProvider>();
+  }
+
+  return absl::UnavailableError(absl::StrCat(
+      "No PTX compilation provider is available. Neither ptxas/nvlink nor "
+      "nvjtlink is available. As a fallback you can enable JIT compilation "
+      "in the CUDA driver via the flag "
+      "`--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found`. Details: \n",
+      decision_log));
+}
+
+}  // namespace stream_executor::cuda
diff --git a/xla/stream_executor/cuda/assemble_compilation_provider.h b/xla/stream_executor/cuda/assemble_compilation_provider.h
new file mode 100644
index 0000000000000..92bfd5c9eb60b
--- /dev/null
+++ b/xla/stream_executor/cuda/assemble_compilation_provider.h
@@ -0,0 +1,49 @@
+/* Copyright 2024 The OpenXLA Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef XLA_STREAM_EXECUTOR_CUDA_ASSEMBLE_COMPILATION_PROVIDER_H_
+#define XLA_STREAM_EXECUTOR_CUDA_ASSEMBLE_COMPILATION_PROVIDER_H_
+
+#include <memory>
+
+#include "absl/status/statusor.h"
+#include "xla/stream_executor/cuda/compilation_provider.h"
+#include "xla/xla.pb.h"
+
+namespace stream_executor::cuda {
+
+// Returns the best available CompilationProvider while considering the
+// following flags from DebugOptions:
+// - xla_gpu_enable_libnvptxcompiler
+// - xla_gpu_libnvjitlink_mode
+// - xla_gpu_cuda_data_dir
+// - xla_gpu_enable_llvm_module_compilation_parallelism
+// - xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found
+//
+// Considered compilation methods are:
+// - nvptxcompiler
+// - nvjitlink
+// - subprocess(ptxas, nvlink)
+// - driver
+//
+// Returns an error if either no compilation method is available or if
+// requested features like compilation parallelism are not supported.
+// Also returns an error if contradicting flags are set.
+absl::StatusOr<std::unique_ptr<CompilationProvider>>
+AssembleCompilationProvider(const xla::DebugOptions& debug_options);
+
+}  // namespace stream_executor::cuda
+
+#endif  // XLA_STREAM_EXECUTOR_CUDA_ASSEMBLE_COMPILATION_PROVIDER_H_
diff --git a/xla/stream_executor/cuda/assemble_compilation_provider_test.cc b/xla/stream_executor/cuda/assemble_compilation_provider_test.cc
new file mode 100644
index 0000000000000..3485d196726c2
--- /dev/null
+++ b/xla/stream_executor/cuda/assemble_compilation_provider_test.cc
@@ -0,0 +1,160 @@
+/* Copyright 2024 The OpenXLA Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "xla/stream_executor/cuda/assemble_compilation_provider.h"
+
+#include <memory>
+#include <string>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "absl/status/status.h"
+#include "xla/stream_executor/cuda/compilation_provider.h"
+#include "xla/stream_executor/cuda/nvjitlink_support.h"
+#include "xla/stream_executor/cuda/ptx_compiler_support.h"
+#include "tsl/platform/cuda_root_path.h"
+#include "tsl/platform/path.h"
+#include "tsl/platform/status_matchers.h"
+#include "tsl/platform/statusor.h"
+#include "tsl/platform/test.h"
+
+namespace stream_executor::cuda {
+
+namespace {
+using ::testing::AllOf;
+using ::testing::HasSubstr;
+using ::tsl::testing::StatusIs;
+
+TEST(AssembleCompilationProviderTest,
+     ReturnsErrorIfNoCompilationProviderIsAvailable) {
+  if (!tsl::CandidateCudaRoots().empty()) {
+    GTEST_SKIP() << "With the current API design We can't control whether "
+                    "`FindCudaExecutable` will find some ptxas installed on "
+                    "the testrunner machine. Therefore we skip this test.";
+  }
+
+  xla::DebugOptions debug_options;
+  debug_options.set_xla_gpu_enable_llvm_module_compilation_parallelism(false);
+  debug_options.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(false);
+  debug_options.set_xla_gpu_enable_libnvptxcompiler(false);
+  debug_options.set_xla_gpu_libnvjitlink_mode(
+      xla::DebugOptions::LIB_NV_JIT_LINK_MODE_DISABLED);
+  debug_options.set_xla_gpu_cuda_data_dir("/does/not/exist");
+
+  EXPECT_THAT(AssembleCompilationProvider(debug_options),
+              StatusIs(absl::StatusCode::kUnavailable));
+}
+
+TEST(AssembleCompilationProviderTest,
+     OffersDriverCompilationIfAllowedAndNothingElseIsAvailable) {
+  if (!tsl::CandidateCudaRoots().empty()) {
+    GTEST_SKIP() << "With the current API design We can't control whether "
+                    "`FindCudaExecutable` will find some ptxas installed on "
+                    "the testrunner machine. Therefore we skip this test.";
+  }
+
+  xla::DebugOptions debug_options;
+  debug_options.set_xla_gpu_enable_llvm_module_compilation_parallelism(false);
+  debug_options.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(true);
+  debug_options.set_xla_gpu_enable_libnvptxcompiler(false);
+  debug_options.set_xla_gpu_libnvjitlink_mode(
+      xla::DebugOptions::LIB_NV_JIT_LINK_MODE_DISABLED);
+  debug_options.set_xla_gpu_cuda_data_dir("/does/not/exist");
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<CompilationProvider> compilation_provider,
+      AssembleCompilationProvider(debug_options));
+
+  EXPECT_THAT(compilation_provider->name(),
+              HasSubstr("DriverCompilationProvider"));
+}
+
+TEST(AssembleCompilationProviderTest,
+     OffersSubprocessCompilationIfLibraryCompilationIsDisabled) {
+  std::string cuda_dir;
+  if (!tsl::io::GetTestWorkspaceDir(&cuda_dir)) {
+    GTEST_SKIP() << "No test workspace directory found which means we can't "
+                    "run this test. Was this called in a Bazel environment?";
+  }
+
+  xla::DebugOptions debug_options;
+  debug_options.set_xla_gpu_enable_llvm_module_compilation_parallelism(false);
+  debug_options.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(false);
+  debug_options.set_xla_gpu_enable_libnvptxcompiler(false);
+  debug_options.set_xla_gpu_libnvjitlink_mode(
+      xla::DebugOptions::LIB_NV_JIT_LINK_MODE_DISABLED);
+  debug_options.set_xla_gpu_cuda_data_dir(cuda_dir);
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<CompilationProvider> compilation_provider,
+      AssembleCompilationProvider(debug_options));
+
+  EXPECT_THAT(compilation_provider->name(),
+              HasSubstr("SubprocessCompilationProvider"));
+}
+
+TEST(
+    AssembleCompilationProviderTest,
+    OffersLibNvJitLinkWithParallelCompilationShimIfLibNvPtxCompilerIsDisabled) {
+  if (!IsLibNvJitLinkSupported()) {
+    GTEST_SKIP() << "LibNvJitLink is not supported in this build.";
+  }
+
+  xla::DebugOptions debug_options;
+  debug_options.set_xla_gpu_enable_llvm_module_compilation_parallelism(false);
+  debug_options.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(false);
+  debug_options.set_xla_gpu_enable_libnvptxcompiler(false);
+  debug_options.set_xla_gpu_libnvjitlink_mode(
+      xla::DebugOptions::LIB_NV_JIT_LINK_MODE_AUTO);
+  debug_options.set_xla_gpu_cuda_data_dir("/does/not/exist");
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<CompilationProvider> compilation_provider,
+      AssembleCompilationProvider(debug_options));
+
+  EXPECT_THAT(compilation_provider->name(),
+              AllOf(HasSubstr("DeferRelocatableCompilation"),
+                    HasSubstr("NvJitLinkCompilationProvider")));
+}
+
+TEST(AssembleCompilationProviderTest,
+     OffersLibNvJitLinkAndLibNvPtxCompilerIfBothAreEnabled) {
+  if (!IsLibNvJitLinkSupported()) {
+    GTEST_SKIP() << "LibNvJitLink is not supported in this build.";
+  }
+  if (!IsLibNvPtxCompilerSupported()) {
+    GTEST_SKIP() << "LibNvPtxCompiler is not supported in this build.";
+  }
+
+  xla::DebugOptions debug_options;
+  debug_options.set_xla_gpu_enable_llvm_module_compilation_parallelism(false);
+  debug_options.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(false);
+  debug_options.set_xla_gpu_enable_libnvptxcompiler(true);
+  debug_options.set_xla_gpu_libnvjitlink_mode(
+      xla::DebugOptions::LIB_NV_JIT_LINK_MODE_AUTO);
+  debug_options.set_xla_gpu_cuda_data_dir("/does/not/exist");
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<CompilationProvider> compilation_provider,
+      AssembleCompilationProvider(debug_options));
+
+  EXPECT_THAT(compilation_provider->name(),
+              AllOf(HasSubstr("CompositeCompilationProvider"),
+                    HasSubstr("NvJitLinkCompilationProvider"),
+                    HasSubstr("NvptxcompilerCompilationProvider")));
+}
+
+}  // namespace
+}  // namespace stream_executor::cuda
diff --git a/xla/stream_executor/cuda/subprocess_compilation.cc b/xla/stream_executor/cuda/subprocess_compilation.cc
index b3885becc7b05..93e6cc73f5c2b 100644
--- a/xla/stream_executor/cuda/subprocess_compilation.cc
+++ b/xla/stream_executor/cuda/subprocess_compilation.cc
@@ -213,7 +213,7 @@ absl::StatusOr<std::string> FindCudaExecutable(
                             kNoExcludedVersions);
 }
 
-static absl::StatusOr<std::string> FindPtxAsExecutable(
+absl::StatusOr<std::string> FindPtxAsExecutable(
     std::string_view preferred_cuda_dir) {
   static constexpr SemanticVersion kMinimumSupportedPtxAsVersion{11, 8, 0};
   static constexpr SemanticVersion kBuggyPtxAsVersions[] = {{12, 3, 103}};
@@ -453,7 +453,7 @@ absl::StatusOr<std::vector<uint8_t>> BundleGpuAsmUsingFatbin(
   return std::vector<uint8_t>(result_blob.begin(), result_blob.end());
 }
 
-static absl::StatusOr<std::string> FindNvlinkExecutable(
+absl::StatusOr<std::string> FindNvlinkExecutable(
     std::string_view preferred_cuda_dir) {
   static constexpr SemanticVersion kMinimumNvlinkVersion{11, 8, 0};
   static constexpr absl::Span<const SemanticVersion> kNoExcludedVersions{};
diff --git a/xla/stream_executor/cuda/subprocess_compilation.h b/xla/stream_executor/cuda/subprocess_compilation.h
index b38b59d2977b5..6bb374b9d6af6 100644
--- a/xla/stream_executor/cuda/subprocess_compilation.h
+++ b/xla/stream_executor/cuda/subprocess_compilation.h
@@ -61,6 +61,16 @@ absl::StatusOr<std::string> FindCudaExecutable(
 absl::StatusOr<std::string> FindCudaExecutable(
     std::string_view binary_name, std::string_view preferred_cuda_dir);
 
+// Returns the path to the first found ptxas binary that fulfills our version
+// requirements.
+absl::StatusOr<std::string> FindPtxAsExecutable(
+    std::string_view preferred_cuda_dir);
+
+// Returns the path to the first found nvlink binary that fulfills our version
+// requirements.
+absl::StatusOr<std::string> FindNvlinkExecutable(
+    std::string_view preferred_cuda_dir);
+
 // Runs tool --version and parses its version string. All the usual CUDA
 // tools are supported.
 absl::StatusOr<SemanticVersion> GetToolVersion(std::string_view tool_path);