Skip to content

Commit

Permalink
Allow overriding the Lmod GPU driver check
Browse files Browse the repository at this point in the history
  • Loading branch information
Richard Top committed May 16, 2024
1 parent 458d0e1 commit 9a09643
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
3 changes: 3 additions & 0 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,9 @@ fi
# if not, an error is produced, and the bot flags the whole build as failed (even when not installing GPU software)
# ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh

# Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway)
export EESSI_OVERRIDE_GPU_CHECK=1

# use PR patch file to determine in which easystack files stuff was added
changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing')
if [ -z "${changed_easystacks}" ]; then
Expand Down
9 changes: 6 additions & 3 deletions create_lmodsitepackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,9 @@
end
-- when loading CUDA-enabled modules, check if the necessary driver libraries are accessible to the NESSI linker;
-- otherwise, refuse to load the requested module and print an error message
local haveGpu = mt:haveProperty(simpleName,"arch","gpu")
if haveGpu then
local checkGpu = mt:haveProperty(simpleName,"arch","gpu")
local overrideGpuCheck = os.getenv("EESSI_OVERRIDE_GPU_CHECK")
if checkGpu and (overrideGpuCheck == nil) then
local arch = os.getenv("EESSI_CPU_FAMILY") or ""
local cudaVersionFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt"
local cudaDriverFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/libcuda.so"
Expand All @@ -140,7 +141,9 @@
if not (cudaDriverExists or singularityCudaExists) then
local advice = "which relies on the CUDA runtime environment and driver libraries. "
advice = advice .. "In order to be able to use the module, you will need "
advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system.\\n"
advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system. You can "
advice = advice .. "override this check by setting the environment variable EESSI_OVERRIDE_GPU_CHECK but "
advice = advice .. "the loaded application will not be able to execute on your system.\\n"
advice = advice .. refer_to_docs
LmodError("\\nYou requested to load ", simpleName, " ", advice)
else
Expand Down

0 comments on commit 9a09643

Please sign in to comment.