Skip to content

Commit

Permalink
Sync local branch with remote
Browse files Browse the repository at this point in the history
  • Loading branch information
Richard Top committed May 21, 2024
2 parents 4c9b5d0 + 34a29a4 commit 7bef856
Show file tree
Hide file tree
Showing 10 changed files with 341 additions and 7 deletions.
14 changes: 13 additions & 1 deletion EESSI-extend-2023.06-easybuild.eb
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,19 @@ elseif (os.getenv("NESSI_SITE_INSTALL") ~= nil) then
if ((os.getenv("NESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("NESSI_USER_INSTALL") ~= nil)) then
LmodError("You cannot use NESSI_SITE_INSTALL in combination with any other NESSI_*_INSTALL environment variables")
end
easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), 'versions', 'host_injections')
site_install = os.getenv("NESSI_SITE_INSTALL")
site_modulepath = nil
if (site_install ~= nil) then
-- Check the folder exists
if not isDir(site_install) then
LmodError("The location of NESSI_SITE_INSTALL (" .. site_install .. ") does not exist or is not a folder")
end
if (mode() == "load") then
LmodMessage("Configuring for use of NESSI_SITE_INSTALL under " .. site_install)
end
easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), os.getenv("EESSI_CVMFS_REPO"), site_install)
site_modulepath = pathJoin(easybuild_installpath, 'modules', 'all')
end
else
-- Deal with user and project installs
project_install = os.getenv("NESSI_PROJECT_INSTALL")
Expand Down
5 changes: 3 additions & 2 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,15 @@ pr_diff=$(ls [0-9]*.diff | head -1)
# for now, this just reinstalls all scripts. Not the most elegant, but it works
${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}

# Install full CUDA SDK in host_injections
# Install full CUDA SDK and cu* libraries in host_injections
# Hardcode this for now, see if it works
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
# Allow skipping CUDA SDK install in e.g. CI environments
if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh -c 12.1.1 -d 8.9.2.26
else
echo "Skipping installation of CUDA SDK in host_injections, since the --skip-cuda-install flag was passed"
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed"
fi

# Install drivers in host_injections
Expand Down
29 changes: 27 additions & 2 deletions create_lmodsitepackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,13 +174,38 @@
end
end
-- Load hook guarding the cuDNN module.
--
-- t is the table handed to the load hook; only t.modFullName is read here.
-- When the module being loaded is cuDNN itself, verify that the full cuDNN
-- package was installed on the host under host_injections. This is required
-- for end users to build additional cuDNN dependent software. If that
-- installation is missing, refuse to load the module by raising an LmodError
-- that explains how to set up GPU support for NESSI.
-- Any other module passes through untouched.
local function eessi_cudnn_enabled_load_hook(t)
    -- everything before the first '/' of the full module name (nil when there is no '/')
    local simpleName = string.match(t.modFullName, "(.-)/")
    if simpleName ~= 'cuDNN' then
        return
    end

    -- get the full host_injections path
    local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
    -- build final path where the cuDNN software should be installed
    local cudnnEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
    if isDir(cudnnEasyBuildDir) then
        return
    end

    local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n"
    local advice = "but while the module file exists, the actual software is not entirely shipped with NESSI "
    advice = advice .. "due to licencing. You will need to install a full copy of the cuDNN package where NESSI "
    advice = advice .. "can find it.\\n"
    advice = advice .. refer_to_docs
    LmodError("\\nYou requested to load ", simpleName, " ", advice)
end
-- Combine both functions into a single one, as we can only register one function as load hook in lmod
-- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed
function eessi_load_hook(t)
    -- Modules that do not come from the NESSI prefix are left alone: this avoids getting
    -- an Lmod Error when trying to load a CUDA or cuDNN module from a local software stack
    if not from_eessi_prefix(t) then
        return
    end
    -- Module is in the NESSI prefix: apply the CUDA hook, then the cuDNN hook
    eessi_cuda_enabled_load_hook(t)
    eessi_cudnn_enabled_load_hook(t)
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ easyconfigs:
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451;
options:
from-pr: 19451
- cuDNN-8.9.2.26-CUDA-12.1.1.eb
- OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# 2024-05-19
# Rebuild NESSI-extend/2023.06-easybuild
# The current version does not handle NESSI_SITE_INSTALL correctly.
easyconfigs:
- EESSI-extend-2023.06-easybuild.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# 2024-05-20
# Rebuild NESSI-extend/2023.06-easybuild
# Need to revert to the original version.
easyconfigs:
- EESSI-extend-2023.06-easybuild.eb
76 changes: 76 additions & 0 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,62 @@ def post_sanitycheck_cuda(self, *args, **kwargs):
raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!")


def post_sanitycheck_cuDNN(self, *args, **kwargs):
    """
    Remove files from cuDNN installation that we are not allowed to ship,
    and replace them with a symlink to a corresponding installation under host_injections.

    The allowlist of files that are kept is built from the LICENSE file found in the
    installation directory: on the line that starts with the "2. Distribution." sentence,
    every word starting with a dot is taken as a redistributable file extension.
    The LICENSE file itself is always kept. A file is kept when either the part of its name
    before the first dot, or its first extension (e.g. '.so' for 'libfoo.so.8'), is allowlisted.

    :param self: easyblock instance; its 'name', 'installdir' and 'log' attributes are used
    :raises EasyBuildError: if the hook is triggered for an easyconfig other than cuDNN, or if
                            the symlink target would be the same path as the file being replaced
    """
    if self.name != 'cuDNN':
        raise EasyBuildError("cuDNN-specific hook triggered for non-cuDNN easyconfig?!")

    print_msg("Replacing files in cuDNN installation that we can not ship with symlinks to host_injections...")

    allowlist = ['LICENSE']

    # read cuDNN LICENSE, construct allowlist based on the distribution clause
    # that specifies the list of files that can be shipped
    license_path = os.path.join(self.installdir, 'LICENSE')
    search_string = "2. Distribution. The following portions of the SDK are distributable under the Agreement:"
    with open(license_path) as infile:
        for line in infile:
            # match AND slice on the stripped line, so leading whitespace
            # can not shift the cut into the search string itself
            stripped = line.strip()
            if stripped.startswith(search_string):
                distributable = stripped[len(search_string):]
                for word in distributable.split():
                    # only retain words starting with a dot '.', without trailing dots
                    if word.startswith('.'):
                        entry = word.rstrip('.')
                        # a word made only of dots would yield '' and allowlist every dotfile
                        if entry:
                            allowlist.append(entry)

    allowlist = sorted(set(allowlist))
    self.log.info("Allowlist for files in cuDNN installation that can be redistributed: " + ', '.join(allowlist))

    # iterate over all files in the cuDNN installation directory
    for dir_path, _, files in os.walk(self.installdir):
        for filename in files:
            full_path = os.path.join(dir_path, filename)
            # we only really care about real files, i.e. not symlinks
            if os.path.islink(full_path):
                continue

            # split once per file; 'extension' is None when the name has no dot,
            # so no value from a previous file can leak into this check
            name_parts = filename.split('.')
            basename = name_parts[0]
            extension = '.' + name_parts[1] if len(name_parts) > 1 else None

            if basename in allowlist:
                self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path)
            elif extension in allowlist:
                self.log.debug("%s is found in allowlist, so keeping it: %s", extension, full_path)
            else:
                self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s",
                               filename, full_path)
                # if it is not in the allowlist, delete the file and create a symlink to host_injections
                host_inj_path = full_path.replace('versions', 'host_injections')
                # make sure source and target of symlink are not the same
                if full_path == host_inj_path:
                    raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you "
                                         "are using this hook for a NESSI installation?",
                                         full_path, host_inj_path)
                remove_file(full_path)
                symlink(host_inj_path, full_path)


def inject_gpu_property(ec):
"""
Add 'gpu' property, via modluafooter easyconfig parameter
Expand All @@ -712,6 +768,25 @@ def inject_gpu_property(ec):
ec[key] = '\n'.join([ec_dict[key], value])
else:
ec[key] = value
# Check if cuDNN is in the dependencies, if so add the 'gpu' Lmod property
if ('cuDNN' in [dep[0] for dep in iter(ec_dict['dependencies'])]):
ec.log.info("Injecting gpu as Lmod arch property and envvar with cuDNN version")
key = 'modluafooter'
value = 'add_property("arch","gpu")'
cudnn_version = 0
for dep in iter(ec_dict['dependencies']):
# Make cuDNN a build dependency only (rpathing saves us from link errors)
if 'cuDNN' in dep[0]:
cudnn_version = dep[1]
ec_dict['dependencies'].remove(dep)
if dep not in ec_dict['builddependencies']:
ec_dict['builddependencies'].append(dep)
value = '\n'.join([value, 'setenv("EESSICUDNNVERSION","%s")' % cudnn_version])
if key in ec_dict:
if not value in ec_dict[key]:
ec[key] = '\n'.join([ec_dict[key], value])
else:
ec[key] = value
return ec


Expand Down Expand Up @@ -768,4 +843,5 @@ def inject_gpu_property(ec):

POST_SANITYCHECK_HOOKS = {
'CUDA': post_sanitycheck_cuda,
'cuDNN': post_sanitycheck_cuDNN,
}
1 change: 0 additions & 1 deletion eessi_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,6 @@ if [[ "${ACCESS}" == "rw" ]]; then
EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}")

EESSI_WRITABLE_OVERLAY="container:fuse-overlayfs"
EESSI_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${repo_name}"
if [[ ! -z ${LOWER_DIRS} ]]; then
# need to convert ':' in LOWER_DIRS to ',' because bind mounts use ',' as
# separator while the lowerdir overlayfs option uses ':'
Expand Down
2 changes: 1 addition & 1 deletion install_scripts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ copy_files_by_list ${TOPDIR}/scripts ${INSTALL_PREFIX}/scripts "${script_files[@

# Copy files for the scripts/gpu_support/nvidia directory
nvidia_files=(
install_cuda_host_injections.sh link_nvidia_host_libraries.sh
install_cuda_host_injections.sh install_cuDNN_host_injections.sh link_nvidia_host_libraries.sh
)
copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia ${INSTALL_PREFIX}/scripts/gpu_support/nvidia "${nvidia_files[@]}"

Expand Down
Loading

0 comments on commit 7bef856

Please sign in to comment.