From 2050a89364e6bc7c45c8a7e75187180bcba09336 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 5 May 2024 14:08:51 +0200 Subject: [PATCH 01/25] {2023.06}[foss/2023a] cuDNN v8.9.2.26 --- .../pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml index 3018901ca9..e948a2e55d 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml @@ -26,3 +26,4 @@ easyconfigs: # from-commit: ae2fc38307b56ae7ac12dff95c9d07404e1a8530 # trying from-pr as an alternative from-pr: 20379 + - cuDNN-8.9.2.26-CUDA-12.1.1.eb From 1f0206f56a6ce9a3797d7c8e60402de7390289b7 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 5 May 2024 19:00:41 +0200 Subject: [PATCH 02/25] add post sanitycheck hook for cuDNN --- eb_hooks.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/eb_hooks.py b/eb_hooks.py index 199dab8e54..44877103ea 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -688,6 +688,62 @@ def post_sanitycheck_cuda(self, *args, **kwargs): raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!") +def post_sanitycheck_cuDNN(self, *args, **kwargs): + """ + Remove files from cuDNN installation that we are not allowed to ship, + and replace them with a symlink to a corresponding installation under host_injections. + """ + if self.name == 'cuDNN': + print_msg("Replacing files in cuDNN installation that we can not ship with symlinks to host_injections...") + + allowlist = ['LICENSE'] + + # read cuDNN LICENSE, construct allowlist based on section 2.6 that specifies list of files that can be shipped + license_path = os.path.join(self.installdir, 'LICENSE') + search_string = "2. Distribution. The following portions of the SDK are distributable under the Agreement:" + with open(license_path) as infile: + for line in infile: + if line.strip().startswidth(search_string): + # remove search string, split into words, remove trailing + # dots '.' and only retain words starting with a dot '.' + distributable = line[len(search_string):] + for word in distributable.split(): + if word[0] == '.': + allowlist.append(word.rstrip('.')) + + allowlist = sorted(set(allowlist)) + self.log.info("Allowlist for files in cuDNN installation that can be redistributed: " + ', '.join(allowlist)) + + # iterate over all files in the CUDA installation directory + for dir_path, _, files in os.walk(self.installdir): + for filename in files: + full_path = os.path.join(dir_path, filename) + # we only really care about real files, i.e. not symlinks + if not os.path.islink(full_path): + # check if the current file is part of the allowlist + basename = filename.split('.')[0] + if '.' in filename: + extension = '.' + filename.split('.')[1] + if basename in allowlist: + self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) + elif '.' in filename and extension in allowlist: + self.log.debug("%s is found in allowlist, so keeping it: %s", extension, full_path) + else: + self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s", + filename, full_path) + # if it is not in the allowlist, delete the file and create a symlink to host_injections + host_inj_path = full_path.replace('versions', 'host_injections') + # make sure source and target of symlink are not the same + if full_path == host_inj_path: + raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " + "are using this hook for a NESSI installation?", + full_path, host_inj_path) + remove_file(full_path) + symlink(host_inj_path, full_path) + else: + raise EasyBuildError("cuDNN-specific hook triggered for non-cuDNN easyconfig?!") + + def inject_gpu_property(ec): """ Add 'gpu' property, via modluafooter easyconfig parameter @@ -768,4 +824,5 @@ def inject_gpu_property(ec): POST_SANITYCHECK_HOOKS = { 'CUDA': post_sanitycheck_cuda, + 'cuDNN': post_sanitycheck_cuDNN, } From 889ee40a9e4422b20249faa77638127db8d8f1e4 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 5 May 2024 20:12:45 +0200 Subject: [PATCH 03/25] fix function name typo --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 44877103ea..223b14455d 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -703,7 +703,7 @@ def post_sanitycheck_cuDNN(self, *args, **kwargs): search_string = "2. Distribution. The following portions of the SDK are distributable under the Agreement:" with open(license_path) as infile: for line in infile: - if line.strip().startswidth(search_string): + if line.strip().startswith(search_string): # remove search string, split into words, remove trailing # dots '.' and only retain words starting with a dot '.' distributable = line[len(search_string):] From 31c5e800f7b3955d5fb226ca20194f889a8a0e54 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 6 May 2024 17:50:04 +0200 Subject: [PATCH 04/25] add installation of cuDNN under host_injections --- EESSI-install-software.sh | 5 +- .../nvidia/install_cuDNN_host_injections.sh | 210 ++++++++++++++++++ 2 files changed, 213 insertions(+), 2 deletions(-) create mode 100755 scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 6c680571e2..a1591958d1 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -199,14 +199,15 @@ pr_diff=$(ls [0-9]*.diff | head -1) # for now, this just reinstalls all scripts. Note the most elegant, but works ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} -# Install full CUDA SDK in host_injections +# Install full CUDA SDK and cu* libraries in host_injections # Hardcode this for now, see if it works # TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install # Allow skipping CUDA SDK install in e.g. CI environments if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cudnn_host_injections.sh -c 12.1.1 -d 8.9.2.26 else - echo "Skipping installation of CUDA SDK in host_injections, since the --skip-cuda-install flag was passed" + echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed" fi # Install drivers in host_injections diff --git a/scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh b/scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh new file mode 100755 index 0000000000..7585e51458 --- /dev/null +++ b/scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh @@ -0,0 +1,210 @@ +#!/usr/bin/env bash + +# This script can be used to install cuDNN under the `.../host_injections` directory. +# This provides the parts of the cuDNN installation that cannot be redistributed as +# part of NESSI due to license limitations. While GPU-based software from NESSI will +# _run_ without these, installation of additional software that requires the cuDNN +# installation(s) under `host_injections` to be present. +# +# The `host_injections` directory is a variant symlink that by default points to +# `/opt/eessi`, unless otherwise defined in the local CVMFS configuration (see +# https://cvmfs.readthedocs.io/en/stable/cpt-repo.html#variant-symlinks). For the +# installation to be successful, this directory needs to be writeable by the user +# executing this script. + +# Initialise our bash functions +TOPDIR=$(dirname $(realpath $BASH_SOURCE)) +source "$TOPDIR"/../../utils.sh + +# Function to display help message +show_help() { + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " --help Display this help message" + echo " -c, --cuda-version CUDA_VERSION Specify a version of CUDA to be used" + echo " when installing cuDNN (must" + echo " have a corresponding easyconfig in the" + echo " EasyBuild release)" + echo " -d, --cudnn-version CUDNN_VERSION Specify a version of cuDNN to install (must" + echo " have a corresponding easyconfig in the" + echo " EasyBuild release)" + echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" + echo " storage during the cuDNN install" + echo " (must have >10GB available)" +} + +# Initialize variables +cuda_version="" +cudnn_version="" + +# Parse command-line options +while [[ $# -gt 0 ]]; do + case "$1" in + --help) + show_help + exit 0 + ;; + -c|--cuda-version) + if [ -n "$2" ]; then + cuda_version="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + -d|--cudnn-version) + if [ -n "$2" ]; then + cudnn_version="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + -t|--temp-dir) + if [ -n "$2" ]; then + CUDA_TEMP_DIR="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + *) + show_help + fatal_error "Error: Unknown option: $1" + ;; + esac +done + +# Make sure NESSI is initialised +check_eessi_initialised + +# Make sure the CUDA version supplied is a semantic version +is_semantic_version() { + local version=$1 + local regex='^[0-9]+\.[0-9]+\.[0-9]+$' + + if [[ $version =~ $regex ]]; then + return 0 # Return success (0) if it's a semantic version + else + return 1 # Return failure (1) if it's not a semantic version + fi +} +if ! is_semantic_version "$cuda_version"; then + show_help + error="\nYou must provide a semantic version for CUDA (e.g., 12.1.1) via the appropriate\n" + error="${error}command line option. This script is intended for use with NESSI so the 'correct'\n" + error="${error}version to provide is probably one of those available under\n" + error="${error}$EESSI_SOFTWARE_PATH/software/cuDNN\n" + fatal_error "${error}" +fi + +# As an installation location just use $EESSI_SOFTWARE_PATH but replacing `versions` with `host_injections` +cudnn_install_parent=${EESSI_SOFTWARE_PATH/versions/host_injections} + +# Only install cuDNN if specified version is not found. +# (existence of easybuild subdir implies a successful install) +if [ -d "${cudnn_install_parent}"/software/cuDNN/*-CUDA-"${cuda_version}"/easybuild ]; then + echo_green "cuDNN software found! No need to install cuDNN again." +else + # We need to be able write to the installation space so let's make sure we can + if ! create_directory_structure "${cudnn_install_parent}"/software/cuDNN ; then + fatal_error "No write permissions to directory ${cudnn_install_parent}/software/cuDNN" + fi + + # we need a directory we can use for temporary storage + if [[ -z "${CUDA_TEMP_DIR}" ]]; then + tmpdir=$(mktemp -d) + else + tmpdir="${CUDA_TEMP_DIR}"/temp + if ! mkdir "$tmpdir" ; then + fatal_error "Could not create directory ${tmpdir}" + fi + fi + + required_space_in_tmpdir=50000 + # Let's see if we have sources and build locations defined if not, we use the temporary space + if [[ -z "${EASYBUILD_BUILDPATH}" ]]; then + export EASYBUILD_BUILDPATH=${tmpdir}/build + required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000)) + fi + if [[ -z "${EASYBUILD_SOURCEPATH}" ]]; then + export EASYBUILD_SOURCEPATH=${tmpdir}/sources + required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000)) + fi + + # The install is pretty fat, you need lots of space for download/unpack/install (~3*5GB), + # need to do a space check before we proceed + avail_space=$(df --output=avail "${cudnn_install_parent}"/ | tail -n 1 | awk '{print $1}') + if (( avail_space < 5000000 )); then + fatal_error "Need at least 5GB disk space to install cuDNN under ${cudnn_install_parent}, exiting now..." + fi + avail_space=$(df --output=avail "${tmpdir}"/ | tail -n 1 | awk '{print $1}') + if (( avail_space < required_space_in_tmpdir )); then + error="Need at least ${required_space_in_tmpdir} disk space under ${tmpdir}.\n" + error="${error}Set the environment variable CUDA_TEMP_DIR to a location with adequate space to pass this check." + error="${error}You can alternatively set EASYBUILD_BUILDPATH and/or EASYBUILD_SOURCEPATH " + error="${error}to reduce this requirement. Exiting now..." + fatal_error "${error}" + fi + + if ! command -v "eb" &>/dev/null; then + echo_yellow "Attempting to load an EasyBuild module to do actual install" + module load EasyBuild + # There are some scenarios where this may fail + if [ $? -ne 0 ]; then + error="'eb' command not found in your environment and\n" + error="${error} module load EasyBuild\n" + error="${error}failed for some reason.\n" + error="${error}Please re-run this script with the 'eb' command available." + fatal_error "${error}" + fi + fi + + cudnn_easyconfig="cuDNN-${cudnn_version}-CUDA-${cuda_version}.eb" + + # Check the easyconfig file is available in the release + # (eb search always returns 0, so we need a grep to ensure a usable exit code) + eb --search ^${cudnn_easyconfig}|grep cuDNN > /dev/null 2>&1 + # Check the exit code + if [ $? -ne 0 ]; then + eb_version=$(eb --version) + available_cudnn_easyconfigs=$(eb --search ^cuDNN-*.eb|grep cuDNN) + + error="The easyconfig ${cudnn_easyconfig} was not found in EasyBuild version:\n" + error="${error} ${eb_version}\n" + error="${error}You either need to give a different version of CUDA to install _or_ \n" + error="${error}use a different version of EasyBuild for the installation.\n" + error="${error}\nThe versions of available with the current eb command are:\n" + error="${error}${available_cudnn_easyconfigs}" + fatal_error "${error}" + fi + + # We need the --rebuild option, as the cuDNN module may or may not be on the + # `MODULEPATH` yet. Even if it is, we still want to redo this installation + # since it will provide the symlinked targets for the parts of the cuDNN + # installation in the `.../versions/...` prefix + # We install the module in our `tmpdir` since we do not need the modulefile, + # we only care about providing the targets for the symlinks. + extra_args="--rebuild --installpath-modules=${tmpdir}" + + # We don't want hooks used in this install, we need a vanilla cuDNN installation + touch "$tmpdir"/none.py + # shellcheck disable=SC2086 # Intended splitting of extra_args + eb --prefix="$tmpdir" ${extra_args} --hooks="$tmpdir"/none.py --installpath="${cudnn_install_parent}"/ "${cudnn_easyconfig}" + ret=$? + if [ $ret -ne 0 ]; then + eb_last_log=$(unset EB_VERBOSE; eb --last-log) + cp -a ${eb_last_log} . + fatal_error "cuDNN installation failed, please check EasyBuild logs $(basename ${eb_last_log})..." + else + echo_green "cuDNN installation at ${cudnn_install_parent}/software/cuDNN/${cudnn_version}-CUDA-${cuda_version} succeeded!" + fi + # clean up tmpdir + rm -rf "${tmpdir}" +fi From f57b76dc53e0517f8e2ae405daedad3ee66590e4 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 15 May 2024 19:57:18 +0200 Subject: [PATCH 05/25] drop dependency on cuDNN to builddependency --- eb_hooks.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/eb_hooks.py b/eb_hooks.py index a778f8b7c1..cf1c911b23 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -768,6 +768,25 @@ def inject_gpu_property(ec): ec[key] = '\n'.join([ec_dict[key], value]) else: ec[key] = value + # Check if cuDNN is in the dependencies, if so add the 'gpu' Lmod property + if ('cuDNN' in [dep[0] for dep in iter(ec_dict['dependencies'])]): + ec.log.info("Injecting gpu as Lmod arch property and envvar with cuDNN version") + key = 'modluafooter' + value = 'add_property("arch","gpu")' + cudnn_version = 0 + for dep in iter(ec_dict['dependencies']): + # Make cuDNN a build dependency only (rpathing saves us from link errors) + if 'cuDNN' in dep[0]: + cudnn_version = dep[1] + ec_dict['dependencies'].remove(dep) + if dep not in ec_dict['builddependencies']: + ec_dict['builddependencies'].append(dep) + value = '\n'.join([value, 'setenv("EESSICUDNNVERSION","%s")' % cudnn_version]) + if key in ec_dict: + if not value in ec_dict[key]: + ec[key] = '\n'.join([ec_dict[key], value]) + else: + ec[key] = value return ec From e5f9fa719aad28a26d4e6da8925b910f9a3e360c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 15 May 2024 20:03:17 +0200 Subject: [PATCH 06/25] Lmod hook for cuDNN --- create_lmodsitepackage.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py index f9053cdf9e..5b32578d24 100755 --- a/create_lmodsitepackage.py +++ b/create_lmodsitepackage.py @@ -171,13 +171,38 @@ end end +local function eessi_cudnn_enabled_load_hook(t) + local frameStk = require("FrameStk"):singleton() + local mt = frameStk:mt() + local simpleName = string.match(t.modFullName, "(.-)/") + -- If we try to load cuDNN itself, check if the full cuDNN package was installed on the host in host_injections. + -- This is required for end users to build additional cuDNN dependent software. If the full SDK isn't present, refuse + -- to load the cuDNN module and print an informative message on how to set up GPU support for NESSI + local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" + if simpleName == 'cuDNN' then + -- get the full host_injections path + local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') + -- build final path where the cuDNN software should be installed + local cudnnEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" + local cudnnDirExists = isDir(cudnnEasyBuildDir) + if not cudnnDirExists then + local advice = "but while the module file exists, the actual software is not entirely shipped with NESSI " + advice = advice .. "due to licencing. You will need to install a full copy of the cuDNN package where NESSI " + advice = advice .. "can find it.\\n" + advice = advice .. refer_to_docs + LmodError("\\nYou requested to load ", simpleName, " ", advice) + end + end +end + -- Combine both functions into a single one, as we can only register one function as load hook in lmod -- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed function eessi_load_hook(t) - -- Only apply CUDA hooks if the loaded module is in the NESSI prefix - -- This avoids getting an Lmod Error when trying to load a CUDA module from a local software stack + -- Only apply CUDA and cuDNN hooks if the loaded module is in the NESSI prefix + -- This avoids getting an Lmod Error when trying to load a CUDA or cuDNN module from a local software stack if from_eessi_prefix(t) then eessi_cuda_enabled_load_hook(t) + eessi_cudnn_enabled_load_hook(t) end end From 0205e890f5810ab46836372798880b99a3e57edd Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 16 May 2024 19:50:48 +0200 Subject: [PATCH 07/25] copy cuDNN install file and fix name --- EESSI-install-software.sh | 2 +- install_scripts.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 4e06abf3d0..d840910516 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -205,7 +205,7 @@ ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} # Allow skipping CUDA SDK install in e.g. CI environments if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula - ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cudnn_host_injections.sh -c 12.1.1 -d 8.9.2.26 + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh -c 12.1.1 -d 8.9.2.26 else echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed" fi diff --git a/install_scripts.sh b/install_scripts.sh index 17f0b81008..8bbcb6a7bf 100755 --- a/install_scripts.sh +++ b/install_scripts.sh @@ -110,7 +110,7 @@ copy_files_by_list ${TOPDIR}/scripts ${INSTALL_PREFIX}/scripts "${script_files[@ # Copy files for the scripts/gpu_support/nvidia directory nvidia_files=( - install_cuda_host_injections.sh link_nvidia_host_libraries.sh + install_cuda_host_injections.sh install_cuDNN_host_injections.sh link_nvidia_host_libraries.sh ) copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia ${INSTALL_PREFIX}/scripts/gpu_support/nvidia "${nvidia_files[@]}" From 020c2332f98b2c566d3a19bd783a635df586cfab Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 16:20:32 +0200 Subject: [PATCH 08/25] use NESSI_SITE_INSTALL when it is set --- EESSI-extend-2023.06-easybuild.eb | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/EESSI-extend-2023.06-easybuild.eb b/EESSI-extend-2023.06-easybuild.eb index ed71ee5b53..d514293706 100644 --- a/EESSI-extend-2023.06-easybuild.eb +++ b/EESSI-extend-2023.06-easybuild.eb @@ -95,7 +95,19 @@ elseif (os.getenv("NESSI_SITE_INSTALL") ~= nil) then if ((os.getenv("NESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("NESSI_USER_INSTALL") ~= nil)) then LmodError("You cannot use NESSI_SITE_INSTALL in combination with any other NESSI_*_INSTALL environment variables") end - easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), 'versions', 'host_injections') + site_install = os.getenv("NESSI_SITE_INSTALL") + site_modulepath = nil + if (site_install ~= nil) then + -- Check the folder exists + if not isDir(site_install) then + LmodError("The location of NESSI_SITE_INSTALL (" .. site_install .. ") does not exist or is not a folder") + end + if (mode() == "load") then + LmodMessage("Configuring for use of NESSI_SITE_INSTALL under " .. site_install) + end + easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), os.getenv("EESSI_CVMFS_REPO"), site_install) + site_modulepath = pathJoin(easybuild_installpath, 'modules', 'all') + end else -- Deal with user and project installs project_install = os.getenv("NESSI_PROJECT_INSTALL") From e250652be692576fd6e2b4e348032791f16d2b84 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 16:59:06 +0200 Subject: [PATCH 09/25] rebuild NESSI-extend module --- .../2023.06/rebuilds/20240519-update-NESSI-extend-module.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-update-NESSI-extend-module.yml diff --git a/easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-update-NESSI-extend-module.yml b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-update-NESSI-extend-module.yml new file mode 100644 index 0000000000..fbb323ff2e --- /dev/null +++ b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-update-NESSI-extend-module.yml @@ -0,0 +1,5 @@ +# 2024-05-19 +# Rebuild NESSI-extend/2023.06-easybuild +# The current version does not handle NESSI_SITE_INSTALL correctly. +easyconfigs: + - EESSI-extend-2023.06-easybuild.eb From 0bd90fa141564d0deabb3af15a0380ffa2656fac Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 18:27:42 +0200 Subject: [PATCH 10/25] rename rebuild easystack file --- ...dule.yml => 20240519-eb-4.9.1-rebuild-NESSI-extend-module.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename easystacks/pilot.nessi.no/2023.06/rebuilds/{20240519-update-NESSI-extend-module.yml => 20240519-eb-4.9.1-rebuild-NESSI-extend-module.yml} (100%) diff --git a/easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-update-NESSI-extend-module.yml b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-eb-4.9.1-rebuild-NESSI-extend-module.yml similarity index 100% rename from easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-update-NESSI-extend-module.yml rename to easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-eb-4.9.1-rebuild-NESSI-extend-module.yml From a261b4c133ff4eab789a36b7cf824beae57f3dad Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 18:51:43 +0200 Subject: [PATCH 11/25] drop extra lowerdir parameter --- eessi_container.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi_container.sh b/eessi_container.sh index ad9397318a..962ce2c101 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -625,7 +625,7 @@ if [[ "${ACCESS}" == "rw" ]]; then EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") EESSI_WRITABLE_OVERLAY="container:fuse-overlayfs" - EESSI_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${repo_name}" + # EESSI_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${repo_name}" if [[ ! -z ${LOWER_DIRS} ]]; then # need to convert ':' in LOWER_DIRS to ',' because bind mounts use ',' as # separator while the lowerdir overlayfs option uses ':' From 22c5cd4a98baa10a945da8cc9e492d47ceec8a39 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 19:01:08 +0200 Subject: [PATCH 12/25] show contents of extra lowerdir --- bot/build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bot/build.sh b/bot/build.sh index 23f5fd952b..0d9a314a4c 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -231,6 +231,8 @@ else chmod u+rw ${STORAGE}/lower_dirs/${remove_file} done + ls -lR ${STORAGE}/lower_dirs + # prepare directory to store tarball of tmp for removal and build steps TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} From 27ca2fafbde0f3e7e79b0c0a8bbf066c98faa213 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 20:26:09 +0200 Subject: [PATCH 13/25] use lower dirs also for build step --- bot/build.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bot/build.sh b/bot/build.sh index 0d9a314a4c..2a690ecb20 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -272,6 +272,9 @@ BUILD_STEP_ARGS+=("--nvidia" "all") if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi +if [[ ! -z ${LOWER_DIRS} ]]; then + BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") +fi # create tmp file for output of build step build_outerr=$(mktemp build.outerr.XXXX) From 51671ee5055d75d543c6202127ac2f279c60d42f Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 20:48:41 +0200 Subject: [PATCH 14/25] list directory contents --- EESSI-install-software.sh | 2 ++ EESSI-remove-software.sh | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index d840910516..4c80a2649a 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -245,6 +245,8 @@ else if [ -f ${easystack_file} ]; then echo_green "Feeding easystack file ${easystack_file} to EasyBuild..." + ls -lisaR /cvmfs/pilot.nessi.no/versions/2023.06/software/linux/x86_64/amd/zen2/software/NESSI-extend + ${EB} --easystack ${TOPDIR}/${easystack_file} --robot ec=$? diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index 651a22f311..e464a586c6 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -112,9 +112,13 @@ else for app in ${rebuild_apps}; do app_dir=${EASYBUILD_INSTALLPATH}/software/${app} app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua + ls -lisaR ${app_dir} + ls -lisaR ${app_module} echo_yellow "Removing ${app_dir} and ${app_module}..." rm -rdfv ${app_dir} rm -rdfv ${app_module} + ls -lisaR ${app_dir} + ls -lisaR ${app_module} done else fatal_error "Easystack file ${easystack_file} not found!" From a960a5fecbd76f9330665a6f0f61ab0a564b26cc Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 22:02:45 +0200 Subject: [PATCH 15/25] create copy of lower dirs (dirs only) + skip test step --- bot/{test.sh => _test.sh} | 0 bot/build.sh | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) rename bot/{test.sh => _test.sh} (100%) diff --git a/bot/test.sh b/bot/_test.sh similarity index 100% rename from bot/test.sh rename to bot/_test.sh diff --git a/bot/build.sh b/bot/build.sh index 2a690ecb20..c5fc9bb8a5 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -273,7 +273,20 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi if [[ ! -z ${LOWER_DIRS} ]]; then - BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") + # make copy of LOWER_DIRS but only retain directories + lower_parent_dir=$(dirname ${LOWER_DIRS}) + the_lower_dir=$(basename ${LOWER_DIRS}) + LOWER_DIRS_ONLY="${lower_parent_dir}/${the_lower_dir}_2" + mkdir -p ${LOWER_DIRS_ONLY} + echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" + ls -lisaR ${LOWER_DIRS_ONLY} + cp -a ${LOWER_DIRS}/ ${LOWER_DIRS_ONLY} + echo "contents of LOWER_DIRS_ONLY (after cp -a)" + ls -lisaR ${LOWER_DIRS_ONLY} + find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; + echo "contents of LOWER_DIRS_ONLY (find ... rm)" + ls -lisaR ${LOWER_DIRS_ONLY} + BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS_ONLY}") fi # create tmp file for output of build step From 57555acd4a5ba98a0d5d7e04a6b01854a2a33ac7 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 22:14:20 +0200 Subject: [PATCH 16/25] fix copy command --- bot/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/build.sh b/bot/build.sh index c5fc9bb8a5..a12d3669fc 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -280,7 +280,7 @@ if [[ ! -z ${LOWER_DIRS} ]]; then mkdir -p ${LOWER_DIRS_ONLY} echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" ls -lisaR ${LOWER_DIRS_ONLY} - cp -a ${LOWER_DIRS}/ ${LOWER_DIRS_ONLY} + cp -a ${LOWER_DIRS}/. ${LOWER_DIRS_ONLY} echo "contents of LOWER_DIRS_ONLY (after cp -a)" ls -lisaR ${LOWER_DIRS_ONLY} find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; From 9f41c8eebe90a5589dd289707d6562659b59d011 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 22:41:39 +0200 Subject: [PATCH 17/25] move removal step into installation script --- EESSI-install-software.sh | 40 +++++++++++++++++++ bot/build.sh | 81 ++++++++++++++++++++------------------- 2 files changed, 82 insertions(+), 39 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 4c80a2649a..1d0f3ed470 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -218,6 +218,46 @@ fi # Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway) export EESSI_OVERRIDE_GPU_CHECK=1 +# before we actually install software, we need to remove software that is requested +# to be rebuilt (need to do this here because installations of software are read-only; +# also, it should be done in the same container run or fuse-overlayfs might get confused) +changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") +if [ -z ${changed_easystacks_rebuilds} ]; then + echo "No software needs to be removed." +else + for easystack_file in ${changed_easystacks_rebuilds}; do + # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file + eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') + + # load EasyBuild module (will be installed if it's not available yet) + source ${TOPDIR}/load_easybuild_module.sh ${eb_version} + + if [ -f ${easystack_file} ]; then + echo_green "Software rebuild(s) requested in ${easystack_file}, so" + echo_green " determining which existing installation have to be removed (assuming contents" + echo_green " have been made writable/deletable)..." + # we need to remove existing installation directories first, + # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) + # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) + # rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + rebuild_apps=$(eb --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + for app in ${rebuild_apps}; do + app_dir=${EASYBUILD_INSTALLPATH}/software/${app} + app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua + ls -lisaR ${app_dir} + ls -lisaR ${app_module} + echo_yellow "Removing ${app_dir} and ${app_module}..." + rm -rdfv ${app_dir} + rm -rdfv ${app_module} + ls -lisaR ${app_dir} + ls -lisaR ${app_module} + done + else + fatal_error "Easystack file ${easystack_file} not found!" + fi + done +fi + # use PR patch file to determine in which easystack files stuff was added changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing') if [ -z "${changed_easystacks}" ]; then diff --git a/bot/build.sh b/bot/build.sh index a12d3669fc..2b08a8599c 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -233,31 +233,33 @@ else ls -lR ${STORAGE}/lower_dirs - # prepare directory to store tarball of tmp for removal and build steps - TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step - mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} - - # prepare arguments to eessi_container.sh specific to remove step - declare -a REMOVAL_STEP_ARGS=() - REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") - REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") - if [[ ! -z ${LOWER_DIRS} ]]; then - REMOVAL_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") - fi - - # create tmp file for output of removal step - removal_outerr=$(mktemp remove.outerr.XXXX) - - echo "Executing command to remove software:" - echo "./eessi_container.sh ${COMMON_ARGS[@]} ${REMOVAL_STEP_ARGS[@]}" - echo " -- ./EESSI-remove-software.sh \"${REMOVAL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${removal_outerr}" - ./eessi_container.sh "${COMMON_ARGS[@]}" "${REMOVAL_STEP_ARGS[@]}" \ - -- ./EESSI-remove-software.sh "${REMOVAL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${removal_outerr} - - # make sure that the build step resumes from the same temporary directory - # this is important, as otherwise the removed software will still be there - REMOVAL_TMPDIR=$(grep ' as tmp directory ' ${removal_outerr} | cut -d ' ' -f 2) - BUILD_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") +# # prepare directory to store tarball of tmp for removal and build steps +# TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step +# mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} +# +#### +# # prepare arguments to eessi_container.sh specific to remove step +# declare -a REMOVAL_STEP_ARGS=() +# REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") +# REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") +# if [[ ! -z ${LOWER_DIRS} ]]; then +# REMOVAL_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") +# fi +# +# # create tmp file for output of removal step +# removal_outerr=$(mktemp remove.outerr.XXXX) +# +# echo "Executing command to remove software:" +# echo "./eessi_container.sh ${COMMON_ARGS[@]} ${REMOVAL_STEP_ARGS[@]}" +# echo " -- ./EESSI-remove-software.sh \"${REMOVAL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${removal_outerr}" +# ./eessi_container.sh "${COMMON_ARGS[@]}" "${REMOVAL_STEP_ARGS[@]}" \ +# -- ./EESSI-remove-software.sh "${REMOVAL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${removal_outerr} +# +# # make sure that the build step resumes from the same temporary directory +# # this is important, as otherwise the removed software will still be there +# REMOVAL_TMPDIR=$(grep ' as tmp directory ' ${removal_outerr} | cut -d ' ' -f 2) +# BUILD_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") +#### fi # prepare directory to store tarball of tmp for build step @@ -273,20 +275,21 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi if [[ ! -z ${LOWER_DIRS} ]]; then - # make copy of LOWER_DIRS but only retain directories - lower_parent_dir=$(dirname ${LOWER_DIRS}) - the_lower_dir=$(basename ${LOWER_DIRS}) - LOWER_DIRS_ONLY="${lower_parent_dir}/${the_lower_dir}_2" - mkdir -p ${LOWER_DIRS_ONLY} - echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" - ls -lisaR ${LOWER_DIRS_ONLY} - cp -a ${LOWER_DIRS}/. ${LOWER_DIRS_ONLY} - echo "contents of LOWER_DIRS_ONLY (after cp -a)" - ls -lisaR ${LOWER_DIRS_ONLY} - find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; - echo "contents of LOWER_DIRS_ONLY (find ... rm)" - ls -lisaR ${LOWER_DIRS_ONLY} - BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS_ONLY}") +# # make copy of LOWER_DIRS but only retain directories +# lower_parent_dir=$(dirname ${LOWER_DIRS}) +# the_lower_dir=$(basename ${LOWER_DIRS}) +# LOWER_DIRS_ONLY="${lower_parent_dir}/${the_lower_dir}_2" +# mkdir -p ${LOWER_DIRS_ONLY} +# echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" +# ls -lisaR ${LOWER_DIRS_ONLY} +# cp -a ${LOWER_DIRS}/. ${LOWER_DIRS_ONLY} +# echo "contents of LOWER_DIRS_ONLY (after cp -a)" +# ls -lisaR ${LOWER_DIRS_ONLY} +# find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; +# echo "contents of LOWER_DIRS_ONLY (find ... rm)" +# ls -lisaR ${LOWER_DIRS_ONLY} +# BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS_ONLY}") + BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") fi # create tmp file for output of build step From a25cc1af28633dbff91037f6c009518b49cd1292 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:13:06 +0200 Subject: [PATCH 18/25] Revert "move removal step into installation script" This reverts commit 9f41c8eebe90a5589dd289707d6562659b59d011. --- EESSI-install-software.sh | 40 ------------------- bot/build.sh | 81 +++++++++++++++++++-------------------- 2 files changed, 39 insertions(+), 82 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 1d0f3ed470..4c80a2649a 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -218,46 +218,6 @@ fi # Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway) export EESSI_OVERRIDE_GPU_CHECK=1 -# before we actually install software, we need to remove software that is requested -# to be rebuilt (need to do this here because installations of software are read-only; -# also, it should be done in the same container run or fuse-overlayfs might get confused) -changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") -if [ -z ${changed_easystacks_rebuilds} ]; then - echo "No software needs to be removed." -else - for easystack_file in ${changed_easystacks_rebuilds}; do - # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file - eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') - - # load EasyBuild module (will be installed if it's not available yet) - source ${TOPDIR}/load_easybuild_module.sh ${eb_version} - - if [ -f ${easystack_file} ]; then - echo_green "Software rebuild(s) requested in ${easystack_file}, so" - echo_green " determining which existing installation have to be removed (assuming contents" - echo_green " have been made writable/deletable)..." - # we need to remove existing installation directories first, - # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) - # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) - # rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') - rebuild_apps=$(eb --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') - for app in ${rebuild_apps}; do - app_dir=${EASYBUILD_INSTALLPATH}/software/${app} - app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua - ls -lisaR ${app_dir} - ls -lisaR ${app_module} - echo_yellow "Removing ${app_dir} and ${app_module}..." - rm -rdfv ${app_dir} - rm -rdfv ${app_module} - ls -lisaR ${app_dir} - ls -lisaR ${app_module} - done - else - fatal_error "Easystack file ${easystack_file} not found!" - fi - done -fi - # use PR patch file to determine in which easystack files stuff was added changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing') if [ -z "${changed_easystacks}" ]; then diff --git a/bot/build.sh b/bot/build.sh index 2b08a8599c..a12d3669fc 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -233,33 +233,31 @@ else ls -lR ${STORAGE}/lower_dirs -# # prepare directory to store tarball of tmp for removal and build steps -# TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step -# mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} -# -#### -# # prepare arguments to eessi_container.sh specific to remove step -# declare -a REMOVAL_STEP_ARGS=() -# REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") -# REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") -# if [[ ! -z ${LOWER_DIRS} ]]; then -# REMOVAL_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") -# fi -# -# # create tmp file for output of removal step -# removal_outerr=$(mktemp remove.outerr.XXXX) -# -# echo "Executing command to remove software:" -# echo "./eessi_container.sh ${COMMON_ARGS[@]} ${REMOVAL_STEP_ARGS[@]}" -# echo " -- ./EESSI-remove-software.sh \"${REMOVAL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${removal_outerr}" -# ./eessi_container.sh "${COMMON_ARGS[@]}" "${REMOVAL_STEP_ARGS[@]}" \ -# -- ./EESSI-remove-software.sh "${REMOVAL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${removal_outerr} -# -# # make sure that the build step resumes from the same temporary directory -# # this is important, as otherwise the removed software will still be there -# REMOVAL_TMPDIR=$(grep ' as tmp directory ' ${removal_outerr} | cut -d ' ' -f 2) -# BUILD_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") -#### + # prepare directory to store tarball of tmp for removal and build steps + TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step + mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} + + # prepare arguments to eessi_container.sh specific to remove step + declare -a REMOVAL_STEP_ARGS=() + REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") + REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") + if [[ ! -z ${LOWER_DIRS} ]]; then + REMOVAL_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") + fi + + # create tmp file for output of removal step + removal_outerr=$(mktemp remove.outerr.XXXX) + + echo "Executing command to remove software:" + echo "./eessi_container.sh ${COMMON_ARGS[@]} ${REMOVAL_STEP_ARGS[@]}" + echo " -- ./EESSI-remove-software.sh \"${REMOVAL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${removal_outerr}" + ./eessi_container.sh "${COMMON_ARGS[@]}" "${REMOVAL_STEP_ARGS[@]}" \ + -- ./EESSI-remove-software.sh "${REMOVAL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${removal_outerr} + + # make sure that the build step resumes from the same temporary directory + # this is important, as otherwise the removed software will still be there + REMOVAL_TMPDIR=$(grep ' as tmp directory ' ${removal_outerr} | cut -d ' ' -f 2) + BUILD_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") fi # prepare directory to store tarball of tmp for build step @@ -275,21 +273,20 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi if [[ ! -z ${LOWER_DIRS} ]]; then -# # make copy of LOWER_DIRS but only retain directories -# lower_parent_dir=$(dirname ${LOWER_DIRS}) -# the_lower_dir=$(basename ${LOWER_DIRS}) -# LOWER_DIRS_ONLY="${lower_parent_dir}/${the_lower_dir}_2" -# mkdir -p ${LOWER_DIRS_ONLY} -# echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" -# ls -lisaR ${LOWER_DIRS_ONLY} -# cp -a ${LOWER_DIRS}/. ${LOWER_DIRS_ONLY} -# echo "contents of LOWER_DIRS_ONLY (after cp -a)" -# ls -lisaR ${LOWER_DIRS_ONLY} -# find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; -# echo "contents of LOWER_DIRS_ONLY (find ... rm)" -# ls -lisaR ${LOWER_DIRS_ONLY} -# BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS_ONLY}") - BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") + # make copy of LOWER_DIRS but only retain directories + lower_parent_dir=$(dirname ${LOWER_DIRS}) + the_lower_dir=$(basename ${LOWER_DIRS}) + LOWER_DIRS_ONLY="${lower_parent_dir}/${the_lower_dir}_2" + mkdir -p ${LOWER_DIRS_ONLY} + echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" + ls -lisaR ${LOWER_DIRS_ONLY} + cp -a ${LOWER_DIRS}/. ${LOWER_DIRS_ONLY} + echo "contents of LOWER_DIRS_ONLY (after cp -a)" + ls -lisaR ${LOWER_DIRS_ONLY} + find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; + echo "contents of LOWER_DIRS_ONLY (find ... rm)" + ls -lisaR ${LOWER_DIRS_ONLY} + BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS_ONLY}") fi # create tmp file for output of build step From 1305649e986e4f743ccb418206461a06fa80d0b2 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:13:21 +0200 Subject: [PATCH 19/25] Revert "fix copy command" This reverts commit 57555acd4a5ba98a0d5d7e04a6b01854a2a33ac7. --- bot/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/build.sh b/bot/build.sh index a12d3669fc..c5fc9bb8a5 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -280,7 +280,7 @@ if [[ ! -z ${LOWER_DIRS} ]]; then mkdir -p ${LOWER_DIRS_ONLY} echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" ls -lisaR ${LOWER_DIRS_ONLY} - cp -a ${LOWER_DIRS}/. ${LOWER_DIRS_ONLY} + cp -a ${LOWER_DIRS}/ ${LOWER_DIRS_ONLY} echo "contents of LOWER_DIRS_ONLY (after cp -a)" ls -lisaR ${LOWER_DIRS_ONLY} find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; From fa4b77371d2d57133fc7b7ae73a1d6696bf3af7e Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:13:23 +0200 Subject: [PATCH 20/25] Revert "create copy of lower dirs (dirs only) + skip test step" This reverts commit a960a5fecbd76f9330665a6f0f61ab0a564b26cc. --- bot/build.sh | 15 +-------------- bot/{_test.sh => test.sh} | 0 2 files changed, 1 insertion(+), 14 deletions(-) rename bot/{_test.sh => test.sh} (100%) diff --git a/bot/build.sh b/bot/build.sh index c5fc9bb8a5..2a690ecb20 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -273,20 +273,7 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi if [[ ! -z ${LOWER_DIRS} ]]; then - # make copy of LOWER_DIRS but only retain directories - lower_parent_dir=$(dirname ${LOWER_DIRS}) - the_lower_dir=$(basename ${LOWER_DIRS}) - LOWER_DIRS_ONLY="${lower_parent_dir}/${the_lower_dir}_2" - mkdir -p ${LOWER_DIRS_ONLY} - echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" - ls -lisaR ${LOWER_DIRS_ONLY} - cp -a ${LOWER_DIRS}/ ${LOWER_DIRS_ONLY} - echo "contents of LOWER_DIRS_ONLY (after cp -a)" - ls -lisaR ${LOWER_DIRS_ONLY} - find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; - echo "contents of LOWER_DIRS_ONLY (find ... rm)" - ls -lisaR ${LOWER_DIRS_ONLY} - BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS_ONLY}") + BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") fi # create tmp file for output of build step diff --git a/bot/_test.sh b/bot/test.sh similarity index 100% rename from bot/_test.sh rename to bot/test.sh From 2d14ffebe6d44330197fc74bd867485d3e86e9f4 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:13:25 +0200 Subject: [PATCH 21/25] Revert "list directory contents" This reverts commit 51671ee5055d75d543c6202127ac2f279c60d42f. --- EESSI-install-software.sh | 2 -- EESSI-remove-software.sh | 4 ---- 2 files changed, 6 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 4c80a2649a..d840910516 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -245,8 +245,6 @@ else if [ -f ${easystack_file} ]; then echo_green "Feeding easystack file ${easystack_file} to EasyBuild..." - ls -lisaR /cvmfs/pilot.nessi.no/versions/2023.06/software/linux/x86_64/amd/zen2/software/NESSI-extend - ${EB} --easystack ${TOPDIR}/${easystack_file} --robot ec=$? diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index e464a586c6..651a22f311 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -112,13 +112,9 @@ else for app in ${rebuild_apps}; do app_dir=${EASYBUILD_INSTALLPATH}/software/${app} app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua - ls -lisaR ${app_dir} - ls -lisaR ${app_module} echo_yellow "Removing ${app_dir} and ${app_module}..." rm -rdfv ${app_dir} rm -rdfv ${app_module} - ls -lisaR ${app_dir} - ls -lisaR ${app_module} done else fatal_error "Easystack file ${easystack_file} not found!" From d3cf7065e40d3595b3126b29f2a99f0caf33b2f4 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:13:27 +0200 Subject: [PATCH 22/25] Revert "use lower dirs also for build step" This reverts commit 27ca2fafbde0f3e7e79b0c0a8bbf066c98faa213. --- bot/build.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index 2a690ecb20..0d9a314a4c 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -272,9 +272,6 @@ BUILD_STEP_ARGS+=("--nvidia" "all") if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi -if [[ ! -z ${LOWER_DIRS} ]]; then - BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") -fi # create tmp file for output of build step build_outerr=$(mktemp build.outerr.XXXX) From 2afb50c87b31c05c48f9ebf7753855bf34270e2f Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:21:07 +0200 Subject: [PATCH 23/25] less noise --- bot/build.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index 0d9a314a4c..23f5fd952b 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -231,8 +231,6 @@ else chmod u+rw ${STORAGE}/lower_dirs/${remove_file} done - ls -lR ${STORAGE}/lower_dirs - # prepare directory to store tarball of tmp for removal and build steps TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} From 5e4e2940ab49fffa9e44037a40d8b957cd27539d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:22:47 +0200 Subject: [PATCH 24/25] only user lowerdir arg once --- eessi_container.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/eessi_container.sh b/eessi_container.sh index 962ce2c101..c9ed97e5c6 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -625,7 +625,6 @@ if [[ "${ACCESS}" == "rw" ]]; then EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") EESSI_WRITABLE_OVERLAY="container:fuse-overlayfs" - # EESSI_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${repo_name}" if [[ ! -z ${LOWER_DIRS} ]]; then # need to convert ':' in LOWER_DIRS to ',' because bind mounts use ',' as # separator while the lowerdir overlayfs option uses ':' From 05773d3e5155d8d86d5175d38e9888e45909fb0d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 20 May 2024 19:53:21 +0200 Subject: [PATCH 25/25] Build NESSI-extend from scratch --- .../20240520-eb-4.9.1-rebuild-NESSI-extend-module.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 easystacks/pilot.nessi.no/2023.06/rebuilds/20240520-eb-4.9.1-rebuild-NESSI-extend-module.yml diff --git a/easystacks/pilot.nessi.no/2023.06/rebuilds/20240520-eb-4.9.1-rebuild-NESSI-extend-module.yml b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240520-eb-4.9.1-rebuild-NESSI-extend-module.yml new file mode 100644 index 0000000000..76ba2740c2 --- /dev/null +++ b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240520-eb-4.9.1-rebuild-NESSI-extend-module.yml @@ -0,0 +1,5 @@ +# 2024-05-20 +# Rebuild NESSI-extend/2023.06-easybuild +# Need to revert to the original version. +easyconfigs: + - EESSI-extend-2023.06-easybuild.eb