From fedf9e3722e0c8a354b1d7f8bb98f1ae303e2b81 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 9 Apr 2024 20:47:43 +0200 Subject: [PATCH 1/9] removed a few extra whitespaces in empty lines --- EESSI-install-software.sh | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 6c08a95757..84afa786e3 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -207,26 +207,27 @@ changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z if [ -z ${changed_easystacks} ]; then echo "No missing installations, party time!" # Ensure the bot report success, as there was nothing to be build here else + for easystack_file in ${changed_easystacks}; do - + echo -e "Processing easystack file ${easystack_file}...\n\n" - + # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') - + # load EasyBuild module (will be installed if it's not available yet) source ${TOPDIR}/load_easybuild_module.sh ${eb_version} - + ${EB} --show-config - + echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..." - + if [ -f ${easystack_file} ]; then echo_green "Feeding easystack file ${easystack_file} to EasyBuild..." - + ${EB} --easystack ${TOPDIR}/${easystack_file} --robot ec=$? - + # copy EasyBuild log file if EasyBuild exited with an error if [ ${ec} -ne 0 ]; then eb_last_log=$(unset EB_VERBOSE; eb --last-log) @@ -236,12 +237,12 @@ else # copy to build logs dir (with context added) copy_build_log "${eb_last_log}" "${build_logs_dir}" fi - + $TOPDIR/check_missing_installations.sh ${TOPDIR}/${easystack_file} ${TOPDIR}/${pr_diff} else fatal_error "Easystack file ${easystack_file} not found!" fi - + done fi From 012560808a75ecdd0c84968e4cc66050521e3d73 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 9 Apr 2024 20:48:47 +0200 Subject: [PATCH 2/9] new script to remove a software --- EESSI-remove-software.sh | 125 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100755 EESSI-remove-software.sh diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh new file mode 100755 index 0000000000..446a156cb8 --- /dev/null +++ b/EESSI-remove-software.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# +# Script to remove part of the EESSI software stack (version set through init/eessi_defaults) + +# see example parsing of command line arguments at +# https://wiki.bash-hackers.org/scripting/posparams#using_a_while_loop +# https://stackoverflow.com/questions/192249/how-do-i-parse-command-line-arguments-in-bash + +display_help() { + echo "usage: $0 [OPTIONS]" + echo " -g | --generic - instructs script to build for generic architecture target" + echo " -h | --help - display this usage information" +} + +POSITIONAL_ARGS=() + +while [[ $# -gt 0 ]]; do + case $1 in + -g|--generic) + DETECTION_PARAMETERS="--generic" + shift + ;; + -h|--help) + display_help # Call your function + # no shifting needed here, we're done. + exit 0 + ;; + -*|--*) + echo "Error: Unknown option: $1" >&2 + exit 1 + ;; + *) # No more options + POSITIONAL_ARGS+=("$1") # save positional arg + shift + ;; + esac +done + +set -- "${POSITIONAL_ARGS[@]}" + +TOPDIR=$(dirname $(realpath $0)) + +export TMPDIR=$(mktemp -d /tmp/eessi-remove.XXXXXXXX) + +source $TOPDIR/scripts/utils.sh + +echo ">> Determining software subdirectory to use for current build host..." +if [ -z $EESSI_SOFTWARE_SUBDIR_OVERRIDE ]; then + export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS) + echo ">> Determined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script" +else + echo ">> Picking up pre-defined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE: ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" +fi + +echo ">> Setting up environment..." + +source $TOPDIR/init/bash + +if [ -d $EESSI_CVMFS_REPO ]; then + echo_green "$EESSI_CVMFS_REPO available, OK!" +else + fatal_error "$EESSI_CVMFS_REPO is not available!" +fi + +if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then + fatal_error "Failed to determine software subdirectory?!" +elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then + fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!" +else + echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!" +fi + +echo ">> Configuring EasyBuild..." +EB="eb" +source $TOPDIR/configure_easybuild + +echo ">> Setting up \$MODULEPATH..." +# make sure no modules are loaded +module --force purge +# ignore current $MODULEPATH entirely +module unuse $MODULEPATH +module use $EASYBUILD_INSTALLPATH/modules/all +if [[ -z ${MODULEPATH} ]]; then + fatal_error "Failed to set up \$MODULEPATH?!" +else + echo_green ">> MODULEPATH set up: ${MODULEPATH}" +fi + +# assume there's only one diff file that corresponds to the PR patch file +pr_diff=$(ls [0-9]*.diff | head -1) + +# if this script is run as root, use PR patch file to determine if software needs to be removed first +if [ $EUID -eq 0 ]; then + changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") + if [ -z ${changed_easystacks_rebuilds} ]; then + echo "No software needs to be removed." + else + for easystack_file in ${changed_easystacks_rebuilds}; do + # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file + eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') + + # load EasyBuild module (will be installed if it's not available yet) + source ${TOPDIR}/load_easybuild_module.sh ${eb_version} + + if [ -f ${easystack_file} ]; then + echo_green "Software rebuild(s) requested in ${easystack_file}, so determining which existing installation have to be removed..." + # we need to remove existing installation directories first, + # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) + # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) + rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + for app in ${rebuild_apps}; do + app_dir=${EASYBUILD_INSTALLPATH}/software/${app} + app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua + echo_yellow "Removing ${app_dir} and ${app_module}..." + rm -rf ${app_dir} + rm -rf ${app_module} + done + else + fatal_error "Easystack file ${easystack_file} not found!" + fi + done + fi +else + fatal_error "This script can only be run by root!" +fi From 42d79af27d592062ebd00f95560d1ab2a968817e Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 9 Apr 2024 20:52:05 +0200 Subject: [PATCH 3/9] bot/build.sh from EESSI PR 488 --- bot/build.sh | 57 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index 7eb15f319d..99e61655c1 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -168,12 +168,56 @@ COMMON_ARGS+=("--mode" "run") # make sure to use the same parent dir for storing tarballs of tmp PREVIOUS_TMP_DIR=${PWD}/previous_tmp +# prepare arguments to install_software_layer.sh (specific to build step) +declare -a BUILD_STEP_ARGS=() +declare -a INSTALL_SCRIPT_ARGS=() +declare -a REMOVAL_SCRIPT_ARGS=() +if [[ ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} =~ .*/generic$ ]]; then + INSTALL_SCRIPT_ARGS+=("--generic") + REMOVAL_SCRIPT_ARGS+=("--generic") +fi +[[ ! -z ${BUILD_LOGS_DIR} ]] && INSTALL_SCRIPT_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}") +[[ ! -z ${SHARED_FS_PATH} ]] && INSTALL_SCRIPT_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}") + +# determine if the removal step has to be run +# assume there's only one diff file that corresponds to the PR patch file +pr_diff=$(ls [0-9]*.diff | head -1) +changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | grep "/rebuilds/") +if [[ -z ${changed_easystacks_rebuilds} ]]; then + echo "This PR does not add any easystack files in a rebuilds subdirectory, so let's skip the removal step." +else + # prepare directory to store tarball of tmp for removal and build steps + TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step + mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} + + # prepare arguments to eessi_container.sh specific to remove step + declare -a REMOVAL_STEP_ARGS=() + REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}") + REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") + # add fakeroot option in order to be able to remove software, see: + # https://github.com/EESSI/software-layer/issues/312 + REMOVAL_STEP_ARGS+=("--fakeroot") + + # create tmp file for output of removal step + removal_outerr=$(mktemp remove.outerr.XXXX) + + echo "Executing command to remove software:" + echo "./eessi_container.sh ${COMMON_ARGS[@]} ${REMOVAL_STEP_ARGS[@]}" + echo " -- ./EESSI-remove-software.sh \"${REMOVAL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${removal_outerr}" + ./eessi_container.sh "${COMMON_ARGS[@]}" "${REMOVAL_STEP_ARGS[@]}" \ + -- ./EESSI-remove-software.sh "${REMOVAL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${removal_outerr} + + # make sure that the build step resumes from the same temporary directory + # this is important, as otherwise the removed software will still be there + REMOVAL_TMPDIR=$(grep ' as tmp directory ' ${removal_outerr} | cut -d ' ' -f 2) + BUILD_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") +fi + # prepare directory to store tarball of tmp for build step TARBALL_TMP_BUILD_STEP_DIR=${PREVIOUS_TMP_DIR}/build_step mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR} # prepare arguments to eessi_container.sh specific to build step -declare -a BUILD_STEP_ARGS=() BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}") BUILD_STEP_ARGS+=("--storage" "${STORAGE}") # add options required to handle NVIDIA support @@ -182,14 +226,6 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi -# prepare arguments to install_software_layer.sh (specific to build step) -declare -a INSTALL_SCRIPT_ARGS=() -if [[ ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} =~ .*/generic$ ]]; then - INSTALL_SCRIPT_ARGS+=("--generic") -fi -[[ ! -z ${BUILD_LOGS_DIR} ]] && INSTALL_SCRIPT_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}") -[[ ! -z ${SHARED_FS_PATH} ]] && INSTALL_SCRIPT_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}") - # create tmp file for output of build step build_outerr=$(mktemp build.outerr.XXXX) @@ -211,8 +247,7 @@ declare -a TARBALL_STEP_ARGS=() TARBALL_STEP_ARGS+=("--save" "${TARBALL_TMP_TARBALL_STEP_DIR}") # determine temporary directory to resume from -BUILD_TMPDIR=$(grep ' as tmp directory ' ${build_outerr} | cut -d ' ' -f 2) -TARBALL_STEP_ARGS+=("--resume" "${BUILD_TMPDIR}") +TARBALL_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") timestamp=$(date +%s) # to set EESSI_VERSION we need to source init/eessi_defaults now From 7582f5af5fb55dc4f350be4bae154dae3231dd2e Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 9 Apr 2024 20:59:23 +0200 Subject: [PATCH 4/9] create_lmodsitepackage.py from EESSI PR 488 --- create_lmodsitepackage.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py index 9a4a232863..5a7a915494 100755 --- a/create_lmodsitepackage.py +++ b/create_lmodsitepackage.py @@ -84,31 +84,10 @@ end end -local function eessi_openmpi_load_hook(t) - -- disable smcuda BTL when loading OpenMPI module for aarch64/neoverse_v1, - -- to work around hang/crash due to bug in OpenMPI; - -- see https://gitlab.com/eessi/support/-/issues/41 - local frameStk = require("FrameStk"):singleton() - local mt = frameStk:mt() - local moduleName = string.match(t.modFullName, "(.-)/") - local cpuTarget = os.getenv("EESSI_SOFTWARE_SUBDIR") or "" - if (moduleName == "OpenMPI") and (cpuTarget == "aarch64/neoverse_v1") then - local msg = "Adding '^smcuda' to $OMPI_MCA_btl to work around bug in OpenMPI" - LmodMessage(msg .. " (see https://gitlab.com/eessi/support/-/issues/41)") - local ompiMcaBtl = os.getenv("OMPI_MCA_btl") - if ompiMcaBtl == nil then - setenv("OMPI_MCA_btl", "^smcuda") - else - setenv("OMPI_MCA_btl", ompiMcaBtl .. ",^smcuda") - end - end -end - -- Combine both functions into a single one, as we can only register one function as load hook in lmod -- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed function eessi_load_hook(t) eessi_cuda_enabled_load_hook(t) - eessi_openmpi_load_hook(t) end From 4951647d2b2b0f00d95f9e7cb71ba106f0445427 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 9 Apr 2024 21:02:58 +0200 Subject: [PATCH 5/9] eessi_container.sh from EESSI PR 488 --- eessi_container.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/eessi_container.sh b/eessi_container.sh index e84912951c..5b41f4832a 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -73,6 +73,7 @@ display_help() { echo " -a | --access {ro,rw} - ro (read-only), rw (read & write) [default: ro]" echo " -c | --container IMG - image file or URL defining the container to use" echo " [default: docker://ghcr.io/eessi/build-node:debian11]" + echo " -f | --fakeroot - run the container with --fakeroot [default: false]" echo " -g | --storage DIR - directory space on host machine (used for" echo " temporary data) [default: 1. TMPDIR, 2. /tmp]" echo " -h | --help - display this usage information [default: false]" @@ -113,6 +114,7 @@ display_help() { ACCESS="ro" CONTAINER="docker://ghcr.io/eessi/build-node:debian11" #DRY_RUN=0 +FAKEROOT=0 VERBOSE=0 STORAGE= LIST_REPOS=0 @@ -140,6 +142,10 @@ while [[ $# -gt 0 ]]; do # DRY_RUN=1 # shift 1 # ;; + -f|--fakeroot) + FAKEROOT=1 + shift 1 + ;; -g|--storage) STORAGE="$2" shift 2 @@ -466,6 +472,11 @@ if [[ ${SETUP_NVIDIA} -eq 1 ]]; then fi fi +# Configure the fakeroot setting for the container +if [[ ${FAKEROOT} -eq 1 ]]; then + ADDITIONAL_CONTAINER_OPTIONS+=("--fakeroot") +fi + # set up repository config (always create directory repos_cfg and populate it with info when # arg -r|--repository is used) mkdir -p ${EESSI_TMPDIR}/repos_cfg From 1e229d79ce7ad6ee6aec16e3e9cb7b294005f2ff Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 9 Apr 2024 21:09:00 +0200 Subject: [PATCH 6/9] easystack file for rebuilding three installations of OpenMPI --- ...20240409-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 easystacks/pilot.nessi.no/2023.06/rebuilds/20240409-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml diff --git a/easystacks/pilot.nessi.no/2023.06/rebuilds/20240409-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240409-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml new file mode 100644 index 0000000000..d9fce380ff --- /dev/null +++ b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240409-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml @@ -0,0 +1,15 @@ +# 2024-04-09 +# Rebuild all OpenMPI 4.1.x versions due to an issue with smcuda: +# https://github.com/open-mpi/ompi/issues/12270 +# https://github.com/open-mpi/ompi/pull/12344 +# https://github.com/easybuilders/easybuild-easyconfigs/pull/19940 +easyconfigs: + - OpenMPI-4.1.4-GCC-12.2.0.eb: + options: + from-pr: 19940 + - OpenMPI-4.1.5-GCC-12.3.0: + options: + from-pr: 19940 + - OpenMPI-4.1.6-GCC-13.2.0: + options: + from-pr: 19940 From 0083ddffe8aea51f42d7c394be14e139773e6ac2 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 9 Apr 2024 21:22:53 +0200 Subject: [PATCH 7/9] add modifications from EESSI PR 518 --- EESSI-install-software.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 84afa786e3..e64798532e 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -204,11 +204,15 @@ ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12 # use PR patch file to determine in which easystack files stuff was added changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing') -if [ -z ${changed_easystacks} ]; then +if [ -z "${changed_easystacks}" ]; then echo "No missing installations, party time!" # Ensure the bot report success, as there was nothing to be build here else - for easystack_file in ${changed_easystacks}; do + # first process rebuilds, if any, then easystack files for new installations + # "|| true" is used to make sure that the grep command always returns success + rebuild_easystacks=$(echo "${changed_easystacks}" | (grep "/rebuilds/" || true)) + new_easystacks=$(echo "${changed_easystacks}" | (grep -v "/rebuilds/" || true)) + for easystack_file in ${rebuild_easystacks} ${new_easystacks}; do echo -e "Processing easystack file ${easystack_file}...\n\n" From 172242b9491c4ea3f9094395338689f342e6f435 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 9 Apr 2024 21:28:39 +0200 Subject: [PATCH 8/9] modifications to bot/build.sh from EESSI PR 518 --- bot/build.sh | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index 99e61655c1..12c849205d 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -182,8 +182,10 @@ fi # determine if the removal step has to be run # assume there's only one diff file that corresponds to the PR patch file pr_diff=$(ls [0-9]*.diff | head -1) -changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | grep "/rebuilds/") -if [[ -z ${changed_easystacks_rebuilds} ]]; then +# the true at the end of the next command is important: grep will expectedly return 1 if there is no easystack file being added under rebuilds, +# but due to "set -e" the entire script would otherwise fail +changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | (grep "/rebuilds/" || true)) +if [[ -z "${changed_easystacks_rebuilds}" ]]; then echo "This PR does not add any easystack files in a rebuilds subdirectory, so let's skip the removal step." else # prepare directory to store tarball of tmp for removal and build steps @@ -192,7 +194,7 @@ else # prepare arguments to eessi_container.sh specific to remove step declare -a REMOVAL_STEP_ARGS=() - REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}") + REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") # add fakeroot option in order to be able to remove software, see: # https://github.com/EESSI/software-layer/issues/312 @@ -247,7 +249,14 @@ declare -a TARBALL_STEP_ARGS=() TARBALL_STEP_ARGS+=("--save" "${TARBALL_TMP_TARBALL_STEP_DIR}") # determine temporary directory to resume from -TARBALL_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") +if [[ -z ${REMOVAL_TMPDIR} ]]; then + # no rebuild step was done, so the tarball step should resume from the build directory + BUILD_TMPDIR=$(grep ' as tmp directory ' ${build_outerr} | cut -d ' ' -f 2) + TARBALL_STEP_ARGS+=("--resume" "${BUILD_TMPDIR}") +else + # a removal step was done, so resume from its temporary directory (which was also used for the build step) + TARBALL_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") +fi timestamp=$(date +%s) # to set EESSI_VERSION we need to source init/eessi_defaults now From 4afaaad2bf5bd1acc56ec258d7c2cad76f969f9d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 10 Apr 2024 20:51:59 +0200 Subject: [PATCH 9/9] revert changes to SitePackage.lua + don't rebuild OpenMPI --- create_lmodsitepackage.py | 21 +++++++++++++++++++ ...0409-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml | 15 ------------- 2 files changed, 21 insertions(+), 15 deletions(-) delete mode 100644 easystacks/pilot.nessi.no/2023.06/rebuilds/20240409-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py index 5a7a915494..9a4a232863 100755 --- a/create_lmodsitepackage.py +++ b/create_lmodsitepackage.py @@ -84,10 +84,31 @@ end end +local function eessi_openmpi_load_hook(t) + -- disable smcuda BTL when loading OpenMPI module for aarch64/neoverse_v1, + -- to work around hang/crash due to bug in OpenMPI; + -- see https://gitlab.com/eessi/support/-/issues/41 + local frameStk = require("FrameStk"):singleton() + local mt = frameStk:mt() + local moduleName = string.match(t.modFullName, "(.-)/") + local cpuTarget = os.getenv("EESSI_SOFTWARE_SUBDIR") or "" + if (moduleName == "OpenMPI") and (cpuTarget == "aarch64/neoverse_v1") then + local msg = "Adding '^smcuda' to $OMPI_MCA_btl to work around bug in OpenMPI" + LmodMessage(msg .. " (see https://gitlab.com/eessi/support/-/issues/41)") + local ompiMcaBtl = os.getenv("OMPI_MCA_btl") + if ompiMcaBtl == nil then + setenv("OMPI_MCA_btl", "^smcuda") + else + setenv("OMPI_MCA_btl", ompiMcaBtl .. ",^smcuda") + end + end +end + -- Combine both functions into a single one, as we can only register one function as load hook in lmod -- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed function eessi_load_hook(t) eessi_cuda_enabled_load_hook(t) + eessi_openmpi_load_hook(t) end diff --git a/easystacks/pilot.nessi.no/2023.06/rebuilds/20240409-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240409-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml deleted file mode 100644 index d9fce380ff..0000000000 --- a/easystacks/pilot.nessi.no/2023.06/rebuilds/20240409-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml +++ /dev/null @@ -1,15 +0,0 @@ -# 2024-04-09 -# Rebuild all OpenMPI 4.1.x versions due to an issue with smcuda: -# https://github.com/open-mpi/ompi/issues/12270 -# https://github.com/open-mpi/ompi/pull/12344 -# https://github.com/easybuilders/easybuild-easyconfigs/pull/19940 -easyconfigs: - - OpenMPI-4.1.4-GCC-12.2.0.eb: - options: - from-pr: 19940 - - OpenMPI-4.1.5-GCC-12.3.0: - options: - from-pr: 19940 - - OpenMPI-4.1.6-GCC-13.2.0: - options: - from-pr: 19940