From 173332f479d74cd220cb25976a17035317e613ae Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 2 Apr 2024 13:02:46 +0200 Subject: [PATCH 01/10] Try to retrigger issue 517 --- install_apptainer_ubuntu.sh | 6 ++---- scripts/gpu_support/nvidia/install_cuda_host_injections.sh | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/install_apptainer_ubuntu.sh b/install_apptainer_ubuntu.sh index 5eb513db2c..8642d62999 100755 --- a/install_apptainer_ubuntu.sh +++ b/install_apptainer_ubuntu.sh @@ -5,11 +5,9 @@ set -e # see https://github.com/apptainer/singularity/issues/5390#issuecomment-899111181 sudo apt-get install alien alien --version -# stick to Apptainer < 1.3.0 by downloading from EPEL 8.8 archive, -# since CI workflow for testing scripts hangs/fails when using Apptainer 1.3.0 -# cfr. https://github.com/EESSI/software-layer/pull/514 +# Switch back to Apptainer >= 1.3.0 to retrigger +# https://github.com/EESSI/software-layer/pull/514 epel_subdir="pub/epel/8" -epel_subdir="pub/archive/epel/8.8" apptainer_rpm=$(curl --silent -L https://dl.fedoraproject.org/${epel_subdir}/Everything/x86_64/Packages/a/ | grep 'apptainer-[0-9]' | sed 's/.*\(apptainer[0-9._a-z-]*.rpm\).*/\1/g') curl -OL https://dl.fedoraproject.org/${epel_subdir}/Everything/x86_64/Packages/a/${apptainer_rpm} sudo alien -d ${apptainer_rpm} diff --git a/scripts/gpu_support/nvidia/install_cuda_host_injections.sh b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh index a9310d817a..1f1548f7dd 100755 --- a/scripts/gpu_support/nvidia/install_cuda_host_injections.sh +++ b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh @@ -12,6 +12,9 @@ # installation to be successful, this directory needs to be writeable by the user # executing this script. +# TRIGGER SCRIPT CHANGE TO REPRODUCE https://github.com/EESSI/software-layer/issues/517 +# THIS COMMENT SHOULD NEVER BE MERGED! 
+ # Initialise our bash functions TOPDIR=$(dirname $(realpath $BASH_SOURCE)) source "$TOPDIR"/../../utils.sh From 2ace856380267a9e35c44326d5ad2dd2bc61c9a2 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 2 Apr 2024 13:10:28 +0200 Subject: [PATCH 02/10] Undo change --- scripts/gpu_support/nvidia/install_cuda_host_injections.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/gpu_support/nvidia/install_cuda_host_injections.sh b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh index 1f1548f7dd..a9310d817a 100755 --- a/scripts/gpu_support/nvidia/install_cuda_host_injections.sh +++ b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh @@ -12,9 +12,6 @@ # installation to be successful, this directory needs to be writeable by the user # executing this script. -# TRIGGER SCRIPT CHANGE TO REPRODUCE https://github.com/EESSI/software-layer/issues/517 -# THIS COMMENT SHOULD NEVER BE MERGED! - # Initialise our bash functions TOPDIR=$(dirname $(realpath $BASH_SOURCE)) source "$TOPDIR"/../../utils.sh From dc986adc8b46f622d2d4d899922f5ef4731c305b Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 2 Apr 2024 13:11:11 +0200 Subject: [PATCH 03/10] Retrigger 517 --- scripts/utils.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/utils.sh b/scripts/utils.sh index b2be3f6221..bbe17584b8 100644 --- a/scripts/utils.sh +++ b/scripts/utils.sh @@ -1,3 +1,6 @@ +## THIS COMMENT IS ONLY TO RETRIGGER #517 +## IT SHOULD NEVER BE MERGED + function echo_green() { echo -e "\e[32m$1\e[0m" } From 6d5a6ae36e86253b4811e9a9f2ec2ca028558e77 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 2 Apr 2024 14:05:30 +0200 Subject: [PATCH 04/10] Add option to skip CUDA SDK installation, e.g. 
for CI environments --- EESSI-install-software.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index a905d966f6..2fff281ff5 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -14,6 +14,7 @@ display_help() { echo " -x | --http-proxy URL - provides URL for the environment variable http_proxy" echo " -y | --https-proxy URL - provides URL for the environment variable https_proxy" echo " --shared-fs-path - path to directory on shared filesystem that can be used" + echo " --skip-cuda-install - disable installing a full CUDA SDK in the host_injections prefix (e.g. in CI)" } function copy_build_log() { @@ -76,6 +77,10 @@ while [[ $# -gt 0 ]]; do export shared_fs_path="${2}" shift 2 ;; + --skip-cuda-install) + export skip_cuda_install=True + shift 1 + ;; -*|--*) echo "Error: Unknown option: $1" >&2 exit 1 @@ -195,7 +200,10 @@ ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} # Install full CUDA SDK in host_injections # Hardcode this for now, see if it works # TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install -${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula +# Allow skipping CUDA SDK install in e.g. CI environments +if [ ! -z "${skip_cuda_install}" ] && [ ! 
"${skip_cuda_install}" ]; then + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula +fi # Install drivers in host_injections # TODO: this is commented out for now, because the script assumes that nvidia-smi is available and works; From 6155e02b317624f287f5bccb694c7b4285d7e388 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 2 Apr 2024 14:18:40 +0200 Subject: [PATCH 05/10] Make sure the install_software_layer.sh test skips installing the CUDA SDK - that is simply too heavy for CI --- .github/workflows/tests_scripts.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/tests_scripts.yml b/.github/workflows/tests_scripts.yml index df1884dd8c..b1d2426774 100644 --- a/.github/workflows/tests_scripts.yml +++ b/.github/workflows/tests_scripts.yml @@ -83,6 +83,8 @@ jobs: export SINGULARITY_BIND="${PWD}:/software-layer" # force using x86_64/generic, to avoid triggering an installation from scratch sed -i "s@./EESSI-install-software.sh@\"export EESSI_SOFTWARE_SUBDIR_OVERRIDE='x86_64/generic'; ./EESSI-install-software.sh\"@g" install_software_layer.sh + # skip installation of CUDA SDKs, since this is too heavy for CI + sed -i "s@./EESSI-install-software.sh@./EESSI-install-software.sh --skip-cuda-install@g" install_software_layer.sh ./eessi_container.sh --mode run --verbose /software-layer/install_software_layer.sh - name: test create_directory_tarballs.sh script From 877f364e677a31f8218a973640269d003f95fa4b Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 2 Apr 2024 14:23:23 +0200 Subject: [PATCH 06/10] Fix logic in EESSI-install-software.sh and prove that logic is ok by now first retriggering the issue --- .github/workflows/tests_scripts.yml | 2 +- EESSI-install-software.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests_scripts.yml b/.github/workflows/tests_scripts.yml index b1d2426774..a245ee1ad5 100644 --- 
a/.github/workflows/tests_scripts.yml +++ b/.github/workflows/tests_scripts.yml @@ -84,7 +84,7 @@ jobs: # force using x86_64/generic, to avoid triggering an installation from scratch sed -i "s@./EESSI-install-software.sh@\"export EESSI_SOFTWARE_SUBDIR_OVERRIDE='x86_64/generic'; ./EESSI-install-software.sh\"@g" install_software_layer.sh # skip installation of CUDA SDKs, since this is too heavy for CI - sed -i "s@./EESSI-install-software.sh@./EESSI-install-software.sh --skip-cuda-install@g" install_software_layer.sh + # sed -i "s@./EESSI-install-software.sh@./EESSI-install-software.sh --skip-cuda-install@g" install_software_layer.sh ./eessi_container.sh --mode run --verbose /software-layer/install_software_layer.sh - name: test create_directory_tarballs.sh script diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 2fff281ff5..0374d8e758 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -201,7 +201,7 @@ ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} # Hardcode this for now, see if it works # TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install # Allow skipping CUDA SDK install in e.g. CI environments -if [ ! -z "${skip_cuda_install}" ] && [ ! "${skip_cuda_install}" ]; then +if [ -z "${skip_cuda_install}" ] || [ ! 
"${skip_cuda_install}" ]; then ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula fi From ba142caeea7d6223354b3aa7554f605ef4468fe4 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 2 Apr 2024 14:28:16 +0200 Subject: [PATCH 07/10] Now make sure CI skips CUDA SDK installation, to prove that this fixes the CI run --- .github/workflows/tests_scripts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests_scripts.yml b/.github/workflows/tests_scripts.yml index a245ee1ad5..b1d2426774 100644 --- a/.github/workflows/tests_scripts.yml +++ b/.github/workflows/tests_scripts.yml @@ -84,7 +84,7 @@ jobs: # force using x86_64/generic, to avoid triggering an installation from scratch sed -i "s@./EESSI-install-software.sh@\"export EESSI_SOFTWARE_SUBDIR_OVERRIDE='x86_64/generic'; ./EESSI-install-software.sh\"@g" install_software_layer.sh # skip installation of CUDA SDKs, since this is too heavy for CI - # sed -i "s@./EESSI-install-software.sh@./EESSI-install-software.sh --skip-cuda-install@g" install_software_layer.sh + sed -i "s@./EESSI-install-software.sh@./EESSI-install-software.sh --skip-cuda-install@g" install_software_layer.sh ./eessi_container.sh --mode run --verbose /software-layer/install_software_layer.sh - name: test create_directory_tarballs.sh script From dc915fb0b9ec5577787fa58cd618a44a77ff1ff3 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 2 Apr 2024 16:01:39 +0200 Subject: [PATCH 08/10] Print sensible message to log when the cuda installation is skipped --- EESSI-install-software.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 0374d8e758..567fed8e79 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -203,6 +203,8 @@ ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} # Allow skipping CUDA SDK install in e.g. CI environments if [ -z "${skip_cuda_install}" ] || [ ! 
"${skip_cuda_install}" ]; then ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula +else + echo "Skipping installation of CUDA SDK in host_injections, since the --skip-cuda-install flag was passed" fi # Install drivers in host_injections From 4cf4d76cc80ef5e5aa5df73cef628fca3ad56b99 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 2 Apr 2024 16:03:14 +0200 Subject: [PATCH 09/10] Issue is resolved, so we can use Apptainer 1.3.0 --- install_apptainer_ubuntu.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/install_apptainer_ubuntu.sh b/install_apptainer_ubuntu.sh index 8642d62999..5c4f37ac2d 100755 --- a/install_apptainer_ubuntu.sh +++ b/install_apptainer_ubuntu.sh @@ -5,8 +5,6 @@ set -e # see https://github.com/apptainer/singularity/issues/5390#issuecomment-899111181 sudo apt-get install alien alien --version -# Switch back to Apptainer >= 1.3.0 to retrigger -# https://github.com/EESSI/software-layer/pull/514 epel_subdir="pub/epel/8" apptainer_rpm=$(curl --silent -L https://dl.fedoraproject.org/${epel_subdir}/Everything/x86_64/Packages/a/ | grep 'apptainer-[0-9]' | sed 's/.*\(apptainer[0-9._a-z-]*.rpm\).*/\1/g') curl -OL https://dl.fedoraproject.org/${epel_subdir}/Everything/x86_64/Packages/a/${apptainer_rpm} From c92ceb2e07ddf37fc12ec86b0f6d90ded0423482 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 2 Apr 2024 16:04:12 +0200 Subject: [PATCH 10/10] Revert changes to utils.sh --- scripts/utils.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/utils.sh b/scripts/utils.sh index bbe17584b8..b2be3f6221 100644 --- a/scripts/utils.sh +++ b/scripts/utils.sh @@ -1,6 +1,3 @@ -## THIS COMMENT IS ONLY TO RETRIGGER #517 -## IT SHOULD NEVER BE MERGED - function echo_green() { echo -e "\e[32m$1\e[0m" }