diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index ec018bd049..dc18fd584a 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -35,7 +35,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: persist-credentials: false diff --git a/.github/workflows/test-software.eessi.io.yml b/.github/workflows/test-software.eessi.io.yml new file mode 100644 index 0000000000..8cfb023bc6 --- /dev/null +++ b/.github/workflows/test-software.eessi.io.yml @@ -0,0 +1,85 @@ +# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions +name: Check for missing software installations in software.eessi.io +on: [push, pull_request, workflow_dispatch] +permissions: + contents: read # to fetch code (actions/checkout) +jobs: + check_missing: + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + EESSI_VERSION: + - 2023.06 + EESSI_SOFTWARE_SUBDIR_OVERRIDE: + - aarch64/generic + - aarch64/neoverse_n1 + - aarch64/neoverse_v1 + - x86_64/amd/zen2 + - x86_64/amd/zen3 + - x86_64/intel/haswell + - x86_64/intel/skylake_avx512 + - x86_64/generic + steps: + - name: Check out software-layer repository + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Mount EESSI CernVM-FS pilot repository + uses: cvmfs-contrib/github-action-cvmfs@55899ca74cf78ab874bdf47f5a804e47c198743c # v4.0 + with: + cvmfs_config_package: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi_latest_all.deb + cvmfs_http_proxy: DIRECT + cvmfs_repositories: software.eessi.io + + - name: Test check_missing_installations.sh script + run: | + export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} + source /cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}}/init/bash + # set $EESSI_CPU_FAMILY to the CPU architecture that corresponds to $EESSI_SOFTWARE_SUBDIR_OVERRIDE (part before the first slash), + # to prevent issues with checks in the Easybuild configuration that use this variable + export EESSI_CPU_FAMILY=${EESSI_SOFTWARE_SUBDIR_OVERRIDE%%/*} + module load EasyBuild + which eb + eb --version + export EESSI_PREFIX=/cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}} + export EESSI_OS_TYPE=linux + env | grep ^EESSI | sort + echo "just run check_missing_installations.sh (should use easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml)" + for easystack_file in $(ls easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do + echo "check missing installations for ${easystack_file}..." + ./check_missing_installations.sh ${easystack_file} + ec=$? + if [[ ${ec} -ne 0 ]]; then echo "missing installations found for ${easystack_file}!" 
>&2; exit ${ec}; fi + done + + - name: Test check_missing_installations.sh with missing package (GCC/8.3.0) + run: | + export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} + source /cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}}/init/bash + # set $EESSI_CPU_FAMILY to the CPU architecture that corresponds to $EESSI_SOFTWARE_SUBDIR_OVERRIDE (part before the first slash), + # to prevent issues with checks in the Easybuild configuration that use this variable + export EESSI_CPU_FAMILY=${EESSI_SOFTWARE_SUBDIR_OVERRIDE%%/*} + module load EasyBuild + which eb + eb --version + export EESSI_PREFIX=/cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}} + export EESSI_OS_TYPE=linux + env | grep ^EESSI | sort + # create dummy easystack file with a single entry (something that is not installed in EESSI) + easystack_file="test.yml" + echo "easyconfigs:" > ${easystack_file} + echo " - GCC-8.3.0:" >> ${easystack_file} + echo "created easystack file '${easystack_file}' with a missing installation (GCC/8.3.0):" + cat ${easystack_file} + # note, check_missing_installations.sh exits 1 if a package was + # missing, which is interpreted as false (exit code based, not + # boolean logic), hence when the script exits 0 if no package was + # missing it is interpreted as true, thus the test did not capture + # the missing package + if ./check_missing_installations.sh ${easystack_file}; then + echo "did NOT capture missing package; test FAILED" + exit 1 + else + echo "captured missing package; test PASSED" + exit 0 + fi diff --git a/.github/workflows/test_eessi.yml b/.github/workflows/test_eessi.yml deleted file mode 100644 index 04195dd619..0000000000 --- a/.github/workflows/test_eessi.yml +++ /dev/null @@ -1,72 +0,0 @@ -# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions -name: Tests relying on having EESSI pilot repo mounted -on: [push, pull_request, workflow_dispatch] -permissions: - contents: read # to fetch code (actions/checkout) -jobs: - eessi_pilot_repo: - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - EESSI_VERSION: - - 2021.12 - EESSI_SOFTWARE_SUBDIR: - - aarch64/generic - - aarch64/graviton2 - - aarch64/graviton3 - - x86_64/amd/zen2 - - x86_64/amd/zen3 - - x86_64/intel/haswell - - x86_64/intel/skylake_avx512 - - x86_64/generic - steps: - - name: Check out software-layer repository - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 - - - name: Mount EESSI CernVM-FS pilot repository - uses: cvmfs-contrib/github-action-cvmfs@d4641d0d591c9a5c3be23835ced2fb648b44c04b # v3.1 - with: - cvmfs_config_package: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi_latest_all.deb - cvmfs_http_proxy: DIRECT - cvmfs_repositories: pilot.eessi-hpc.org - - - name: Test check_missing_installations.sh script - run: | - source /cvmfs/pilot.eessi-hpc.org/versions/${{matrix.EESSI_VERSION}}/init/bash - module load EasyBuild - eb --version - export EESSI_PREFIX=/cvmfs/pilot.eessi-hpc.org/versions/${{matrix.EESSI_VERSION}} - export EESSI_OS_TYPE=linux - export EESSI_SOFTWARE_SUBDIR=${{matrix.EESSI_SOFTWARE_SUBDIR}} - env | grep ^EESSI | sort - echo "just run check_missing_installations.sh (should use eessi-${{matrix.EESSI_VERSION}}.yml)" - ./check_missing_installations.sh - - - name: Test check_missing_installations.sh with missing package (GCC/8.3.0) - run: | - source /cvmfs/pilot.eessi-hpc.org/versions/${{matrix.EESSI_VERSION}}/init/bash - module load EasyBuild - eb --version
- export EESSI_PREFIX=/cvmfs/pilot.eessi-hpc.org/versions/${{matrix.EESSI_VERSION}} - export EESSI_OS_TYPE=linux - export EESSI_SOFTWARE_SUBDIR=${{matrix.EESSI_SOFTWARE_SUBDIR}} - env | grep ^EESSI | sort - echo "modify eessi-${{matrix.EESSI_VERSION}}.yml by adding a missing package (GCC/8.3.0)" - echo " GCC:" >> eessi-${{matrix.EESSI_VERSION}}.yml - echo " toolchains:" >> eessi-${{matrix.EESSI_VERSION}}.yml - echo " SYSTEM:" >> eessi-${{matrix.EESSI_VERSION}}.yml - echo " versions: '8.3.0'" >> eessi-${{matrix.EESSI_VERSION}}.yml - tail -n 4 eessi-${{matrix.EESSI_VERSION}}.yml - # note, check_missing_installations.sh exits 1 if a package was - # missing, which is intepreted as false (exit code based, not - # boolean logic), hence when the script exits 0 if no package was - # missing it is interpreted as true, thus the test did not capture - # the missing package - if ./check_missing_installations.sh; then - echo "did NOT capture missing package; test FAILED" - exit 1 - else - echo "captured missing package; test PASSED" - exit 0 - fi diff --git a/.github/workflows/test_eessi_container_script.yml b/.github/workflows/test_eessi_container_script.yml index 929fb22cec..32120d0087 100644 --- a/.github/workflows/test_eessi_container_script.yml +++ b/.github/workflows/test_eessi_container_script.yml @@ -22,7 +22,7 @@ jobs: #- save steps: - name: Check out software-layer repository - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: install Apptainer run: | @@ -45,7 +45,7 @@ jobs: elif [[ ${{matrix.SCRIPT_TEST}} == 'listrepos_default' ]]; then outfile=out_listrepos.txt ./eessi_container.sh --verbose --list-repos | tee ${outfile} - grep "EESSI-pilot" ${outfile} + grep "EESSI" ${outfile} # test use of --list-repos with custom repos.cfg elif [[ ${{matrix.SCRIPT_TEST}} == 'listrepos_custom' ]]; then @@ -57,7 +57,7 @@ jobs: echo "[EESSI/20HT.TP]" >> cfg/repos.cfg echo "repo_version = 20HT.TP" >> cfg/repos.cfg ./eessi_container.sh --verbose --list-repos | tee ${outfile} - grep "EESSI-pilot" ${outfile} + grep "EESSI" ${outfile} export EESSI_REPOS_CFG_DIR_OVERRIDE=${PWD}/cfg ./eessi_container.sh --verbose --list-repos | tee ${outfile2} @@ -90,15 +90,15 @@ jobs: elif [[ ${{matrix.SCRIPT_TEST}} == 'readwrite' ]]; then outfile=out_readwrite.txt fn="test_${RANDOM}.txt" - echo "touch /cvmfs/pilot.eessi-hpc.org/${fn}" > test_script.sh + echo "touch /cvmfs/software.eessi.io/${fn}" > test_script.sh chmod u+x test_script.sh export SINGULARITY_BIND="$PWD:/test" ./eessi_container.sh --verbose --access rw --mode run /test/test_script.sh > ${outfile} tmpdir=$(grep "\-\-resume" ${outfile} | sed "s/.*--resume \([^']*\).*/\1/g") # note: must use '--access rw' again here, since touched file is in overlay upper dir - ./eessi_container.sh --verbose --resume ${tmpdir} --access rw --mode shell <<< "ls -l /cvmfs/pilot.eessi-hpc.org/${fn}" > ${outfile} - grep "/cvmfs/pilot.eessi-hpc.org/${fn}$" $outfile + ./eessi_container.sh --verbose --resume ${tmpdir} --access rw --mode shell <<< "ls -l /cvmfs/software.eessi.io/${fn}" > ${outfile} + grep "/cvmfs/software.eessi.io/${fn}$" $outfile # test use of --resume elif [[ ${{matrix.SCRIPT_TEST}} == 'resume' ]]; then @@ -120,12 +120,12 @@ jobs: elif [[ ${{matrix.SCRIPT_TEST}} == 'save' ]]; then outfile=out_save.txt fn="test_${RANDOM}.txt" - test_cmd="touch /cvmfs/pilot.eessi-hpc.org/${fn}" + test_cmd="touch /cvmfs/software.eessi.io/${fn}" ./eessi_container.sh --verbose --mode shell 
--access rw --save test-save.tar <<< "${test_cmd}" 2>&1 | tee ${outfile} rm -f ${outfile} - ./eessi_container.sh --verbose --mode shell --access rw --resume test-save.tar <<< "ls -l /cvmfs/pilot.eessi-hpc.org/${fn}" > ${outfile} - grep "/cvmfs/pilot.eessi-hpc.org/${fn}$" $outfile + ./eessi_container.sh --verbose --mode shell --access rw --resume test-save.tar <<< "ls -l /cvmfs/software.eessi.io/${fn}" > ${outfile} + grep "/cvmfs/software.eessi.io/${fn}$" $outfile tar tfv test-save.tar | grep "overlay-upper/${fn}" diff --git a/.github/workflows/test_licenses.yml b/.github/workflows/test_licenses.yml new file mode 100644 index 0000000000..3b9675d523 --- /dev/null +++ b/.github/workflows/test_licenses.yml @@ -0,0 +1,20 @@ +# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions +name: Test software licenses +on: [push, pull_request] +permissions: + contents: read # to fetch code (actions/checkout) +jobs: + build: + runs-on: ubuntu-20.04 + steps: + - name: Check out software-layer repository + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: set up Python + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: '3.9' + + - name: Check software licenses + run: | + python licenses/spdx.py licenses/licenses.json diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cc00685a40..faa7eb82ff 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,17 +12,17 @@ jobs: fail-fast: false steps: - name: checkout - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: set up Python - uses: actions/setup-python@13ae5bb136fac2878aff31522b9efb785519f984 # v4.3.0 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: python-version: ${{matrix.python}} architecture: x64 - name: install Python packages run: | - pip install archspec + pip install archspec==0.2.2 - name: test eessi_software_subdir.py script run: | diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml index 37338693c5..47f77cc589 100644 --- a/.github/workflows/tests_archdetect.yml +++ b/.github/workflows/tests_archdetect.yml @@ -13,37 +13,60 @@ jobs: - x86_64/intel/skylake_avx512/archspec-linux-6132 - x86_64/amd/zen2/Azure-CentOS7-7V12 - x86_64/amd/zen3/Azure-CentOS7-7V73X - - ppc64le/power9le/unknown-power9le - aarch64/neoverse_n1/Azure-Ubuntu20-Altra - aarch64/neoverse_n1/AWS-awslinux-graviton2 - aarch64/neoverse_v1/AWS-awslinux-graviton3 + - aarch64/cortex_a72/debian-rpi4 + # commented out since these targets are currently not supported in software.eessi.io repo + # (and some tests assume that the corresponding subdirectory in software layer is there) + # - ppc64le/power9le/unknown-power9le fail-fast: false steps: - name: checkout - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 - - name: Enable EESSI - uses: eessi/github-action-eessi@58b50fd2eead2162c2b9ac258d4fb60cc9f30503 # v2.0.13 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Mount EESSI CernVM-FS pilot repository + uses: cvmfs-contrib/github-action-cvmfs@55899ca74cf78ab874bdf47f5a804e47c198743c # v4.0 + with: + cvmfs_config_package: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi_latest_all.deb + cvmfs_http_proxy: DIRECT + cvmfs_repositories: software.eessi.io 
+ - name: test eessi_archdetect.sh run: | export EESSI_MACHINE_TYPE=${{matrix.proc_cpuinfo}} export EESSI_MACHINE_TYPE=${EESSI_MACHINE_TYPE%%/*} export EESSI_PROC_CPUINFO=./tests/archdetect/${{matrix.proc_cpuinfo}}.cpuinfo + # check that printing of best match works correctly CPU_ARCH=$(./init/eessi_archdetect.sh cpupath) if [[ $CPU_ARCH == "$( cat ./tests/archdetect/${{matrix.proc_cpuinfo}}.output )" ]]; then - echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCH" >&2 + echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCH" else echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCH" >&2 exit 1 fi + # check that $EESSI_SOFTWARE_SUBDIR_OVERRIDE is honored + export EESSI_SOFTWARE_SUBDIR_OVERRIDE='dummy/cpu' + CPU_ARCH=$(./init/eessi_archdetect.sh cpupath) + if [[ $CPU_ARCH == "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then + echo "Test for picking up on \$EESSI_SOFTWARE_SUBDIR_OVERRIDE PASSED" + else + echo "Test for picking up on \$EESSI_SOFTWARE_SUBDIR_OVERRIDE FAILED" >&2 + exit 1 + fi + unset EESSI_SOFTWARE_SUBDIR_OVERRIDE + # check that printing of all matches works correctly (-a option for cpupath action) CPU_ARCHES=$(./init/eessi_archdetect.sh -a cpupath) if [[ $CPU_ARCHES == "$( cat ./tests/archdetect/${{matrix.proc_cpuinfo}}.all.output )" ]]; then - echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCHES" >&2 + echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCHES" else echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCHES" >&2 exit 1 fi - # Check all those architectures actually exist - for dir in $(echo "$CPU_ARCHES" | tr ':' '\n'); do - # Search all EESSI versions as we may drop support at some point - ls -d "$EESSI_PREFIX"/../*/software/linux/"$dir" - done + # Check all those architectures actually exist (if this EESSI version has been populated already) + if [ -d ${EESSI_PREFIX}/software/linux ]; then + for dir in $(echo "$CPU_ARCHES" | tr ':' '\n'); do + # Search all EESSI versions as we may drop support at some point + ls -d ${EESSI_PREFIX}/software/linux/${dir} + done + fi diff --git a/.github/workflows/tests_init.yml b/.github/workflows/tests_init.yml index 417b7851f1..053acb9730 100644 --- a/.github/workflows/tests_init.yml +++ b/.github/workflows/tests_init.yml @@ -12,17 +12,17 @@ jobs: fail-fast: false steps: - name: checkout - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: set up Python - uses: actions/setup-python@13ae5bb136fac2878aff31522b9efb785519f984 # v4.3.0 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: python-version: ${{matrix.python}} architecture: x64 - name: install Python packages run: | - pip install archspec pytest + pip install archspec==0.2.2 pytest - name: unit tests for eessi_software_subdir_for_host.py script run: diff --git a/.github/workflows/tests_readme.yml b/.github/workflows/tests_readme.yml index 5c6d0318d4..efdb796e5e 100644 --- a/.github/workflows/tests_readme.yml +++ b/.github/workflows/tests_readme.yml @@ -7,8 +7,6 @@ on: - init/eessi_defaults pull_request: - branches: - - main paths: - README.md - init/eessi_defaults @@ -19,12 +17,12 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Check out software-layer repository - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - name: verify if README.md is consistent with EESSI_PILOT_VERSION from init/eessi_defaults + - name: 
verify if README.md is consistent with EESSI_VERSION from init/eessi_defaults run: | source init/eessi_defaults - grep "${EESSI_PILOT_VERSION}" README.md + grep "${EESSI_VERSION}" README.md - name: verify if README.md is consistent with EESSI_CVMFS_REPO from init/eessi_defaults run: | diff --git a/.github/workflows/tests_scripts.yml b/.github/workflows/tests_scripts.yml index 5c0b3893ae..4944d9beaa 100644 --- a/.github/workflows/tests_scripts.yml +++ b/.github/workflows/tests_scripts.yml @@ -5,7 +5,8 @@ on: paths: - build_container.sh - create_directory_tarballs.sh - - EESSI-pilot-install-software.sh + - create_lmodsitepackage.py + - EESSI-install-software.sh - install_software_layer.sh - load_easybuild_module.sh - run_in_compat_layer_env.sh @@ -13,12 +14,11 @@ on: - update_lmod_cache.sh pull_request: - branches: - - main paths: - build_container.sh - create_directory_tarballs.sh - - EESSI-pilot-install-software.sh + - create_lmodsitepackage.py + - EESSI-install-software.sh - install_software_layer.sh - load_easybuild_module.sh - run_in_compat_layer_env.sh @@ -31,7 +31,7 @@ jobs: runs-on: ubuntu-20.04 steps: - name: checkout - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: install Apptainer run: | @@ -42,7 +42,10 @@ jobs: # bind current directory into container as /software-layer export SINGULARITY_BIND="${PWD}:/software-layer" - for EB_VERSION in '4.5.0' '4.5.1' '4.7.2'; do + # can't test with EasyBuild versions older than v4.5.2 when using EESSI 2023.06, + # since Python in compat layer is Python 3.11.x; + # testing with a single EasyBuild version takes a while in GitHub Actions, so stick to a single sensible version + for EB_VERSION in '4.6.0'; do # Create script that uses load_easybuild_module.sh which we can run in compat layer environment # note: Be careful with single vs double quotes below! 
# ${EB_VERSION} should be expanded, so use double quotes; @@ -78,20 +81,41 @@ jobs: - name: test install_software_layer.sh script run: | - # scripts need to be copied to /tmp, - # since install_software_layer.sh must be accessible from within build container - cp -a * /tmp/ - cd /tmp + # bind current directory into container as /software-layer + export SINGULARITY_BIND="${PWD}:/software-layer" # force using x86_64/generic, to avoid triggering an installation from scratch - sed -i "s@./EESSI-pilot-install-software.sh@\"export EESSI_SOFTWARE_SUBDIR_OVERRIDE='x86_64/generic'; ./EESSI-pilot-install-software.sh\"@g" install_software_layer.sh - ./build_container.sh run /tmp/$USER/EESSI /tmp/install_software_layer.sh + sed -i "s@./EESSI-install-software.sh@\"export EESSI_SOFTWARE_SUBDIR_OVERRIDE='x86_64/generic'; ./EESSI-install-software.sh\"@g" install_software_layer.sh + # skip installation of CUDA SDKs, since this is too heavy for CI + sed -i "s@./EESSI-install-software.sh@./EESSI-install-software.sh --skip-cuda-install@g" install_software_layer.sh + ./eessi_container.sh --mode run --verbose /software-layer/install_software_layer.sh - name: test create_directory_tarballs.sh script run: | + # bind current directory into container as /software-layer + export SINGULARITY_BIND="${PWD}:/software-layer" # scripts need to be copied to /tmp, # since create_directory_tarballs.sh must be accessible from within build container - cp -a * /tmp/ - cd /tmp - ./build_container.sh run /tmp/$USER/EESSI /tmp/create_directory_tarballs.sh 2021.12 + ./eessi_container.sh --mode run --verbose /software-layer/create_directory_tarballs.sh 2023.06 # check if tarballs have been produced ls -l *.tar.gz + + - name: test create_lmodsitepackage.py script + run: | + # bind current directory into container as /software-layer + export SINGULARITY_BIND="${PWD}:/software-layer" + + # Creates .lmod/SitePackage.lua in current dir, which then gets bind-mounted into /software-layer + python3 create_lmodsitepackage.py . + # run some commands to make sure that generated Lmod SitePackage file works + test_script="${PWD}/test_lmod_sitepackage.sh" + echo '#!/bin/bash' > ${test_script} + echo 'export LMOD_PACKAGE_PATH="/software-layer/.lmod"' >> ${test_script} + echo 'ml --config' >> ${test_script} + + chmod u+x ${test_script} + + out="${PWD}/test_create_lmodsitepackage.out" + ./eessi_container.sh --mode run --verbose /software-layer/run_in_compat_layer_env.sh /software-layer/test_lmod_sitepackage.sh 2>&1 | tee ${out} + for pattern in "^Site Pkg location.*/software-layer/.lmod/SitePackage.lua" "LMOD_SITEPACKAGE_LOCATION.*/software-layer/.lmod/SitePackage.lua"; do + grep "${pattern}" ${out} || (echo "Pattern '${pattern}' not found in output!"
&& exit 1) + done diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh new file mode 100755 index 0000000000..567fed8e79 --- /dev/null +++ b/EESSI-install-software.sh @@ -0,0 +1,284 @@ +#!/bin/bash +# +# Script to install EESSI software stack (version set through init/eessi_defaults) + +# see example parsing of command line arguments at +# https://wiki.bash-hackers.org/scripting/posparams#using_a_while_loop +# https://stackoverflow.com/questions/192249/how-do-i-parse-command-line-arguments-in-bash + +display_help() { + echo "usage: $0 [OPTIONS]" + echo " --build-logs-dir - location to copy EasyBuild logs to for failed builds" + echo " -g | --generic - instructs script to build for generic architecture target" + echo " -h | --help - display this usage information" + echo " -x | --http-proxy URL - provides URL for the environment variable http_proxy" + echo " -y | --https-proxy URL - provides URL for the environment variable https_proxy" + echo " --shared-fs-path - path to directory on shared filesystem that can be used" + echo " --skip-cuda-install - disable installing a full CUDA SDK in the host_injections prefix (e.g. in CI)" +} + +function copy_build_log() { + # copy specified build log to specified directory, with some context added + build_log=${1} + build_logs_dir=${2} + + # also copy to build logs directory, if specified + if [ ! -z "${build_logs_dir}" ]; then + log_filename="$(basename ${build_log})" + if [ ! -z "${SLURM_JOB_ID}" ]; then + # use subdirectory for build log in context of a Slurm job + build_log_path="${build_logs_dir}/jobs/${SLURM_JOB_ID}/${log_filename}" + else + build_log_path="${build_logs_dir}/non-jobs/${log_filename}" + fi + mkdir -p $(dirname ${build_log_path}) + cp -a ${build_log} ${build_log_path} + chmod 0644 ${build_log_path} + + # add context to end of copied log file + echo >> ${build_log_path} + echo "Context from which build log was copied:" >> ${build_log_path} + echo "- original path of build log: ${build_log}" >> ${build_log_path} + echo "- working directory: ${PWD}" >> ${build_log_path} + echo "- Slurm job ID: ${SLURM_JOB_ID}" >> ${build_log_path} + echo "- EasyBuild version: ${eb_version}" >> ${build_log_path} + echo "- easystack file: ${easystack_file}" >> ${build_log_path} + + echo "EasyBuild log file ${build_log} copied to ${build_log_path} (with context appended)" + fi +} + +POSITIONAL_ARGS=() + +while [[ $# -gt 0 ]]; do + case $1 in + -g|--generic) + EASYBUILD_OPTARCH="GENERIC" + shift + ;; + -h|--help) + display_help # Call your function + # no shifting needed here, we're done. + exit 0 + ;; + -x|--http-proxy) + export http_proxy="$2" + shift 2 + ;; + -y|--https-proxy) + export https_proxy="$2" + shift 2 + ;; + --build-logs-dir) + export build_logs_dir="${2}" + shift 2 + ;; + --shared-fs-path) + export shared_fs_path="${2}" + shift 2 + ;; + --skip-cuda-install) + export skip_cuda_install=True + shift 1 + ;; + -*|--*) + echo "Error: Unknown option: $1" >&2 + exit 1 + ;; + *) # No more options + POSITIONAL_ARGS+=("$1") # save positional arg + shift + ;; + esac +done + +set -- "${POSITIONAL_ARGS[@]}" + +TOPDIR=$(dirname $(realpath $0)) + +source $TOPDIR/scripts/utils.sh + +# honor $TMPDIR if it is already defined, use /tmp otherwise +if [ -z $TMPDIR ]; then + export WORKDIR=/tmp/$USER +else + export WORKDIR=$TMPDIR/$USER +fi + +TMPDIR=$(mktemp -d) + +echo ">> Setting up environment..." + +source $TOPDIR/init/minimal_eessi_env + +if [ -d $EESSI_CVMFS_REPO ]; then + echo_green "$EESSI_CVMFS_REPO available, OK!"
+else + fatal_error "$EESSI_CVMFS_REPO is not available!" +fi + +# make sure we're in Prefix environment by checking $SHELL +if [[ ${SHELL} = ${EPREFIX}/bin/bash ]]; then + echo_green ">> It looks like we're in a Gentoo Prefix environment, good!" +else + fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!" +fi + +# avoid that pyc files for EasyBuild are stored in EasyBuild installation directory +export PYTHONPYCACHEPREFIX=$TMPDIR/pycache + +DETECTION_PARAMETERS='' +GENERIC=0 +EB='eb' +if [[ "$EASYBUILD_OPTARCH" == "GENERIC" ]]; then + echo_yellow ">> GENERIC build requested, taking appropriate measures!" + DETECTION_PARAMETERS="$DETECTION_PARAMETERS --generic" + GENERIC=1 + EB='eb --optarch=GENERIC' +fi + +echo ">> Determining software subdirectory to use for current build host..." +if [ -z $EESSI_SOFTWARE_SUBDIR_OVERRIDE ]; then + export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS) + echo ">> Determined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script" +else + echo ">> Picking up pre-defined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE: ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" +fi + +# Set all the EESSI environment variables (respecting $EESSI_SOFTWARE_SUBDIR_OVERRIDE) +# $EESSI_SILENT - don't print any messages +# $EESSI_BASIC_ENV - give a basic set of environment variables +EESSI_SILENT=1 EESSI_BASIC_ENV=1 source $TOPDIR/init/eessi_environment_variables + +if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then + fatal_error "Failed to determine software subdirectory?!" +elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then + fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!" +else + echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!" +fi + +echo ">> Initializing Lmod..." +source $EPREFIX/usr/share/Lmod/init/bash +ml_version_out=$TMPDIR/ml.out +ml --version &> $ml_version_out +if [[ $? -eq 0 ]]; then + echo_green ">> Found Lmod ${LMOD_VERSION}" +else + fatal_error "Failed to initialize Lmod?! (see output in ${ml_version_out})" +fi + +echo ">> Configuring EasyBuild..." +source $TOPDIR/configure_easybuild + +if [ ! -z "${shared_fs_path}" ]; then + shared_eb_sourcepath=${shared_fs_path}/easybuild/sources + echo ">> Using ${shared_eb_sourcepath} as shared EasyBuild source path" + export EASYBUILD_SOURCEPATH=${shared_eb_sourcepath}:${EASYBUILD_SOURCEPATH} +fi + +echo ">> Setting up \$MODULEPATH..." +# make sure no modules are loaded +module --force purge +# ignore current $MODULEPATH entirely +module unuse $MODULEPATH +module use $EASYBUILD_INSTALLPATH/modules/all +if [[ -z ${MODULEPATH} ]]; then + fatal_error "Failed to set up \$MODULEPATH?!" +else + echo_green ">> MODULEPATH set up: ${MODULEPATH}" +fi + +# assume there's only one diff file that corresponds to the PR patch file +pr_diff=$(ls [0-9]*.diff | head -1) + +# install any additional required scripts +# order is important: these are needed to install a full CUDA SDK in host_injections +# for now, this just reinstalls all scripts. Not the most elegant, but works +${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} + +# Install full CUDA SDK in host_injections +# Hardcode this for now, see if it works +# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install +# Allow skipping CUDA SDK install in e.g.
CI environments +if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula +else + echo "Skipping installation of CUDA SDK in host_injections, since the --skip-cuda-install flag was passed" +fi + +# Install drivers in host_injections +# TODO: this is commented out for now, because the script assumes that nvidia-smi is available and works; +# if not, an error is produced, and the bot flags the whole build as failed (even when not installing GPU software) +# ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh + +# use PR patch file to determine in which easystack files stuff was added +changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing') +if [ -z "${changed_easystacks}" ]; then + echo "No missing installations, party time!" # Ensure the bot report success, as there was nothing to be build here +else + + # first process rebuilds, if any, then easystack files for new installations + # "|| true" is used to make sure that the grep command always returns success + rebuild_easystacks=$(echo "${changed_easystacks}" | (grep "/rebuilds/" || true)) + new_easystacks=$(echo "${changed_easystacks}" | (grep -v "/rebuilds/" || true)) + for easystack_file in ${rebuild_easystacks} ${new_easystacks}; do + + echo -e "Processing easystack file ${easystack_file}...\n\n" + + # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file + eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') + + # load EasyBuild module (will be installed if it's not available yet) + source ${TOPDIR}/load_easybuild_module.sh ${eb_version} + + ${EB} --show-config + + echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..." + + if [ -f ${easystack_file} ]; then + echo_green "Feeding easystack file ${easystack_file} to EasyBuild..." + + ${EB} --easystack ${TOPDIR}/${easystack_file} --robot + ec=$? + + # copy EasyBuild log file if EasyBuild exited with an error + if [ ${ec} -ne 0 ]; then + eb_last_log=$(unset EB_VERBOSE; eb --last-log) + # copy to current working directory + cp -a ${eb_last_log} . + echo "Last EasyBuild log file copied from ${eb_last_log} to ${PWD}" + # copy to build logs dir (with context added) + copy_build_log "${eb_last_log}" "${build_logs_dir}" + fi + + $TOPDIR/check_missing_installations.sh ${TOPDIR}/${easystack_file} ${TOPDIR}/${pr_diff} + else + fatal_error "Easystack file ${easystack_file} not found!" + fi + + done +fi + +### add packages here + +echo ">> Creating/updating Lmod RC file..." +export LMOD_CONFIG_DIR="${EASYBUILD_INSTALLPATH}/.lmod" +lmod_rc_file="$LMOD_CONFIG_DIR/lmodrc.lua" +lmodrc_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc.py$' > /dev/null; echo $?) +if [ ! -f $lmod_rc_file ] || [ ${lmodrc_changed} == '0' ]; then + python3 $TOPDIR/create_lmodrc.py ${EASYBUILD_INSTALLPATH} + check_exit_code $? "$lmod_rc_file created" "Failed to create $lmod_rc_file" +fi + +echo ">> Creating/updating Lmod SitePackage.lua ..." +export LMOD_PACKAGE_PATH="${EASYBUILD_INSTALLPATH}/.lmod" +lmod_sitepackage_file="$LMOD_PACKAGE_PATH/SitePackage.lua" +sitepackage_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodsitepackage.py$' > /dev/null; echo $?) +if [ ! 
-f "$lmod_sitepackage_file" ] || [ "${sitepackage_changed}" == '0' ]; then + python3 $TOPDIR/create_lmodsitepackage.py ${EASYBUILD_INSTALLPATH} + check_exit_code $? "$lmod_sitepackage_file created" "Failed to create $lmod_sitepackage_file" +fi + +echo ">> Cleaning up ${TMPDIR}..." +rm -r ${TMPDIR} diff --git a/EESSI-pilot-install-software.sh b/EESSI-pilot-install-software.sh deleted file mode 100755 index d9bcf20231..0000000000 --- a/EESSI-pilot-install-software.sh +++ /dev/null @@ -1,384 +0,0 @@ -#!/bin/bash -# -# Script to install EESSI pilot software stack (version set through init/eessi_defaults) - -# see example parsing of command line arguments at -# https://wiki.bash-hackers.org/scripting/posparams#using_a_while_loop -# https://stackoverflow.com/questions/192249/how-do-i-parse-command-line-arguments-in-bash - -display_help() { - echo "usage: $0 [OPTIONS]" - echo " -g | --generic - instructs script to build for generic architecture target" - echo " -h | --help - display this usage information" - echo " -x | --http-proxy URL - provides URL for the environment variable http_proxy" - echo " -y | --https-proxy URL - provides URL for the environment variable https_proxy" -} - -POSITIONAL_ARGS=() - -while [[ $# -gt 0 ]]; do - case $1 in - -g|--generic) - EASYBUILD_OPTARCH="GENERIC" - shift - ;; - -h|--help) - display_help # Call your function - # no shifting needed here, we're done. - exit 0 - ;; - -x|--http-proxy) - export http_proxy="$2" - shift 2 - ;; - -y|--https-proxy) - export https_proxy="$2" - shift 2 - ;; - -*|--*) - echo "Error: Unknown option: $1" >&2 - exit 1 - ;; - *) # No more options - POSITIONAL_ARGS+=("$1") # save positional arg - shift - ;; - esac -done - -set -- "${POSITIONAL_ARGS[@]}" - -TOPDIR=$(dirname $(realpath $0)) - -source $TOPDIR/scripts/utils.sh - -# honor $TMPDIR if it is already defined, use /tmp otherwise -if [ -z $TMPDIR ]; then - export WORKDIR=/tmp/$USER -else - export WORKDIR=$TMPDIR/$USER -fi - -TMPDIR=$(mktemp -d) - -echo ">> Setting up environment..." - -source $TOPDIR/init/minimal_eessi_env - -if [ -d $EESSI_CVMFS_REPO ]; then - echo_green "$EESSI_CVMFS_REPO available, OK!" -else - fatal_error "$EESSI_CVMFS_REPO is not available!" -fi - -# make sure we're in Prefix environment by checking $SHELL -if [[ ${SHELL} = ${EPREFIX}/bin/bash ]]; then - echo_green ">> It looks like we're in a Gentoo Prefix environment, good!" -else - fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!" -fi - -# avoid that pyc files for EasyBuild are stored in EasyBuild installation directory -export PYTHONPYCACHEPREFIX=$TMPDIR/pycache - -DETECTION_PARAMETERS='' -GENERIC=0 -EB='eb' -if [[ "$EASYBUILD_OPTARCH" == "GENERIC" ]]; then - echo_yellow ">> GENERIC build requested, taking appropriate measures!" - DETECTION_PARAMETERS="$DETECTION_PARAMETERS --generic" - GENERIC=1 - EB='eb --optarch=GENERIC' -fi - -echo ">> Determining software subdirectory to use for current build host..." 
-if [ -z $EESSI_SOFTWARE_SUBDIR_OVERRIDE ]; then - export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS) - echo ">> Determined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script" -else - echo ">> Picking up pre-defined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE: ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" -fi - -# Set all the EESSI environment variables (respecting $EESSI_SOFTWARE_SUBDIR_OVERRIDE) -# $EESSI_SILENT - don't print any messages -# $EESSI_BASIC_ENV - give a basic set of environment variables -EESSI_SILENT=1 EESSI_BASIC_ENV=1 source $TOPDIR/init/eessi_environment_variables - -if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then - fatal_error "Failed to determine software subdirectory?!" -elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then - fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!" -else - echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!" -fi - -echo ">> Initializing Lmod..." -source $EPREFIX/usr/share/Lmod/init/bash -ml_version_out=$TMPDIR/ml.out -ml --version &> $ml_version_out -if [[ $? -eq 0 ]]; then - echo_green ">> Found Lmod ${LMOD_VERSION}" -else - fatal_error "Failed to initialize Lmod?! (see output in ${ml_version_out}" -fi - -echo ">> Configuring EasyBuild..." -source $TOPDIR/configure_easybuild - -echo ">> Setting up \$MODULEPATH..." -# make sure no modules are loaded -module --force purge -# ignore current $MODULEPATH entirely -module unuse $MODULEPATH -module use $EASYBUILD_INSTALLPATH/modules/all -if [[ -z ${MODULEPATH} ]]; then - fatal_error "Failed to set up \$MODULEPATH?!" -else - echo_green ">> MODULEPATH set up: ${MODULEPATH}" -fi - -REQ_EB_VERSION='4.5.0' - -# load EasyBuild module (will be installed if it's not available yet) -source ${TOPDIR}/load_easybuild_module.sh ${REQ_EB_VERSION} - -echo_green "All set, let's start installing some software in ${EASYBUILD_INSTALLPATH}..." - -# install Java with fixed custom easyblock that uses patchelf to ensure right glibc is picked up, -# see https://github.com/EESSI/software-layer/issues/123 -# and https://github.com/easybuilders/easybuild-easyblocks/pull/2557 -ok_msg="Java installed, off to a good (?) start!" -fail_msg="Failed to install Java, woopsie..." -$EB Java-11.eb --robot --include-easyblocks-from-pr 2557 -check_exit_code $? "${ok_msg}" "${fail_msg}" - -# install GCC for foss/2020a -export GCC_EC="GCC-9.3.0.eb" -echo ">> Starting slow with ${GCC_EC}..." -ok_msg="${GCC_EC} installed, yippy! Off to a good start..." -fail_msg="Installation of ${GCC_EC} failed!" -# pull in easyconfig from https://github.com/easybuilders/easybuild-easyconfigs/pull/14453, -# which includes patch to fix build of GCC 9.3 when recent kernel headers are in place -$EB ${GCC_EC} --robot --from-pr 14453 GCCcore-9.3.0.eb -check_exit_code $? "${ok_msg}" "${fail_msg}" - -# install CMake with custom easyblock that patches CMake when --sysroot is used -echo ">> Install CMake with fixed easyblock to take into account --sysroot" -ok_msg="CMake installed!" -fail_msg="Installation of CMake failed, what the ..." -$EB CMake-3.16.4-GCCcore-9.3.0.eb --robot --include-easyblocks-from-pr 2248 -check_exit_code $? "${ok_msg}" "${fail_msg}" - -# If we're building OpenBLAS for GENERIC, we need https://github.com/easybuilders/easybuild-easyblocks/pull/1946 -echo ">> Installing OpenBLAS..." -ok_msg="Done with OpenBLAS!" 
-fail_msg="Installation of OpenBLAS failed!" -if [[ $GENERIC -eq 1 ]]; then - echo_yellow ">> Using https://github.com/easybuilders/easybuild-easyblocks/pull/1946 to build generic OpenBLAS." - openblas_include_easyblocks_from_pr="--include-easyblocks-from-pr 1946" -else - openblas_include_easyblocks_from_pr='' -fi -$EB $openblas_include_easyblocks_from_pr OpenBLAS-0.3.9-GCC-9.3.0.eb --robot -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing OpenMPI..." -ok_msg="OpenMPI installed, w00!" -fail_msg="Installation of OpenMPI failed, that's not good..." -$EB OpenMPI-4.0.3-GCC-9.3.0.eb --robot -check_exit_code $? "${ok_msg}" "${fail_msg}" - -# install Python -echo ">> Install Python 2.7.18 and Python 3.8.2..." -ok_msg="Python 2.7.18 and 3.8.2 installed, yaay!" -fail_msg="Installation of Python failed, oh no..." -$EB Python-2.7.18-GCCcore-9.3.0.eb Python-3.8.2-GCCcore-9.3.0.eb --robot -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing Perl..." -ok_msg="Perl installed, making progress..." -fail_msg="Installation of Perl failed, this never happens..." -# use enhanced Perl easyblock from https://github.com/easybuilders/easybuild-easyblocks/pull/2640 -# to avoid trouble when using long installation prefix (for example with EESSI pilot 2021.12 on skylake_avx512...) -$EB Perl-5.30.2-GCCcore-9.3.0.eb --robot --include-easyblocks-from-pr 2640 -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing Qt5..." -ok_msg="Qt5 installed, phieuw, that was a big one!" -fail_msg="Installation of Qt5 failed, that's frustrating..." -$EB Qt5-5.14.1-GCCcore-9.3.0.eb --robot -check_exit_code $? "${ok_msg}" "${fail_msg}" - -# skip test step when installing SciPy-bundle on aarch64, -# to dance around problem with broken numpy tests; -# cfr. https://github.com/easybuilders/easybuild-easyconfigs/issues/11959 -echo ">> Installing SciPy-bundle" -ok_msg="SciPy-bundle installed, yihaa!" -fail_msg="SciPy-bundle installation failed, bummer..." -SCIPY_EC=SciPy-bundle-2020.03-foss-2020a-Python-3.8.2.eb -if [[ "$(uname -m)" == "aarch64" ]]; then - $EB $SCIPY_EC --robot --skip-test-step -else - $EB $SCIPY_EC --robot -fi -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing GROMACS..." -ok_msg="GROMACS installed, wow!" -fail_msg="Installation of GROMACS failed, damned..." -$EB GROMACS-2020.1-foss-2020a-Python-3.8.2.eb GROMACS-2020.4-foss-2020a-Python-3.8.2.eb --robot -check_exit_code $? "${ok_msg}" "${fail_msg}" - -# note: compiling OpenFOAM is memory hungry (16GB is not enough with 8 cores)! -# 32GB is sufficient to build with 16 cores -echo ">> Installing OpenFOAM (twice!)..." -ok_msg="OpenFOAM installed, now we're talking!" -fail_msg="Installation of OpenFOAM failed, we were so close..." -$EB OpenFOAM-8-foss-2020a.eb OpenFOAM-v2006-foss-2020a.eb --robot -check_exit_code $? "${ok_msg}" "${fail_msg}" - -if [ ! "${EESSI_CPU_FAMILY}" = "ppc64le" ]; then - echo ">> Installing QuantumESPRESSO..." - ok_msg="QuantumESPRESSO installed, let's go quantum!" - fail_msg="Installation of QuantumESPRESSO failed, did somebody observe it?!" - $EB QuantumESPRESSO-6.6-foss-2020a.eb --robot - check_exit_code $? "${ok_msg}" "${fail_msg}" -fi - -echo ">> Installing R 4.0.0 (better be patient)..." -ok_msg="R installed, wow!" -fail_msg="Installation of R failed, so sad..." -$EB R-4.0.0-foss-2020a.eb --robot --parallel-extensions-install --experimental -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing Bioconductor 3.11 bundle..." -ok_msg="Bioconductor installed, enjoy!" 
-fail_msg="Installation of Bioconductor failed, that's annoying..." -$EB R-bundle-Bioconductor-3.11-foss-2020a-R-4.0.0.eb --robot -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing TensorFlow 2.3.1..." -ok_msg="TensorFlow 2.3.1 installed, w00!" -fail_msg="Installation of TensorFlow failed, why am I not surprised..." -$EB TensorFlow-2.3.1-foss-2020a-Python-3.8.2.eb --robot --include-easyblocks-from-pr 2218 -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing Horovod 0.21.3..." -ok_msg="Horovod installed! Go do some parallel training!" -fail_msg="Horovod installation failed. There comes the headache..." -$EB Horovod-0.21.3-foss-2020a-TensorFlow-2.3.1-Python-3.8.2.eb --robot -check_exit_code $? "${ok_msg}" "${fail_msg}" - -if [ ! "${EESSI_CPU_FAMILY}" = "ppc64le" ]; then - - echo ">> Installing code-server 3.7.3..." - ok_msg="code-server 3.7.3 installed, now you can use VS Code!" - fail_msg="Installation of code-server failed, that's going to be hard to fix..." - $EB code-server-3.7.3.eb --robot - check_exit_code $? "${ok_msg}" "${fail_msg}" -fi - -echo ">> Installing RStudio-Server 1.3.1093..." -ok_msg="RStudio-Server installed, enjoy!" -fail_msg="Installation of RStudio-Server failed, might be OS deps..." -$EB RStudio-Server-1.3.1093-foss-2020a-Java-11-R-4.0.0.eb --robot -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing OSU-Micro-Benchmarks 5.6.3..." -ok_msg="OSU-Micro-Benchmarks installed, yihaa!" -fail_msg="Installation of OSU-Micro-Benchmarks failed, that's unexpected..." -$EB OSU-Micro-Benchmarks-5.6.3-gompi-2020a.eb -r -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing Spark 3.1.1..." -ok_msg="Spark installed, set off the fireworks!" -fail_msg="Installation of Spark failed, no fireworks this time..." -$EB Spark-3.1.1-foss-2020a-Python-3.8.2.eb -r -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing IPython 7.15.0..." -ok_msg="IPython installed, launch your Jupyter Notebooks!" -fail_msg="Installation of IPython failed, that's unexpected..." -$EB IPython-7.15.0-foss-2020a-Python-3.8.2.eb -r -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing WRF 3.9.1.1..." -ok_msg="WRF installed, it's getting hot in here!" -fail_msg="Installation of WRF failed, that's unexpected..." -OMPI_MCA_pml=ucx UCX_TLS=tcp $EB WRF-3.9.1.1-foss-2020a-dmpar.eb -r --include-easyblocks-from-pr 2648 -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing R 4.1.0 (better be patient)..." -ok_msg="R installed, wow!" -fail_msg="Installation of R failed, so sad..." -$EB --from-pr 14821 X11-20210518-GCCcore-10.3.0.eb -r && $EB --from-pr 16011 R-4.1.0-foss-2021a.eb --robot --parallel-extensions-install --experimental -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing Nextflow 22.10.1..." -ok_msg="Nextflow installed, the work must flow..." -fail_msg="Installation of Nextflow failed, that's unexpected..." -$EB -r --from-pr 16531 Nextflow-22.10.1.eb -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing OSU-Micro-Benchmarks/5.7.1-gompi-2021a..." -ok_msg="OSU-Micro-Benchmarks installed, yihaa!" -fail_msg="Installation of OSU-Micro-Benchmarks failed, that's unexpected..." -$EB OSU-Micro-Benchmarks-5.7.1-gompi-2021a.eb -r -check_exit_code $? "${ok_msg}" "${fail_msg}" - -echo ">> Installing EasyBuild 4.5.1..." -ok_msg="EasyBuild v4.5.1 installed" -fail_msg="EasyBuild v4.5.1 failed to install" -$EB --from-pr 14545 --include-easyblocks-from-pr 2805 -check_exit_code $? 
"${ok_msg}" "${fail_msg}" - -LMOD_IGNORE_CACHE=1 module swap EasyBuild/4.5.1 -check_exit_code $? "Swapped to EasyBuild/4.5.1" "Couldn't swap to EasyBuild/4.5.1" - -echo ">> Installing SciPy-bundle with foss/2021a..." -ok_msg="SciPy-bundle with foss/2021a installed, welcome to the modern age" -fail_msg="Installation of SciPy-bundle with foss/2021a failed, back to the stone age..." -# use GCCcore easyconfig from https://github.com/easybuilders/easybuild-easyconfigs/pull/14454 -# which includes patch to fix installation with recent Linux kernel headers -$EB --from-pr 14454 GCCcore-10.3.0.eb --robot -# use enhanced Perl easyblock from https://github.com/easybuilders/easybuild-easyblocks/pull/2640 -# to avoid trouble when using long installation prefix (for example with EESSI pilot 2021.12 on skylake_avx512...) -$EB Perl-5.32.1-GCCcore-10.3.0.eb --robot --include-easyblocks-from-pr 2640 -# use enhanced CMake easyblock to patch CMake's UnixPaths.cmake script if --sysroot is set -# from https://github.com/easybuilders/easybuild-easyblocks/pull/2248 -$EB CMake-3.20.1-GCCcore-10.3.0.eb --robot --include-easyblocks-from-pr 2248 -# use Rust easyconfig from https://github.com/easybuilders/easybuild-easyconfigs/pull/14584 -# that includes patch to fix bootstrap problem when using alternate sysroot -$EB --from-pr 14584 Rust-1.52.1-GCCcore-10.3.0.eb --robot -# use OpenBLAS easyconfig from https://github.com/easybuilders/easybuild-easyconfigs/pull/15885 -# which includes a patch to fix installation on POWER -$EB $openblas_include_easyblocks_from_pr --from-pr 15885 OpenBLAS-0.3.15-GCC-10.3.0.eb --robot -# ignore failing FlexiBLAS tests when building on POWER; -# some tests are failing due to a segmentation fault due to "invalid memory reference", -# see also https://github.com/easybuilders/easybuild-easyconfigs/pull/12476; -# using -fstack-protector-strong -fstack-clash-protection should fix that, -# but it doesn't for some reason when building for ppc64le/generic... -if [ "${EESSI_SOFTWARE_SUBDIR}" = "ppc64le/generic" ]; then - $EB FlexiBLAS-3.0.4-GCC-10.3.0.eb --ignore-test-failure -else - $EB FlexiBLAS-3.0.4-GCC-10.3.0.eb -fi - -$EB SciPy-bundle-2021.05-foss-2021a.eb --robot -check_exit_code $? "${ok_msg}" "${fail_msg}" - -### add packages here - -echo ">> Creating/updating Lmod cache..." -export LMOD_RC="${EASYBUILD_INSTALLPATH}/.lmod/lmodrc.lua" -if [ ! -f $LMOD_RC ]; then - python3 $TOPDIR/create_lmodrc.py ${EASYBUILD_INSTALLPATH} - check_exit_code $? "$LMOD_RC created" "Failed to create $LMOD_RC" -fi - -$TOPDIR/update_lmod_cache.sh ${EPREFIX} ${EASYBUILD_INSTALLPATH} - -$TOPDIR/check_missing_installations.sh - -echo ">> Cleaning up ${TMPDIR}..." 
-rm -r ${TMPDIR} diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh new file mode 100755 index 0000000000..446a156cb8 --- /dev/null +++ b/EESSI-remove-software.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# +# Script to remove part of the EESSI software stack (version set through init/eessi_defaults) + +# see example parsing of command line arguments at +# https://wiki.bash-hackers.org/scripting/posparams#using_a_while_loop +# https://stackoverflow.com/questions/192249/how-do-i-parse-command-line-arguments-in-bash + +display_help() { + echo "usage: $0 [OPTIONS]" + echo " -g | --generic - instructs script to build for generic architecture target" + echo " -h | --help - display this usage information" +} + +POSITIONAL_ARGS=() + +while [[ $# -gt 0 ]]; do + case $1 in + -g|--generic) + DETECTION_PARAMETERS="--generic" + shift + ;; + -h|--help) + display_help # Call your function + # no shifting needed here, we're done. + exit 0 + ;; + -*|--*) + echo "Error: Unknown option: $1" >&2 + exit 1 + ;; + *) # No more options + POSITIONAL_ARGS+=("$1") # save positional arg + shift + ;; + esac +done + +set -- "${POSITIONAL_ARGS[@]}" + +TOPDIR=$(dirname $(realpath $0)) + +export TMPDIR=$(mktemp -d /tmp/eessi-remove.XXXXXXXX) + +source $TOPDIR/scripts/utils.sh + +echo ">> Determining software subdirectory to use for current build host..." +if [ -z $EESSI_SOFTWARE_SUBDIR_OVERRIDE ]; then + export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS) + echo ">> Determined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script" +else + echo ">> Picking up pre-defined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE: ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" +fi + +echo ">> Setting up environment..." + +source $TOPDIR/init/bash + +if [ -d $EESSI_CVMFS_REPO ]; then + echo_green "$EESSI_CVMFS_REPO available, OK!" +else + fatal_error "$EESSI_CVMFS_REPO is not available!" +fi + +if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then + fatal_error "Failed to determine software subdirectory?!" +elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then + fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!" +else + echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!" +fi + +echo ">> Configuring EasyBuild..." +EB="eb" +source $TOPDIR/configure_easybuild + +echo ">> Setting up \$MODULEPATH..." +# make sure no modules are loaded +module --force purge +# ignore current $MODULEPATH entirely +module unuse $MODULEPATH +module use $EASYBUILD_INSTALLPATH/modules/all +if [[ -z ${MODULEPATH} ]]; then + fatal_error "Failed to set up \$MODULEPATH?!" +else + echo_green ">> MODULEPATH set up: ${MODULEPATH}" +fi + +# assume there's only one diff file that corresponds to the PR patch file +pr_diff=$(ls [0-9]*.diff | head -1) + +# if this script is run as root, use PR patch file to determine if software needs to be removed first +if [ $EUID -eq 0 ]; then + changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") + if [ -z ${changed_easystacks_rebuilds} ]; then + echo "No software needs to be removed." 
+ else + for easystack_file in ${changed_easystacks_rebuilds}; do + # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file + eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') + + # load EasyBuild module (will be installed if it's not available yet) + source ${TOPDIR}/load_easybuild_module.sh ${eb_version} + + if [ -f ${easystack_file} ]; then + echo_green "Software rebuild(s) requested in ${easystack_file}, so determining which existing installation have to be removed..." + # we need to remove existing installation directories first, + # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) + # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) + rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + for app in ${rebuild_apps}; do + app_dir=${EASYBUILD_INSTALLPATH}/software/${app} + app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua + echo_yellow "Removing ${app_dir} and ${app_module}..." + rm -rf ${app_dir} + rm -rf ${app_module} + done + else + fatal_error "Easystack file ${easystack_file} not found!" + fi + done + fi +else + fatal_error "This script can only be run by root!" +fi diff --git a/README.md b/README.md index daf02eebc2..ab73ad7579 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,22 @@ # Software layer -The software layer of the EESSI project uses [EasyBuild](https://easybuild.readthedocs.io), [Lmod](https://lmod.readthedocs.io) and [archspec](https://archspec.readthedocs.io). +The software layer of the EESSI project uses [EasyBuild](https://docs.easybuild.io), [Lmod](https://lmod.readthedocs.io) and [archspec](https://archspec.readthedocs.io). -See also https://eessi.github.io/docs/software_layer. +See also https://www.eessi.io/docs/software_layer . ## Pilot software stack You can set up your environment by sourcing the init script: ``` -$ source /cvmfs/pilot.eessi-hpc.org/versions/2021.12/init/bash -Found EESSI pilot repo @ /cvmfs/pilot.eessi-hpc.org/versions/2021.12! +$ source /cvmfs/software.eessi.io/versions/2023.06/init/bash +Found EESSI repo @ /cvmfs/software.eessi.io/versions/2023.06! Derived subdirectory for software layer: x86_64/intel/haswell -Using x86_64/intel/haswell subdirectory for software layer (HARDCODED) +Using x86_64/intel/haswell subdirectory for software layer Initializing Lmod... -Prepending /cvmfs/pilot.eessi-hpc.org/versions/2021.12/software/x86_64/intel/haswell/modules/all to $MODULEPATH... -Environment set up to use EESSI pilot software stack, have fun! -[EESSI pilot 2021.12] $ +Prepending /cvmfs/software.eessi.io/versions/2023.06/software/x86_64/intel/haswell/modules/all to $MODULEPATH... +Environment set up to use EESSI (2023.06), have fun! +[EESSI 2023.06] $ ``` ### Accessing EESSI via a container diff --git a/bot/build.sh b/bot/build.sh index c8def2cdd3..dcc61c19d4 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -53,6 +53,33 @@ LOCAL_TMP=$(cfg_get_value "site_config" "local_tmp") echo "bot/build.sh: LOCAL_TMP='${LOCAL_TMP}'" # TODO should local_tmp be mandatory? 
--> then we check here and exit if it is not provided +# check if path to copy build logs to is specified, so we can copy build logs for failing builds there +BUILD_LOGS_DIR=$(cfg_get_value "site_config" "build_logs_dir") +echo "bot/build.sh: BUILD_LOGS_DIR='${BUILD_LOGS_DIR}'" +# if $BUILD_LOGS_DIR is set, add it to $SINGULARITY_BIND so the path is available in the build container +if [[ ! -z ${BUILD_LOGS_DIR} ]]; then + mkdir -p ${BUILD_LOGS_DIR} + if [[ -z ${SINGULARITY_BIND} ]]; then + export SINGULARITY_BIND="${BUILD_LOGS_DIR}" + else + export SINGULARITY_BIND="${SINGULARITY_BIND},${BUILD_LOGS_DIR}" + fi +fi + +# check if path to directory on shared filesystem is specified, +# and use it as location for source tarballs used by EasyBuild if so +SHARED_FS_PATH=$(cfg_get_value "site_config" "shared_fs_path") +echo "bot/build.sh: SHARED_FS_PATH='${SHARED_FS_PATH}'" +# if $SHARED_FS_PATH is set, add it to $SINGULARITY_BIND so the path is available in the build container +if [[ ! -z ${SHARED_FS_PATH} ]]; then + mkdir -p ${SHARED_FS_PATH} + if [[ -z ${SINGULARITY_BIND} ]]; then + export SINGULARITY_BIND="${SHARED_FS_PATH}" + else + export SINGULARITY_BIND="${SINGULARITY_BIND},${SHARED_FS_PATH}" + fi +fi + SINGULARITY_CACHEDIR=$(cfg_get_value "site_config" "container_cachedir") echo "bot/build.sh: SINGULARITY_CACHEDIR='${SINGULARITY_CACHEDIR}'" if [[ ! -z ${SINGULARITY_CACHEDIR} ]]; then @@ -101,12 +128,12 @@ EESSI_REPOS_CFG_DIR_OVERRIDE=$(cfg_get_value "repository" "repos_cfg_dir") export EESSI_REPOS_CFG_DIR_OVERRIDE=${EESSI_REPOS_CFG_DIR_OVERRIDE:-${PWD}/cfg} echo "bot/build.sh: EESSI_REPOS_CFG_DIR_OVERRIDE='${EESSI_REPOS_CFG_DIR_OVERRIDE}'" -# determine pilot version to be used from .repository.repo_version in ${JOB_CFG_FILE} -# here, just set & export EESSI_PILOT_VERSION_OVERRIDE +# determine EESSI version to be used from .repository.repo_version in ${JOB_CFG_FILE} +# here, just set & export EESSI_VERSION_OVERRIDE # next script (eessi_container.sh) makes use of it via sourcing init scripts # (e.g., init/eessi_defaults or init/minimal_eessi_env) -export EESSI_PILOT_VERSION_OVERRIDE=$(cfg_get_value "repository" "repo_version") -echo "bot/build.sh: EESSI_PILOT_VERSION_OVERRIDE='${EESSI_PILOT_VERSION_OVERRIDE}'" +export EESSI_VERSION_OVERRIDE=$(cfg_get_value "repository" "repo_version") +echo "bot/build.sh: EESSI_VERSION_OVERRIDE='${EESSI_VERSION_OVERRIDE}'" # determine CVMFS repo to be used from .repository.repo_name in ${JOB_CFG_FILE} # here, just set EESSI_CVMFS_REPO_OVERRIDE, a bit further down @@ -141,19 +168,64 @@ COMMON_ARGS+=("--mode" "run") # make sure to use the same parent dir for storing tarballs of tmp PREVIOUS_TMP_DIR=${PWD}/previous_tmp +# prepare arguments to install_software_layer.sh (specific to build step) +declare -a BUILD_STEP_ARGS=() +declare -a INSTALL_SCRIPT_ARGS=() +declare -a REMOVAL_SCRIPT_ARGS=() +if [[ ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} =~ .*/generic$ ]]; then + INSTALL_SCRIPT_ARGS+=("--generic") + REMOVAL_SCRIPT_ARGS+=("--generic") +fi +[[ ! -z ${BUILD_LOGS_DIR} ]] && INSTALL_SCRIPT_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}") +[[ ! 
-z ${SHARED_FS_PATH} ]] && INSTALL_SCRIPT_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}") + +# determine if the removal step has to be run +# assume there's only one diff file that corresponds to the PR patch file +pr_diff=$(ls [0-9]*.diff | head -1) +# the true at the end of the next command is important: grep will expectedly return 1 if there is no easystack file being added under rebuilds, +# but due to "set -e" the entire script would otherwise fail +changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | (grep "/rebuilds/" || true)) +if [[ -z "${changed_easystacks_rebuilds}" ]]; then + echo "This PR does not add any easystack files in a rebuilds subdirectory, so let's skip the removal step." +else + # prepare directory to store tarball of tmp for removal and build steps + TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step + mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} + + # prepare arguments to eessi_container.sh specific to remove step + declare -a REMOVAL_STEP_ARGS=() + REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") + REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") + # add fakeroot option in order to be able to remove software, see: + # https://github.com/EESSI/software-layer/issues/312 + REMOVAL_STEP_ARGS+=("--fakeroot") + + # create tmp file for output of removal step + removal_outerr=$(mktemp remove.outerr.XXXX) + + echo "Executing command to remove software:" + echo "./eessi_container.sh ${COMMON_ARGS[@]} ${REMOVAL_STEP_ARGS[@]}" + echo " -- ./EESSI-remove-software.sh \"${REMOVAL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${removal_outerr}" + ./eessi_container.sh "${COMMON_ARGS[@]}" "${REMOVAL_STEP_ARGS[@]}" \ + -- ./EESSI-remove-software.sh "${REMOVAL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${removal_outerr} + + # make sure that the build step resumes from the same temporary directory + # this is important, as otherwise the removed software will still be there + REMOVAL_TMPDIR=$(grep ' as tmp directory ' ${removal_outerr} | cut -d ' ' -f 2) + BUILD_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") +fi + # prepare directory to store tarball of tmp for build step TARBALL_TMP_BUILD_STEP_DIR=${PREVIOUS_TMP_DIR}/build_step mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR} # prepare arguments to eessi_container.sh specific to build step -declare -a BUILD_STEP_ARGS=() BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}") BUILD_STEP_ARGS+=("--storage" "${STORAGE}") - -# prepare arguments to install_software_layer.sh (specific to build step) -GENERIC_OPT= -if [[ ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} =~ .*/generic$ ]]; then - GENERIC_OPT="--generic" +# add options required to handle NVIDIA support +BUILD_STEP_ARGS+=("--nvidia" "all") +if [[ ! 
-z ${SHARED_FS_PATH} ]]; then + BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi # create tmp file for output of build step @@ -161,9 +233,9 @@ build_outerr=$(mktemp build.outerr.XXXX) echo "Executing command to build software:" echo "./eessi_container.sh ${COMMON_ARGS[@]} ${BUILD_STEP_ARGS[@]}" -echo " -- ./install_software_layer.sh ${GENERIC_OPT} \"$@\" 2>&1 | tee -a ${build_outerr}" +echo " -- ./install_software_layer.sh \"${INSTALL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${build_outerr}" ./eessi_container.sh "${COMMON_ARGS[@]}" "${BUILD_STEP_ARGS[@]}" \ - -- ./install_software_layer.sh ${GENERIC_OPT} "$@" 2>&1 | tee -a ${build_outerr} + -- ./install_software_layer.sh "${INSTALL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${build_outerr} # prepare directory to store tarball of tmp for tarball step TARBALL_TMP_TARBALL_STEP_DIR=${PREVIOUS_TMP_DIR}/tarball_step @@ -177,13 +249,19 @@ declare -a TARBALL_STEP_ARGS=() TARBALL_STEP_ARGS+=("--save" "${TARBALL_TMP_TARBALL_STEP_DIR}") # determine temporary directory to resume from -BUILD_TMPDIR=$(grep ' as tmp directory ' ${build_outerr} | cut -d ' ' -f 2) -TARBALL_STEP_ARGS+=("--resume" "${BUILD_TMPDIR}") +if [[ -z ${REMOVAL_TMPDIR} ]]; then + # no rebuild step was done, so the tarball step should resume from the build directory + BUILD_TMPDIR=$(grep ' as tmp directory ' ${build_outerr} | cut -d ' ' -f 2) + TARBALL_STEP_ARGS+=("--resume" "${BUILD_TMPDIR}") +else + # a removal step was done, so resume from its temporary directory (which was also used for the build step) + TARBALL_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") +fi timestamp=$(date +%s) -# to set EESSI_PILOT_VERSION we need to source init/eessi_defaults now +# to set EESSI_VERSION we need to source init/eessi_defaults now source init/eessi_defaults -export TGZ=$(printf "eessi-%s-software-%s-%s-%d.tar.gz" ${EESSI_PILOT_VERSION} ${EESSI_OS_TYPE} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-} ${timestamp}) +export TGZ=$(printf "eessi-%s-software-%s-%s-%d.tar.gz" ${EESSI_VERSION} ${EESSI_OS_TYPE} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-} ${timestamp}) # value of first parameter to create_tarball.sh - TMP_IN_CONTAINER - needs to be # synchronised with setting of TMP_IN_CONTAINER in eessi_container.sh @@ -192,8 +270,8 @@ export TGZ=$(printf "eessi-%s-software-%s-%s-%d.tar.gz" ${EESSI_PILOT_VERSION} $ TMP_IN_CONTAINER=/tmp echo "Executing command to create tarball:" echo "./eessi_container.sh ${COMMON_ARGS[@]} ${TARBALL_STEP_ARGS[@]}" -echo " -- ./create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_PILOT_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} /eessi_bot_job/${TGZ} 2>&1 | tee -a ${tar_outerr}" +echo " -- ./create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} /eessi_bot_job/${TGZ} 2>&1 | tee -a ${tar_outerr}" ./eessi_container.sh "${COMMON_ARGS[@]}" "${TARBALL_STEP_ARGS[@]}" \ - -- ./create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_PILOT_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} /eessi_bot_job/${TGZ} 2>&1 | tee -a ${tar_outerr} + -- ./create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} /eessi_bot_job/${TGZ} 2>&1 | tee -a ${tar_outerr} exit 0 diff --git a/bot/check-build.sh b/bot/check-build.sh index ec1ca56bba..d8246c67be 100755 --- a/bot/check-build.sh +++ b/bot/check-build.sh @@ -98,15 +98,15 @@ job_dir=${PWD} job_out="slurm-${SLURM_JOB_ID}.out" [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for job output file(s) matching '"${job_out}"'" if [[ -f ${job_out} ]]; then - SLURM=1 + SLURM_OUTPUT_FOUND=1 [[ ${VERBOSE} 
-ne 0 ]] && echo " found slurm output file '"${job_out}"'" else - SLURM=0 + SLURM_OUTPUT_FOUND=0 [[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found" fi ERROR=-1 -if [[ ${SLURM} -eq 1 ]]; then +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_error='ERROR: ' grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_error}") [[ $? -eq 0 ]] && ERROR=1 || ERROR=0 @@ -116,7 +116,7 @@ if [[ ${SLURM} -eq 1 ]]; then fi FAILED=-1 -if [[ ${SLURM} -eq 1 ]]; then +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_failed='FAILED: ' grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_failed}") [[ $? -eq 0 ]] && FAILED=1 || FAILED=0 @@ -126,7 +126,7 @@ if [[ ${SLURM} -eq 1 ]]; then fi MISSING=-1 -if [[ ${SLURM} -eq 1 ]]; then +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_req_missing=' required modules missing:' grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_req_missing}") [[ $? -eq 0 ]] && MISSING=1 || MISSING=0 @@ -136,7 +136,7 @@ if [[ ${SLURM} -eq 1 ]]; then fi NO_MISSING=-1 -if [[ ${SLURM} -eq 1 ]]; then +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_no_missing='No missing installations' grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_no_missing}") [[ $? -eq 0 ]] && NO_MISSING=1 || NO_MISSING=0 @@ -147,7 +147,7 @@ fi TGZ=-1 TARBALL= -if [[ ${SLURM} -eq 1 ]]; then +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_tgz_created="\.tar\.gz created!" grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_tgz_created}" | sort -u) if [[ $? -eq 0 ]]; then @@ -169,9 +169,27 @@ fi [[ ${VERBOSE} -ne 0 ]] && echo " NO_MISSING.: $([[ $NO_MISSING -eq 1 ]] && echo 'yes' || echo 'no') (yes)" [[ ${VERBOSE} -ne 0 ]] && echo " TGZ_CREATED: $([[ $TGZ -eq 1 ]] && echo 'yes' || echo 'no') (yes)" +# Here, we try to do some additional analysis on the output file +# to see if we can print a more clear 'reason' for the failure +# For now, we only analyse unmerged EasyConfigs as potential cause, but we can easily add checks for other +# specific scenarios below + +# Check for the pattern being added here by check_missing_installations.sh to the output to +# see if EasyConfigs might have been unmerged, and that's causing a failure +UNMERGED_EASYCONFIG=-1 +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + gp_unmerged="are you sure all PRs referenced have been merged in EasyBuild" + grep_unmerged=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${gp_unmerged}") + [[ $? -eq 0 ]] && UNMERGED_EASYCONFIG=1 || UNMERGED_EASYCONFIG=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${gp_unmerged}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_unmerged}" +fi + job_result_file=_bot_job${SLURM_JOB_ID}.result -if [[ ${SLURM} -eq 1 ]] && \ +# Default reason: +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]] && \ [[ ${ERROR} -eq 0 ]] && \ [[ ${FAILED} -eq 0 ]] && \ [[ ${MISSING} -eq 0 ]] && \ @@ -180,10 +198,17 @@ if [[ ${SLURM} -eq 1 ]] && \ [[ ! -z ${TARBALL} ]]; then # SUCCESS status="SUCCESS" + reason="" summary=":grin: SUCCESS" +elif [[ ${UNMERGED_EASYCONFIG} -eq 1 ]]; then + status="FAILURE" + reason="EasyConfig not found during missing installation check. Are you sure all PRs referenced have been merged in EasyBuild?" 
+ summary=":cry: FAILURE" else # FAILURE status="FAILURE" + # General failure, we don't know a more specific reason + reason="" summary=":cry: FAILURE" fi @@ -253,14 +278,6 @@ fi # ### -# construct and write complete PR comment details: implements third alternative -comment_template="
__SUMMARY_FMT__
__DETAILS_FMT____ARTEFACTS_FMT__
" -comment_summary_fmt="__SUMMARY__ _(click triangle for details)_" -comment_details_fmt="
_Details_
__DETAILS_LIST__
" -comment_success_item_fmt=":white_check_mark: __ITEM__" -comment_failure_item_fmt=":x: __ITEM__" -comment_artefacts_fmt="
_Artefacts_
__ARTEFACTS_LIST__
" -comment_artefact_details_fmt="
__ARTEFACT_SUMMARY____ARTEFACT_DETAILS__
" function print_br_item() { format="${1}" @@ -332,42 +349,66 @@ echo -n "comment_description = " >> ${job_result_file} # - __DETAILS_FMT__ -> variable $comment_details # - __ARTEFACTS_FMT__ -> variable $comment_artefacts +# construct and write complete PR comment details: implements third alternative +comment_template="
__SUMMARY_FMT__
__REASON_FMT____DETAILS_FMT____ARTEFACTS_FMT__
" +comment_success_item_fmt=":white_check_mark: __ITEM__" +comment_failure_item_fmt=":x: __ITEM__" + +# Initialize comment_description +comment_description=${comment_template} + +# Now, start replacing template items one by one +# Replace the summary template (__SUMMARY_FMT__) +comment_summary_fmt="__SUMMARY__ _(click triangle for details)_" comment_summary="${comment_summary_fmt/__SUMMARY__/${summary}}" +comment_description=${comment_template/__SUMMARY_FMT__/${comment_summary}} + +# Only add if there is a reason (e.g. no reason for successful runs) +if [[ ! -z ${reason} ]]; then + comment_reason_fmt="
_Reason_
__REASONS__
" + reason_details="${comment_reason_fmt/__REASONS__/${reason}}" + comment_description=${comment_description/__REASON_FMT__/${reason_details}} +else + comment_description=${comment_description/__REASON_FMT__/""} +fi -# first construct comment_details_list, abbreviated CoDeList +# Replace the details template (__DETAILS_FMT__) +# first construct comment_details_list, abbreviated comment_details_list # then use it to set comment_details -CoDeList="" +comment_details_list="" success_msg="job output file ${job_out}" failure_msg="no job output file ${job_out}" -CoDeList=${CoDeList}$(add_detail ${SLURM} 1 "${success_msg}" "${failure_msg}") +comment_details_list=${comment_details_list}$(add_detail ${SLURM_OUTPUT_FOUND} 1 "${success_msg}" "${failure_msg}") success_msg="no message matching ${GP_error}" failure_msg="found message matching ${GP_error}" -CoDeList=${CoDeList}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}") +comment_details_list=${comment_details_list}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}") success_msg="no message matching ${GP_failed}" failure_msg="found message matching ${GP_failed}" -CoDeList=${CoDeList}$(add_detail ${FAILED} 0 "${success_msg}" "${failure_msg}") +comment_details_list=${comment_details_list}$(add_detail ${FAILED} 0 "${success_msg}" "${failure_msg}") success_msg="no message matching ${GP_req_missing}" failure_msg="found message matching ${GP_req_missing}" -CoDeList=${CoDeList}$(add_detail ${MISSING} 0 "${success_msg}" "${failure_msg}") +comment_details_list=${comment_details_list}$(add_detail ${MISSING} 0 "${success_msg}" "${failure_msg}") success_msg="found message(s) matching ${GP_no_missing}" failure_msg="no message matching ${GP_no_missing}" -CoDeList=${CoDeList}$(add_detail ${NO_MISSING} 1 "${success_msg}" "${failure_msg}") +comment_details_list=${comment_details_list}$(add_detail ${NO_MISSING} 1 "${success_msg}" "${failure_msg}") success_msg="found message matching ${GP_tgz_created}" failure_msg="no message matching ${GP_tgz_created}" -CoDeList=${CoDeList}$(add_detail ${TGZ} 1 "${success_msg}" "${failure_msg}") - -comment_details="${comment_details_fmt/__DETAILS_LIST__/${CoDeList}}" +comment_details_list=${comment_details_list}$(add_detail ${TGZ} 1 "${success_msg}" "${failure_msg}") +# Now, do the actual replacement of __DETAILS_FMT__ +comment_details_fmt="
_Details_
__DETAILS_LIST__
" +comment_details="${comment_details_fmt/__DETAILS_LIST__/${comment_details_list}}" +comment_description=${comment_description/__DETAILS_FMT__/${comment_details}} -# first construct comment_artefacts_list, abbreviated CoArList +# first construct comment_artefacts_list # then use it to set comment_artefacts -CoArList="" +comment_artifacts_list="" # TARBALL should only contain a single tarball if [[ ! -z ${TARBALL} ]]; then @@ -427,50 +468,49 @@ if [[ ! -z ${TARBALL} ]]; then software_pkgs=$(echo "${software_entries}" | sed -e "s@${prefix}/software/@@" | awk -F/ '{if (NR >= 2) {print $1 "/" $2}}' | sort -u) artefact_summary="$(print_code_item '__ITEM__' ${TARBALL})" - CoArList="" - CoArList="${CoArList}$(print_br_item2 'size: __ITEM__ MiB (__ITEM2__ bytes)' ${size_mib} ${size})" - CoArList="${CoArList}$(print_br_item 'entries: __ITEM__' ${entries})" - CoArList="${CoArList}$(print_br_item 'modules under ___ITEM___' ${prefix}/modules/all)" - CoArList="${CoArList}
"
+    comment_artifacts_list=""
+    comment_artifacts_list="${comment_artifacts_list}$(print_br_item2 'size: __ITEM__ MiB (__ITEM2__ bytes)' ${size_mib} ${size})"
+    comment_artifacts_list="${comment_artifacts_list}$(print_br_item 'entries: __ITEM__' ${entries})"
+    comment_artifacts_list="${comment_artifacts_list}$(print_br_item 'modules under ___ITEM___' ${prefix}/modules/all)"
+    comment_artifacts_list="${comment_artifacts_list}
"
     if [[ ! -z ${modules} ]]; then
         while IFS= read -r mod ; do
-            CoArList="${CoArList}$(print_br_item '__ITEM__' ${mod})"
+            comment_artifacts_list="${comment_artifacts_list}$(print_br_item '__ITEM__' ${mod})"
         done <<< "${modules}"
     else
-        CoArList="${CoArList}$(print_br_item '__ITEM__' 'no module files in tarball')"
+        comment_artifacts_list="${comment_artifacts_list}$(print_br_item '__ITEM__' 'no module files in tarball')"
     fi
-    CoArList="${CoArList}
" - CoArList="${CoArList}$(print_br_item 'software under ___ITEM___' ${prefix}/software)" - CoArList="${CoArList}
"
+    comment_artifacts_list="${comment_artifacts_list}
" + comment_artifacts_list="${comment_artifacts_list}$(print_br_item 'software under ___ITEM___' ${prefix}/software)" + comment_artifacts_list="${comment_artifacts_list}
"
     if [[ ! -z ${software_pkgs} ]]; then
         while IFS= read -r sw_pkg ; do
-            CoArList="${CoArList}$(print_br_item '__ITEM__' ${sw_pkg})"
+            comment_artifacts_list="${comment_artifacts_list}$(print_br_item '__ITEM__' ${sw_pkg})"
         done <<< "${software_pkgs}"
     else
-        CoArList="${CoArList}$(print_br_item '__ITEM__' 'no software packages in tarball')"
+        comment_artifacts_list="${comment_artifacts_list}$(print_br_item '__ITEM__' 'no software packages in tarball')"
     fi
-    CoArList="${CoArList}
" - CoArList="${CoArList}$(print_br_item 'other under ___ITEM___' ${prefix})" - CoArList="${CoArList}
"
+    comment_artifacts_list="${comment_artifacts_list}
" + comment_artifacts_list="${comment_artifacts_list}$(print_br_item 'other under ___ITEM___' ${prefix})" + comment_artifacts_list="${comment_artifacts_list}
"
     if [[ ! -z ${other_shortened} ]]; then
         while IFS= read -r other ; do
-            CoArList="${CoArList}$(print_br_item '__ITEM__' ${other})"
+            comment_artifacts_list="${comment_artifacts_list}$(print_br_item '__ITEM__' ${other})"
         done <<< "${other_shortened}"
     else
-        CoArList="${CoArList}$(print_br_item '__ITEM__' 'no other files in tarball')"
+        comment_artifacts_list="${comment_artifacts_list}$(print_br_item '__ITEM__' 'no other files in tarball')"
     fi
-    CoArList="${CoArList}
" + comment_artifacts_list="${comment_artifacts_list}
" else - CoArList="${CoArList}$(print_dd_item 'No artefacts were created or found.' '')" + comment_artifacts_list="${comment_artifacts_list}$(print_dd_item 'No artefacts were created or found.' '')" fi +comment_artefact_details_fmt="
__ARTEFACT_SUMMARY____ARTEFACT_DETAILS__
" comment_artefacts_details="${comment_artefact_details_fmt/__ARTEFACT_SUMMARY__/${artefact_summary}}" -comment_artefacts_details="${comment_artefacts_details/__ARTEFACT_DETAILS__/${CoArList}}" -comment_artefacts="${comment_artefacts_fmt/__ARTEFACTS_LIST__/${comment_artefacts_details}}" +comment_artefacts_details="${comment_artefacts_details/__ARTEFACT_DETAILS__/${comment_artifacts_list}}" -# now put all pieces together creating comment_details from comment_template -comment_description=${comment_template/__SUMMARY_FMT__/${comment_summary}} -comment_description=${comment_description/__DETAILS_FMT__/${comment_details}} +comment_artefacts_fmt="
_Artefacts_
__ARTEFACTS_LIST__
" +comment_artefacts="${comment_artefacts_fmt/__ARTEFACTS_LIST__/${comment_artefacts_details}}" comment_description=${comment_description/__ARTEFACTS_FMT__/${comment_artefacts}} echo "${comment_description}" >> ${job_result_file} diff --git a/bot/check-test.sh b/bot/check-test.sh new file mode 100755 index 0000000000..f045b9500a --- /dev/null +++ b/bot/check-test.sh @@ -0,0 +1,205 @@ +#!/bin/bash +# +# Dummy script that only creates test result file for the bot, without actually checking anything +# +# This script is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Kenneth Hoste (HPC-UGent) +# +# license: GPLv2 +# +job_dir=${PWD} +job_out="slurm-${SLURM_JOB_ID}.out" +job_test_result_file="_bot_job${SLURM_JOB_ID}.test" + +# Check that job output file is found +[[ ${VERBOSE} -ne 0 ]] && echo ">> searching for job output file(s) matching '"${job_out}"'" +if [[ -f ${job_out} ]]; then + SLURM_OUTPUT_FOUND=1 + [[ ${VERBOSE} -ne 0 ]] && echo " found slurm output file '"${job_out}"'" +else + SLURM_OUTPUT_FOUND=0 + [[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found" +fi + + +# ReFrame prints e.g. +#[----------] start processing checks +#[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:rome+default +#[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:genoa+default +#[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=1_cpn_2_nodes %module_name=GROMACS/2021.3-foss-2021a /f4194106 @snellius:genoa+default +#[ FAIL ] (1/3) GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:genoa+default +#==> test failed during 'sanity': test staged in '/scratch-shared/casparl/reframe_output/staging/snellius/genoa/default/GROMACS_EESSI_d597cff4' +#[ OK ] (2/3) GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:rome+default +#P: perf: 8.441 ns/day (r:0, l:None, u:None) +#[ FAIL ] (3/3) GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=1_cpn_2_nodes %module_name=GROMACS/2021.3-foss-2021a /f4194106 @snellius:genoa+default +#==> test failed during 'sanity': test staged in '/scratch-shared/casparl/reframe_output/staging/snellius/genoa/default/GROMACS_EESSI_f4194106' +#[----------] all spawned checks have finished +#[ FAILED ] Ran 3/3 test case(s) from 2 check(s) (2 failure(s), 0 skipped, 0 aborted) + +# We will grep for the last and final line, since this reflects the overall result +# Specifically, we grep for FAILED, since this is also what we print if a step in the test script itself fails +FAILED=-1 +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + GP_failed='\[\s*FAILED\s*\].*Ran .* test case' + grep_reframe_failed=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_failed}") + [[ $? -eq 0 ]] && FAILED=1 || FAILED=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_failed}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_reframe_failed}" +fi + +# Here, we grep for 'ERROR:', which is printed if a fatal_error is encountered when executing the test step +# I.e. 
this is an error in execution of the run_tests.sh itself, NOT in running the actual tests +ERROR=-1 +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + GP_error='ERROR: ' + grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_error}") + [[ $? -eq 0 ]] && ERROR=1 || ERROR=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_error}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}" +fi + +SUCCESS=-1 +# Grep for the success pattern, so we can report the amount of tests run +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + GP_success='\[\s*PASSED\s*\].*Ran .* test case' + grep_reframe_success=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_success}") + [[ $? -eq 0 ]] && SUCCESS=1 || SUCCESS=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_success}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_reframe_success}" +fi + +if [[ ! -z ${grep_reframe_failed} ]]; then + grep_reframe_result=${grep_reframe_failed} +else + grep_reframe_result=${grep_reframe_success} +fi + +echo "[TEST]" > ${job_test_result_file} +if [[ ${SLURM_OUTPUT_FOUND} -eq 0 ]]; then + summary=":cry: FAILURE" + reason="Job output file not found, cannot check test results." + status="FAILURE" +# Should come before general errors: if SUCCESS==1, it indicates the test suite ran succesfully +# regardless of other things that might have gone wrong +elif [[ ${SUCCESS} -eq 1 ]]; then + summary=":grin: SUCCESS" + reason="" + status="SUCCESS" +# Should come before general errors: if FAILED==1, it indicates the test suite ran +# otherwise the pattern wouldn't have been there +elif [[ ${FAILED} -eq 1 ]]; then + summary=":cry: FAILURE" + reason="EESSI test suite produced failures." + status="FAILURE" +elif [[ ${ERROR} -eq 1 ]]; then + summary=":cry: FAILURE" + reason="EESSI test suite was not run, test step itself failed to execute." + status="FAILURE" +else + summary=":grin: FAILURE" + reason="Failed for unknown reason" + status="FAILURE" +fi + + +echo "[TEST]" > ${job_test_result_file} +echo -n "comment_description = " >> ${job_test_result_file} + +# Use template for writing PR comment with details +# construct and write complete PR comment details: implements third alternative +comment_template="
__SUMMARY_FMT__
__REASON_FMT____REFRAME_FMT____DETAILS_FMT__
" +comment_success_item_fmt=":white_check_mark: __ITEM__" +comment_failure_item_fmt=":x: __ITEM__" + +# Initialize comment_description +comment_description=${comment_template} + +# Now, start replacing template items one by one +comment_summary_fmt="__SUMMARY__ _(click triangle for details)_" +comment_summary="${comment_summary_fmt/__SUMMARY__/${summary}}" +comment_description=${comment_description/__SUMMARY_FMT__/${comment_summary}} + + +# Only add if there is a reason (e.g. no reason for successful runs) +if [[ ! -z ${reason} ]]; then + comment_reason_fmt="
_Reason_
__REASONS__
" + reason_details="${comment_reason_fmt/__REASONS__/${reason}}" + comment_description=${comment_description/__REASON_FMT__/${reason_details}} +else + comment_description=${comment_description/__REASON_FMT__/""} +fi + +# Only add if there is a reframe summary (e.g. no reframe summary if reframe wasn't launched succesfully) +echo "ReFrame result:" +echo "${grep_reframe_result}" +if [[ ! -z ${grep_reframe_result} ]]; then + comment_reframe_fmt="
_ReFrame Summary_
__REFRAME_SUMMARY__
" + reframe_summary=${comment_reframe_fmt/__REFRAME_SUMMARY__/${grep_reframe_result}} + comment_description=${comment_description/__REFRAME_FMT__/${reframe_summary}} +else + comment_description=${comment_description/__REFRAME_FMT__/""} +fi + +# Declare functions +function print_br_item() { + format="${1}" + item="${2}" + echo -n "${format//__ITEM__/${item}}
" +} + +function success() { + format="${comment_success_item_fmt}" + item="$1" + print_br_item "${format}" "${item}" +} + +function failure() { + format="${comment_failure_item_fmt}" + item="$1" + print_br_item "${format}" "${item}" +} + +function add_detail() { + actual=${1} + expected=${2} + success_msg="${3}" + failure_msg="${4}" + if [[ ${actual} -eq ${expected} ]]; then + success "${success_msg}" + else + failure "${failure_msg}" + fi +} + +# first construct comment_details_list, abbreviated comment_details_list +# then use it to set comment_details +comment_details_list="" + +success_msg="job output file ${job_out}" +failure_msg="no job output file ${job_out}" +comment_details_list=${comment_details_list}$(add_detail ${SLURM_OUTPUT_FOUND} 1 "${success_msg}" "${failure_msg}") + +success_msg="no message matching ${GP_error}" +failure_msg="found message matching ${GP_error}" +comment_details_list=${comment_details_list}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}") + +# Add an escape character to every *, for it to be printed correctly in the comment on GitHub +GP_failed="${GP_failed//\*/\\*}" +success_msg="no message matching ""${GP_failed}""" +failure_msg="found message matching ""${GP_failed}""" +comment_details_list=${comment_details_list}$(add_detail ${FAILED} 0 "${success_msg}" "${failure_msg}") + +comment_details_fmt="
_Details_
__DETAILS_LIST__
" +comment_details="${comment_details_fmt/__DETAILS_LIST__/${comment_details_list}}" +comment_description=${comment_description/__DETAILS_FMT__/${comment_details}} + +# Actually writing the comment description to the result file +echo "${comment_description}" >> ${job_test_result_file} +echo "status = ${status}" >> ${job_test_result_file} + +exit 0 diff --git a/bot/inspect.sh b/bot/inspect.sh new file mode 100755 index 0000000000..9d1fa87e1f --- /dev/null +++ b/bot/inspect.sh @@ -0,0 +1,446 @@ +#!/usr/bin/env bash +# +# Script to inspect result of a build job for the EESSI software layer. +# Intended use is that it is called with a path to a job directory. +# +# This script is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Thomas Roeblitz (@trz42) +# +# license: GPLv2 +# + +# ASSUMPTIONs: +# - Script is executed on the same architecture the job was running on. +# - Initially, we also assume that is run on the same resource with the +# same (compute) node setup (local disk space, HTTP proxies, etc.) +# - The job directory being supplied has been prepared by the bot with a +# checkout of a pull request (OR by some other means) +# - The job directory contains a directory 'cfg' where the main config +# file 'job.cfg' has been deposited. +# - The 'cfg' directory may contain any additional files referenced in +# 'job.cfg' (repos.cfg, etc.). +# - The job produced some tarballs for its state (tmp disk for overlayfs, +# CVMFS cache, etc.) under 'previous_tmp/{build,tarball}_step'. + +# stop as soon as something fails +set -e + +display_help() { + echo "usage: $0 [OPTIONS]" + echo " -h | --help - display this usage information" + echo " -r | --resume TGZ - inspect job saved in tarball path TGZ; note, we assume the path" + echo " to be something like JOB_DIR/previous_tmp/{build,tarball}_step/TARBALL.tgz" + echo " and thus determine JOB_DIR from the given path" + echo " [default: none]" + echo " -c | --command COMMAND - command to execute inside the container, in the prefix environment" + echo " -x | --http-proxy URL - provides URL for the environment variable http_proxy" + echo " -y | --https-proxy URL - provides URL for the environment variable https_proxy" +} + +resume_tgz= +http_proxy= +https_proxy= + +POSITIONAL_ARGS=() + +while [[ $# -gt 0 ]]; do + case ${1} in + -h|--help) + display_help + exit 0 + ;; + -r|--resume) + export resume_tgz="${2}" + shift 2 + ;; + -x|--http-proxy) + export http_proxy="${2}" + shift 2 + ;; + -y|--https-proxy) + export https_proxy="${2}" + shift 2 + ;; + -c|--command) + export run_in_prefix="${2}" + shift 2 + ;; + -*|--*) + echo "Error: Unknown option: ${1}" >&2 + exit 1 + ;; + *) # No more options + POSITIONAL_ARGS+=("${1}") # save positional arg + shift + ;; + esac +done + +set -- "${POSITIONAL_ARGS[@]}" + +# source utils.sh and cfg_files.sh +source scripts/utils.sh +source scripts/cfg_files.sh + +if [[ -z ${resume_tgz} ]]; then + echo_red "path to tarball for resuming build job is missing" + display_help + exit 1 +fi + +job_dir=$(dirname $(dirname $(dirname ${resume_tgz}))) + +if [[ -z ${job_dir} ]]; then + # job directory could be determined + echo_red "job directory could not be determined from '${resume_tgz}'" + display_help + exit 2 +fi + +# defaults +export JOB_CFG_FILE="${job_dir}/cfg/job.cfg" +HOST_ARCH=$(uname -m) + +# check if ${JOB_CFG_FILE} exists +if [[ ! 
-r "${JOB_CFG_FILE}" ]]; then + fatal_error "job config file (JOB_CFG_FILE=${JOB_CFG_FILE}) does not exist or not readable" +fi +echo "bot/inspect.sh: showing ${JOB_CFG_FILE} from software-layer side" +cat ${JOB_CFG_FILE} + +echo "bot/inspect.sh: obtaining configuration settings from '${JOB_CFG_FILE}'" +cfg_load ${JOB_CFG_FILE} + +# if http_proxy is defined in ${JOB_CFG_FILE} use it, if not use env var $http_proxy +HTTP_PROXY=$(cfg_get_value "site_config" "http_proxy") +HTTP_PROXY=${HTTP_PROXY:-${http_proxy}} +echo "bot/inspect.sh: HTTP_PROXY='${HTTP_PROXY}'" + +# if https_proxy is defined in ${JOB_CFG_FILE} use it, if not use env var $https_proxy +HTTPS_PROXY=$(cfg_get_value "site_config" "https_proxy") +HTTPS_PROXY=${HTTPS_PROXY:-${https_proxy}} +echo "bot/inspect.sh: HTTPS_PROXY='${HTTPS_PROXY}'" + +LOCAL_TMP=$(cfg_get_value "site_config" "local_tmp") +echo "bot/inspect.sh: LOCAL_TMP='${LOCAL_TMP}'" +# TODO should local_tmp be mandatory? --> then we check here and exit if it is not provided + +# check if path to copy build logs to is specified, so we can copy build logs for failing builds there +BUILD_LOGS_DIR=$(cfg_get_value "site_config" "build_logs_dir") +echo "bot/inspect.sh: BUILD_LOGS_DIR='${BUILD_LOGS_DIR}'" +# if $BUILD_LOGS_DIR is set, add it to $SINGULARITY_BIND so the path is available in the build container +if [[ ! -z ${BUILD_LOGS_DIR} ]]; then + mkdir -p ${BUILD_LOGS_DIR} + if [[ -z ${SINGULARITY_BIND} ]]; then + export SINGULARITY_BIND="${BUILD_LOGS_DIR}" + else + export SINGULARITY_BIND="${SINGULARITY_BIND},${BUILD_LOGS_DIR}" + fi +fi + +SINGULARITY_CACHEDIR=$(cfg_get_value "site_config" "container_cachedir") +echo "bot/inspect.sh: SINGULARITY_CACHEDIR='${SINGULARITY_CACHEDIR}'" +if [[ ! -z ${SINGULARITY_CACHEDIR} ]]; then + # make sure that separate directories are used for different CPU families + SINGULARITY_CACHEDIR=${SINGULARITY_CACHEDIR}/${HOST_ARCH} + export SINGULARITY_CACHEDIR +fi + +echo -n "setting \$STORAGE by replacing any var in '${LOCAL_TMP}' -> " +# replace any env variable in ${LOCAL_TMP} with its +# current value (e.g., a value that is local to the job) +STORAGE=$(envsubst <<< ${LOCAL_TMP}) +echo "'${STORAGE}'" + +# make sure ${STORAGE} exists +mkdir -p ${STORAGE} + +# make sure the base tmp storage is unique +JOB_STORAGE=$(mktemp --directory --tmpdir=${STORAGE} bot_job_tmp_XXX) +echo "bot/inspect.sh: created unique base tmp storage directory at ${JOB_STORAGE}" + +# obtain list of modules to be loaded +LOAD_MODULES=$(cfg_get_value "site_config" "load_modules") +echo "bot/inspect.sh: LOAD_MODULES='${LOAD_MODULES}'" + +# singularity/apptainer settings: CONTAINER, HOME, TMPDIR, BIND +CONTAINER=$(cfg_get_value "repository" "container") +echo "bot/inspect.sh: CONTAINER='${CONTAINER}'" +# instead of using ${PWD} as HOME in the container, we use the job directory +# to have access to output files of the job +export SINGULARITY_HOME="${job_dir}:/eessi_bot_job" +echo "bot/inspect.sh: SINGULARITY_HOME='${SINGULARITY_HOME}'" +export SINGULARITY_TMPDIR="${PWD}/singularity_tmpdir" +echo "bot/inspect.sh: SINGULARITY_TMPDIR='${SINGULARITY_TMPDIR}'" +mkdir -p ${SINGULARITY_TMPDIR} + +# load modules if LOAD_MODULES is not empty +if [[ ! 
-z ${LOAD_MODULES} ]]; then + for mod in $(echo ${LOAD_MODULES} | tr ',' '\n') + do + echo "bot/inspect.sh: loading module '${mod}'" + module load ${mod} + done +else + echo "bot/inspect.sh: no modules to be loaded" +fi + +# determine repository to be used from entry .repository in ${JOB_CFG_FILE} +REPOSITORY=$(cfg_get_value "repository" "repo_id") +echo "bot/inspect.sh: REPOSITORY='${REPOSITORY}'" +# TODO better to read this from tarball??? +EESSI_REPOS_CFG_DIR_OVERRIDE=$(cfg_get_value "repository" "repos_cfg_dir") +export EESSI_REPOS_CFG_DIR_OVERRIDE=${EESSI_REPOS_CFG_DIR_OVERRIDE:-${PWD}/cfg} +echo "bot/inspect.sh: EESSI_REPOS_CFG_DIR_OVERRIDE='${EESSI_REPOS_CFG_DIR_OVERRIDE}'" + +# determine EESSI version to be used from .repository.repo_version in ${JOB_CFG_FILE} +# here, just set & export EESSI_VERSION_OVERRIDE +# next script (eessi_container.sh) makes use of it via sourcing init scripts +# (e.g., init/eessi_defaults or init/minimal_eessi_env) +export EESSI_VERSION_OVERRIDE=$(cfg_get_value "repository" "repo_version") +echo "bot/inspect.sh: EESSI_VERSION_OVERRIDE='${EESSI_VERSION_OVERRIDE}'" + +# determine CVMFS repo to be used from .repository.repo_name in ${JOB_CFG_FILE} +# here, just set EESSI_CVMFS_REPO_OVERRIDE, a bit further down +# "source init/eessi_defaults" via sourcing init/minimal_eessi_env +export EESSI_CVMFS_REPO_OVERRIDE="/cvmfs/$(cfg_get_value 'repository' 'repo_name')" +echo "bot/inspect.sh: EESSI_CVMFS_REPO_OVERRIDE='${EESSI_CVMFS_REPO_OVERRIDE}'" + +# determine architecture to be used from entry .architecture in ${JOB_CFG_FILE} +# fallbacks: +# - ${CPU_TARGET} handed over from bot +# - left empty to let downstream script(s) determine subdir to be used +EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(cfg_get_value "architecture" "software_subdir") +EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE:-${CPU_TARGET}} +export EESSI_SOFTWARE_SUBDIR_OVERRIDE +echo "bot/inspect.sh: EESSI_SOFTWARE_SUBDIR_OVERRIDE='${EESSI_SOFTWARE_SUBDIR_OVERRIDE}'" + +# get EESSI_OS_TYPE from .architecture.os_type in ${JOB_CFG_FILE} (default: linux) +EESSI_OS_TYPE=$(cfg_get_value "architecture" "os_type") +export EESSI_OS_TYPE=${EESSI_OS_TYPE:-linux} +echo "bot/inspect.sh: EESSI_OS_TYPE='${EESSI_OS_TYPE}'" + +# prepare arguments to eessi_container.sh common to build and tarball steps +declare -a CMDLINE_ARGS=() +CMDLINE_ARGS+=("--verbose") +CMDLINE_ARGS+=("--access" "rw") +CMDLINE_ARGS+=("--mode" "run") +[[ ! -z ${CONTAINER} ]] && CMDLINE_ARGS+=("--container" "${CONTAINER}") +[[ ! -z ${HTTP_PROXY} ]] && CMDLINE_ARGS+=("--http-proxy" "${HTTP_PROXY}") +[[ ! -z ${HTTPS_PROXY} ]] && CMDLINE_ARGS+=("--https-proxy" "${HTTPS_PROXY}") +[[ ! -z ${REPOSITORY} ]] && CMDLINE_ARGS+=("--repository" "${REPOSITORY}") + +[[ ! 
-z ${resume_tgz} ]] && CMDLINE_ARGS+=("--resume" "${resume_tgz}") + +# create a directory for creating temporary data and scripts for the inspection +INSPECT_DIR=$(mktemp --directory --tmpdir=${PWD} inspect.XXX) +if [[ -z ${SINGULARITY_BIND} ]]; then + export SINGULARITY_BIND="${INSPECT_DIR}:/inspect_eessi_build_job" +else + export SINGULARITY_BIND="${SINGULARITY_BIND},${INSPECT_DIR}:/inspect_eessi_build_job" +fi + +# add arguments for temporary storage and storing a tarball of tmp +CMDLINE_ARGS+=("--save" "${INSPECT_DIR}") +CMDLINE_ARGS+=("--storage" "${JOB_STORAGE}") + +# # prepare arguments to install_software_layer.sh (specific to build step) +# declare -a INSTALL_SCRIPT_ARGS=() +# if [[ ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} =~ .*/generic$ ]]; then +# INSTALL_SCRIPT_ARGS+=("--generic") +# fi +# [[ ! -z ${BUILD_LOGS_DIR} ]] && INSTALL_SCRIPT_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}") + +# make sure some environment settings are available inside the shell started via +# startprefix +base_dir=$(dirname $(realpath $0)) +# base_dir of inspect.sh script is '.../bot', 'init' dir is at the same level +# TODO better use script from tarball??? +source ${base_dir}/../init/eessi_defaults + +if [ -z $EESSI_VERSION ]; then + echo "ERROR: \$EESSI_VERSION must be set!" >&2 + exit 1 +fi +EESSI_COMPAT_LAYER_DIR="${EESSI_CVMFS_REPO}/versions/${EESSI_VERSION}/compat/linux/$(uname -m)" + +# NOTE The below requires access to the CVMFS repository. We could make a first +# test run with a container. For now we skip the test. +# if [ ! -d ${EESSI_COMPAT_LAYER_DIR} ]; then +# echo "ERROR: ${EESSI_COMPAT_LAYER_DIR} does not exist!" >&2 +# exit 1 +# fi + +# When we want to run a script with arguments, the next line is ensures to retain +# these arguments. +# INPUT=$(echo "$@") +mkdir -p ${INSPECT_DIR}/scripts +RESUME_SCRIPT=${INSPECT_DIR}/scripts/resume_env.sh +echo "bot/inspect.sh: creating script '${RESUME_SCRIPT}' to resume environment settings" + +cat << EOF > ${RESUME_SCRIPT} +#!${EESSI_COMPAT_LAYER_DIR}/bin/bash +echo "Sourcing '\$BASH_SOURCE' to init bot environment of build job" +EOF +if [ ! -z ${SLURM_JOB_ID} ]; then + # TODO do we need the value at all? if so which one: current or of the job to + # inspect? + echo "export CURRENT_SLURM_JOB_ID=${SLURM_JOB_ID}" >> ${RESUME_SCRIPT} +fi +if [ ! -z ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} ]; then + echo "export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" >> ${RESUME_SCRIPT} +fi +if [ ! -z ${EESSI_CVMFS_REPO_OVERRIDE} ]; then + echo "export EESSI_CVMFS_REPO_OVERRIDE=${EESSI_CVMFS_REPO_OVERRIDE}" >> ${RESUME_SCRIPT} +fi +if [ ! -z ${EESSI_VERSION_OVERRIDE} ]; then + echo "export EESSI_VERSION_OVERRIDE=${EESSI_VERSION_OVERRIDE}" >> ${RESUME_SCRIPT} +fi +if [ ! -z ${http_proxy} ]; then + echo "export http_proxy=${http_proxy}" >> ${RESUME_SCRIPT} +fi +if [ ! -z ${https_proxy} ]; then + echo "export https_proxy=${https_proxy}" >> ${RESUME_SCRIPT} +fi +cat << 'EOF' >> ${RESUME_SCRIPT} +TOPDIR=$(dirname $(realpath $BASH_SOURCE)) + +source ${TOPDIR}/scripts/utils.sh + +# honor $TMPDIR if it is already defined, use /tmp otherwise +if [ -z $TMPDIR ]; then + export WORKDIR=/tmp/$USER +else + export WORKDIR=$TMPDIR/$USER +fi + +TMPDIR=$(mktemp -d) + +echo ">> Setting up environment..." + +source $TOPDIR/init/minimal_eessi_env + +if [ -d $EESSI_CVMFS_REPO ]; then + echo_green "$EESSI_CVMFS_REPO available, OK!" +else + fatal_error "$EESSI_CVMFS_REPO is not available!" 
+fi + +# make sure we're in Prefix environment by checking $SHELL +if [[ ${SHELL} = ${EPREFIX}/bin/bash ]]; then + echo_green ">> It looks like we're in a Gentoo Prefix environment, good!" +else + fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!" +fi + +# avoid that pyc files for EasyBuild are stored in EasyBuild installation directory +export PYTHONPYCACHEPREFIX=$TMPDIR/pycache + +DETECTION_PARAMETERS='' +GENERIC=0 +EB='eb' +if [[ "$EASYBUILD_OPTARCH" == "GENERIC" || "$EESSI_SOFTWARE_SUBDIR_OVERRIDE" == *"/generic" ]]; then + echo_yellow ">> GENERIC build requested, taking appropriate measures!" + DETECTION_PARAMETERS="$DETECTION_PARAMETERS --generic" + GENERIC=1 + export EASYBUILD_OPTARCH=GENERIC + EB='eb --optarch=GENERIC' +fi + +echo ">> Determining software subdirectory to use for current build host..." +if [ -z $EESSI_SOFTWARE_SUBDIR_OVERRIDE ]; then + export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS) + echo ">> Determined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script" +else + echo ">> Picking up pre-defined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE: ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" +fi + +# Set all the EESSI environment variables (respecting $EESSI_SOFTWARE_SUBDIR_OVERRIDE) +# $EESSI_SILENT - don't print any messages +# $EESSI_BASIC_ENV - give a basic set of environment variables +EESSI_SILENT=1 EESSI_BASIC_ENV=1 source $TOPDIR/init/eessi_environment_variables + +if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then + fatal_error "Failed to determine software subdirectory?!" +elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then + fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!" +else + echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!" +fi + +echo ">> Initializing Lmod..." +source $EPREFIX/usr/share/Lmod/init/bash +ml_version_out=$TMPDIR/ml.out +ml --version &> $ml_version_out +if [[ $? -eq 0 ]]; then + echo_green ">> Found Lmod ${LMOD_VERSION}" +else + fatal_error "Failed to initialize Lmod?! (see output in ${ml_version_out}" +fi + +echo ">> Configuring EasyBuild..." +source $TOPDIR/configure_easybuild + +echo ">> Setting up \$MODULEPATH..." +# make sure no modules are loaded +module --force purge +# ignore current $MODULEPATH entirely +module unuse $MODULEPATH +module use $EASYBUILD_INSTALLPATH/modules/all +if [[ -z ${MODULEPATH} ]]; then + fatal_error "Failed to set up \$MODULEPATH?!" +else + echo_green ">> MODULEPATH set up: ${MODULEPATH}" +fi + +echo +echo_green "Build environment set up with install path ${EASYBUILD_INSTALLPATH}." +echo +echo "The build job can be inspected with the following resources:" +echo " - job directory is $HOME (\$HOME), check for slurm-*.out file" +echo " - temporary data of the job is available at /tmp" +echo " - note, the prefix $EESSI_PREFIX is writable" +echo +echo "You may want to load an EasyBuild module. The inspect.sh script does not load" +echo "that automatically, because multiple versions might have been used by the job." +echo "Choose an EasyBuild version (see installed versions with 'module avail EasyBuild')" +echo "and simply run" +echo +echo "module load EasyBuild/_VERSION_" +echo +echo "Replace _VERSION_ with the version you want to use." 
+echo + +EOF +chmod u+x ${RESUME_SCRIPT} + +# try to map it into the container's $HOME/.profile instead +# TODO check if script already exists, if so change its name and source it at the beginning of the RESUME_SCRIPT +if [[ -z ${SINGULARITY_BIND} ]]; then + export SINGULARITY_BIND="${RESUME_SCRIPT}:/eessi_bot_job/.profile" +else + export SINGULARITY_BIND="${SINGULARITY_BIND},${RESUME_SCRIPT}:/eessi_bot_job/.profile" +fi + +echo "Executing command to start interactive session to inspect build job:" +# TODO possibly add information on how to init session after the prefix is +# entered, initialization consists of +# - environment variable settings (see 'run_in_compat_layer_env.sh') +# - setup steps run in 'EESSI-install-software.sh' +# These initializations are combined into a single script that is executed when +# the shell in startprefix is started. We set the env variable BASH_ENV here. +if [[ -z ${run_in_prefix} ]]; then + echo "./eessi_container.sh ${CMDLINE_ARGS[@]}" + echo " -- ${EESSI_COMPAT_LAYER_DIR}/startprefix" + ./eessi_container.sh "${CMDLINE_ARGS[@]}" \ + -- ${EESSI_COMPAT_LAYER_DIR}/startprefix +else + echo "./eessi_container.sh ${CMDLINE_ARGS[@]}" + echo " -- ${EESSI_COMPAT_LAYER_DIR}/startprefix <<< ${run_in_prefix}" + ./eessi_container.sh "${CMDLINE_ARGS[@]}" \ + -- ${EESSI_COMPAT_LAYER_DIR}/startprefix <<< ${run_in_prefix} +fi + +exit 0 diff --git a/bot/test.sh b/bot/test.sh new file mode 100755 index 0000000000..4984340e6e --- /dev/null +++ b/bot/test.sh @@ -0,0 +1,225 @@ +#!/usr/bin/env bash +# +# script to run tests or the test suite for the whole EESSI software layer or +# just what has been built in a job. Intended use is that it is called +# at the end of a (batch) job running on a compute node. +# +# This script is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Thomas Roeblitz (@trz42) +# author: Caspar van Leeuwen (@casparvl) +# +# license: GPLv2 +# + +# ASSUMPTIONs: +# + assumption for the build step (as run through bot/build.sh which is provided +# in this repository too) +# - working directory has been prepared by the bot with a checkout of a +# pull request (OR by some other means) +# - the working directory contains a directory 'cfg' where the main config +# file 'job.cfg' has been deposited +# - the directory may contain any additional files referenced in job.cfg +# + assumptions for the test step +# - temporary storage is still available +# example +# Using /localscratch/9640860/NESSI/eessi.x765Dd8mFh as tmp directory (to resume session add '--resume /localscratch/9640860/NESSI/eessi.x765Dd8mFh'). +# - run test-suite.sh inside build container using tmp storage from build step +# plus possibly additional settings (repo, etc.) +# - needed setup steps may be similar to bot/inspect.sh (PR#317) + +# stop as soon as something fails +set -e + +# source utils.sh and cfg_files.sh +source scripts/utils.sh +source scripts/cfg_files.sh + +# defaults +export JOB_CFG_FILE="${JOB_CFG_FILE_OVERRIDE:=./cfg/job.cfg}" +HOST_ARCH=$(uname -m) + +# check if ${JOB_CFG_FILE} exists +if [[ ! 
-r "${JOB_CFG_FILE}" ]]; then + fatal_error "job config file (JOB_CFG_FILE=${JOB_CFG_FILE}) does not exist or not readable" +fi +echo "bot/test.sh: showing ${JOB_CFG_FILE} from software-layer side" +cat ${JOB_CFG_FILE} + +echo "bot/test.sh: obtaining configuration settings from '${JOB_CFG_FILE}'" +cfg_load ${JOB_CFG_FILE} + +# if http_proxy is defined in ${JOB_CFG_FILE} use it, if not use env var $http_proxy +HTTP_PROXY=$(cfg_get_value "site_config" "http_proxy") +HTTP_PROXY=${HTTP_PROXY:-${http_proxy}} +echo "bot/test.sh: HTTP_PROXY='${HTTP_PROXY}'" + +# if https_proxy is defined in ${JOB_CFG_FILE} use it, if not use env var $https_proxy +HTTPS_PROXY=$(cfg_get_value "site_config" "https_proxy") +HTTPS_PROXY=${HTTPS_PROXY:-${https_proxy}} +echo "bot/test.sh: HTTPS_PROXY='${HTTPS_PROXY}'" + +LOCAL_TMP=$(cfg_get_value "site_config" "local_tmp") +echo "bot/test.sh: LOCAL_TMP='${LOCAL_TMP}'" +# TODO should local_tmp be mandatory? --> then we check here and exit if it is not provided + +# check if path to copy build logs to is specified, so we can copy build logs for failing builds there +BUILD_LOGS_DIR=$(cfg_get_value "site_config" "build_logs_dir") +echo "bot/test.sh: BUILD_LOGS_DIR='${BUILD_LOGS_DIR}'" +# if $BUILD_LOGS_DIR is set, add it to $SINGULARITY_BIND so the path is available in the build container +if [[ ! -z ${BUILD_LOGS_DIR} ]]; then + mkdir -p ${BUILD_LOGS_DIR} + if [[ -z ${SINGULARITY_BIND} ]]; then + export SINGULARITY_BIND="${BUILD_LOGS_DIR}" + else + export SINGULARITY_BIND="${SINGULARITY_BIND},${BUILD_LOGS_DIR}" + fi +fi + +# check if path to directory on shared filesystem is specified, +# and use it as location for source tarballs used by EasyBuild if so +SHARED_FS_PATH=$(cfg_get_value "site_config" "shared_fs_path") +echo "bot/test.sh: SHARED_FS_PATH='${SHARED_FS_PATH}'" +# if $SHARED_FS_PATH is set, add it to $SINGULARITY_BIND so the path is available in the build container +if [[ ! -z ${SHARED_FS_PATH} ]]; then + mkdir -p ${SHARED_FS_PATH} + if [[ -z ${SINGULARITY_BIND} ]]; then + export SINGULARITY_BIND="${SHARED_FS_PATH}" + else + export SINGULARITY_BIND="${SINGULARITY_BIND},${SHARED_FS_PATH}" + fi +fi + +SINGULARITY_CACHEDIR=$(cfg_get_value "site_config" "container_cachedir") +echo "bot/test.sh: SINGULARITY_CACHEDIR='${SINGULARITY_CACHEDIR}'" +if [[ ! 
-z ${SINGULARITY_CACHEDIR} ]]; then + # make sure that separate directories are used for different CPU families + SINGULARITY_CACHEDIR=${SINGULARITY_CACHEDIR}/${HOST_ARCH} + export SINGULARITY_CACHEDIR +fi + +# try to determine tmp directory from build job +RESUME_DIR=$(grep 'Using .* as tmp directory' slurm-${SLURM_JOBID}.out | head -1 | awk '{print $2}') + +if [[ -z ${RESUME_DIR} ]]; then + echo -n "setting \$STORAGE by replacing any var in '${LOCAL_TMP}' -> " + # replace any env variable in ${LOCAL_TMP} with its + # current value (e.g., a value that is local to the job) + STORAGE=$(envsubst <<< ${LOCAL_TMP}) + echo "'${STORAGE}'" + + # make sure ${STORAGE} exists + mkdir -p ${STORAGE} + + # make sure the base tmp storage is unique + JOB_STORAGE=$(mktemp --directory --tmpdir=${STORAGE} bot_job_tmp_XXX) + echo "bot/test.sh: created unique base tmp storage directory at ${JOB_STORAGE}" + + RESUME_TGZ=${PWD}/previous_tmp/build_step/$(ls previous_tmp/build_step) + if [[ -z ${RESUME_TGZ} ]]; then + echo "bot/test.sh: no information about tmp directory and tarball of build step; --> giving up" + exit 2 + fi +fi + +# obtain list of modules to be loaded +LOAD_MODULES=$(cfg_get_value "site_config" "load_modules") +echo "bot/test.sh: LOAD_MODULES='${LOAD_MODULES}'" + +# singularity/apptainer settings: CONTAINER, HOME, TMPDIR, BIND +CONTAINER=$(cfg_get_value "repository" "container") +export SINGULARITY_HOME="${PWD}:/eessi_bot_job" +export SINGULARITY_TMPDIR="${PWD}/singularity_tmpdir" +mkdir -p ${SINGULARITY_TMPDIR} + +# load modules if LOAD_MODULES is not empty +if [[ ! -z ${LOAD_MODULES} ]]; then + for mod in $(echo ${LOAD_MODULES} | tr ',' '\n') + do + echo "bot/test.sh: loading module '${mod}'" + module load ${mod} + done +else + echo "bot/test.sh: no modules to be loaded" +fi + +# determine repository to be used from entry .repository in ${JOB_CFG_FILE} +REPOSITORY=$(cfg_get_value "repository" "repo_id") +EESSI_REPOS_CFG_DIR_OVERRIDE=$(cfg_get_value "repository" "repos_cfg_dir") +export EESSI_REPOS_CFG_DIR_OVERRIDE=${EESSI_REPOS_CFG_DIR_OVERRIDE:-${PWD}/cfg} +echo "bot/test.sh: EESSI_REPOS_CFG_DIR_OVERRIDE='${EESSI_REPOS_CFG_DIR_OVERRIDE}'" + +# determine pilot version to be used from .repository.repo_version in ${JOB_CFG_FILE} +# here, just set & export EESSI_PILOT_VERSION_OVERRIDE +# next script (eessi_container.sh) makes use of it via sourcing init scripts +# (e.g., init/eessi_defaults or init/minimal_eessi_env) +export EESSI_PILOT_VERSION_OVERRIDE=$(cfg_get_value "repository" "repo_version") +echo "bot/test.sh: EESSI_PILOT_VERSION_OVERRIDE='${EESSI_PILOT_VERSION_OVERRIDE}'" + +# determine CVMFS repo to be used from .repository.repo_name in ${JOB_CFG_FILE} +# here, just set EESSI_CVMFS_REPO_OVERRIDE, a bit further down +# "source init/eessi_defaults" via sourcing init/minimal_eessi_env +export EESSI_CVMFS_REPO_OVERRIDE=$(cfg_get_value "repository" "repo_name") +echo "bot/test.sh: EESSI_CVMFS_REPO_OVERRIDE='${EESSI_CVMFS_REPO_OVERRIDE}'" + +# determine architecture to be used from entry .architecture in ${JOB_CFG_FILE} +# fallbacks: +# - ${CPU_TARGET} handed over from bot +# - left empty to let downstream script(s) determine subdir to be used +EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(cfg_get_value "architecture" "software_subdir") +EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE:-${CPU_TARGET}} +export EESSI_SOFTWARE_SUBDIR_OVERRIDE +echo "bot/test.sh: EESSI_SOFTWARE_SUBDIR_OVERRIDE='${EESSI_SOFTWARE_SUBDIR_OVERRIDE}'" + +# get EESSI_OS_TYPE from .architecture.os_type in 
${JOB_CFG_FILE} (default: linux) +EESSI_OS_TYPE=$(cfg_get_value "architecture" "os_type") +export EESSI_OS_TYPE=${EESSI_OS_TYPE:-linux} +echo "bot/test.sh: EESSI_OS_TYPE='${EESSI_OS_TYPE}'" + +# prepare arguments to eessi_container.sh common to build and tarball steps +declare -a COMMON_ARGS=() +COMMON_ARGS+=("--verbose") +COMMON_ARGS+=("--access" "ro") +COMMON_ARGS+=("--mode" "run") +[[ ! -z ${CONTAINER} ]] && COMMON_ARGS+=("--container" "${CONTAINER}") +[[ ! -z ${HTTP_PROXY} ]] && COMMON_ARGS+=("--http-proxy" "${HTTP_PROXY}") +[[ ! -z ${HTTPS_PROXY} ]] && COMMON_ARGS+=("--https-proxy" "${HTTPS_PROXY}") +[[ ! -z ${REPOSITORY} ]] && COMMON_ARGS+=("--repository" "${REPOSITORY}") + +# make sure to use the same parent dir for storing tarballs of tmp +PREVIOUS_TMP_DIR=${PWD}/previous_tmp + +# prepare directory to store tarball of tmp for test step +TARBALL_TMP_TEST_STEP_DIR=${PREVIOUS_TMP_DIR}/test_step +mkdir -p ${TARBALL_TMP_TEST_STEP_DIR} + +# prepare arguments to eessi_container.sh specific to test step +declare -a TEST_STEP_ARGS=() +TEST_STEP_ARGS+=("--save" "${TARBALL_TMP_TEST_STEP_DIR}") + +if [[ -z ${RESUME_DIR} ]]; then + TEST_STEP_ARGS+=("--storage" "${STORAGE}") + TEST_STEP_ARGS+=("--resume" "${RESUME_TGZ}") +else + TEST_STEP_ARGS+=("--resume" "${RESUME_DIR}") +fi + +# prepare arguments to test_suite.sh (specific to test step) +declare -a TEST_SUITE_ARGS=() +if [[ ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} =~ .*/generic$ ]]; then + TEST_SUITE_ARGS+=("--generic") +fi +# [[ ! -z ${BUILD_LOGS_DIR} ]] && TEST_SUITE_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}") +# [[ ! -z ${SHARED_FS_PATH} ]] && TEST_SUITE_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}") + +# create tmp file for output of build step +test_outerr=$(mktemp test.outerr.XXXX) + +echo "Executing command to test software:" +echo "./eessi_container.sh ${COMMON_ARGS[@]} ${TEST_STEP_ARGS[@]}" +echo " -- ./run_tests.sh \"${TEST_SUITE_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${test_outerr}" +./eessi_container.sh "${COMMON_ARGS[@]}" "${TEST_STEP_ARGS[@]}" \ + -- ./run_tests.sh "${TEST_SUITE_ARGS[@]}" "$@" 2>&1 | tee -a ${test_outerr} + +exit 0 diff --git a/check_missing_installations.sh b/check_missing_installations.sh index 4a5316c09f..d8135ea3cb 100755 --- a/check_missing_installations.sh +++ b/check_missing_installations.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Script to check for missing installations in EESSI pilot software stack (version 2021.12) +# Script to check for missing installations in EESSI software stack # # author: Kenneth Hoste (@boegel) # author: Thomas Roeblitz (@trz42) @@ -10,26 +10,67 @@ TOPDIR=$(dirname $(realpath $0)) -if [ -z ${EESSI_PILOT_VERSION} ]; then - echo "ERROR: \${EESSI_PILOT_VERSION} must be set to run $0!" 
>&2 +if [ "$#" -eq 1 ]; then + true +elif [ "$#" -eq 2 ]; then + echo "Using $2 to give create exceptions for PR filtering of easystack" + # Find lines that are added and use from-pr, make them unique, grab the + # PR numbers and use them to construct something we can use within awk + pr_exceptions=$(grep ^+ $2 | grep from-pr | uniq | awk '{print $3}' | xargs -i echo " || /'{}'/") +else + echo "ERROR: Usage: $0 ()" >&2 exit 1 fi +easystack=$1 LOCAL_TMPDIR=$(mktemp -d) +# Clone the develop branch of EasyBuild and use that to search for easyconfigs +git clone -b develop https://github.com/easybuilders/easybuild-easyconfigs.git $LOCAL_TMPDIR/easyconfigs +export EASYBUILD_ROBOT_PATHS=$LOCAL_TMPDIR/easyconfigs/easybuild/easyconfigs + +# All PRs used in EESSI are supposed to be merged, so we can strip out all cases of from-pr +tmp_easystack=${LOCAL_TMPDIR}/$(basename ${easystack}) +grep -v from-pr ${easystack} > ${tmp_easystack} + source $TOPDIR/scripts/utils.sh source $TOPDIR/configure_easybuild +echo ">> Active EasyBuild configuration when checking for missing installations:" +${EB:-eb} --show-config + echo ">> Checking for missing installations in ${EASYBUILD_INSTALLPATH}..." eb_missing_out=$LOCAL_TMPDIR/eb_missing.out -# we need to use --from-pr to pull in some easyconfigs that are not available in EasyBuild version being used -# PR #16531: Nextflow-22.10.1.eb -${EB:-eb} --from-pr 16531 --easystack eessi-${EESSI_PILOT_VERSION}.yml --experimental --missing 2>&1 | tee ${eb_missing_out} +${EB:-eb} --easystack ${tmp_easystack} --missing 2>&1 | tee ${eb_missing_out} exit_code=${PIPESTATUS[0]} ok_msg="Command 'eb --missing ...' succeeded, analysing output..." fail_msg="Command 'eb --missing ...' failed, check log '${eb_missing_out}'" +if [ "$exit_code" -ne 0 ] && [ ! -z "$pr_exceptions" ]; then + # We might have failed due to unmerged PRs. Try to make exceptions for --from-pr added in this PR + # to software-layer, and see if then it passes. If so, we can report a more specific fail_msg + # Note that if no --from-pr's were used in this PR, $pr_exceptions will be empty and we might as + # well skip this check - unmerged PRs can not be the reason for the non-zero exit code in that scenario + + # Let's use awk so we can allow for exceptions if we are given a PR diff file + awk_command="awk '\!/'from-pr'/ EXCEPTIONS' $easystack" + awk_command=${awk_command/\\/} # Strip out the backslash we needed for ! + eval ${awk_command/EXCEPTIONS/$pr_exceptions} > ${tmp_easystack} + + msg=">> Checking for missing installations in ${EASYBUILD_INSTALLPATH}," + msg="${msg} allowing for --from-pr's that were added in this PR..." 
+ echo ${msg} + eb_missing_out=$LOCAL_TMPDIR/eb_missing_with_from_pr.out + ${EB:-eb} --easystack ${tmp_easystack} --missing 2>&1 | tee ${eb_missing_out} + exit_code_with_from_pr=${PIPESTATUS[0]} + + # If now we succeeded, the reason must be that we originally stripped the --from-pr's + if [ "$exit_code_with_from_pr" -eq 0 ]; then + fail_msg="$fail_msg (are you sure all PRs referenced have been merged in EasyBuild?)" + fi +fi + check_exit_code ${exit_code} "${ok_msg}" "${fail_msg}" # the above assesses the installed software for each easyconfig provided in diff --git a/configure_easybuild b/configure_easybuild index 245553f342..c67b879cf3 100644 --- a/configure_easybuild +++ b/configure_easybuild @@ -25,8 +25,7 @@ fi # note: filtering Bison may break some installations, like Qt5 (see https://github.com/EESSI/software-layer/issues/49) # filtering pkg-config breaks R-bundle-Bioconductor installation (see also https://github.com/easybuilders/easybuild-easyconfigs/pull/11104) -# problems occur when filtering pkg-config with gnuplot too (picks up Lua 5.1 from $EPREFIX rather than from Lua 5.3 dependency) -DEPS_TO_FILTER=Autoconf,Automake,Autotools,binutils,bzip2,cURL,DBus,flex,gettext,gperf,help2man,intltool,libreadline,libtool,Lua,M4,makeinfo,ncurses,util-linux,XZ,zlib +DEPS_TO_FILTER=Autoconf,Automake,Autotools,binutils,bzip2,DBus,flex,gettext,gperf,help2man,intltool,libreadline,libtool,M4,makeinfo,ncurses,util-linux,XZ,zlib # For aarch64 we need to also filter out Yasm. # See https://github.com/easybuilders/easybuild-easyconfigs/issues/11190 if [[ "$EESSI_CPU_FAMILY" == "aarch64" ]]; then @@ -36,3 +35,6 @@ fi export EASYBUILD_FILTER_DEPS=$DEPS_TO_FILTER export EASYBUILD_MODULE_EXTENSIONS=1 + +# need to enable use of experimental features, since we're using easystack files +export EASYBUILD_EXPERIMENTAL=1 diff --git a/create_directory_tarballs.sh b/create_directory_tarballs.sh index 70e666f871..0270719a73 100755 --- a/create_directory_tarballs.sh +++ b/create_directory_tarballs.sh @@ -1,7 +1,5 @@ #!/bin/bash -SOFTWARE_LAYER_TARBALL_URL=https://github.com/EESSI/software-layer/tarball/main - set -eo pipefail if [ $# -ne 1 ]; then @@ -11,6 +9,8 @@ fi version=$1 +SOFTWARE_LAYER_TARBALL_URL="https://github.com/EESSI/software-layer/tarball/${version}-software.eessi.io" + TOPDIR=$(dirname $(realpath $0)) source $TOPDIR/scripts/utils.sh @@ -28,9 +28,9 @@ mkdir "${tartmp}/${version}" tarname="eessi-${version}-init-$(date +%s).tar.gz" curl -Ls ${SOFTWARE_LAYER_TARBALL_URL} | tar xzf - -C "${tartmp}/${version}" --strip-components=1 --no-wildcards-match-slash --wildcards '*/init/' source "${tartmp}/${version}/init/minimal_eessi_env" -if [ "${EESSI_PILOT_VERSION}" != "${version}" ] +if [ "${EESSI_VERSION}" != "${version}" ] then - fatal_error "Specified version ${version} does not match version ${EESSI_PILOT_VERSION} in the init files!" + fatal_error "Specified version ${version} does not match version ${EESSI_VERSION} in the init files!" 
fi tar czf "${tarname}" -C "${tartmp}" "${version}" rm -rf "${tartmp}" diff --git a/create_lmodrc.py b/create_lmodrc.py index ae65153a20..28ad2a1915 100755 --- a/create_lmodrc.py +++ b/create_lmodrc.py @@ -7,6 +7,7 @@ DOT_LMOD = '.lmod' +# LMOD_RC file is the place to define properties, see https://lmod.readthedocs.io/en/latest/145_properties.html TEMPLATE_LMOD_RC = """propT = { } scDescriptT = { diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py new file mode 100755 index 0000000000..29b2d39bd7 --- /dev/null +++ b/create_lmodsitepackage.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +# +# Create SitePackage.lua configuration file for Lmod. +# +import os +import sys + +DOT_LMOD = '.lmod' + +hook_txt ="""require("strict") +local hook = require("Hook") +local open = io.open + +local function read_file(path) + local file = open(path, "rb") -- r read mode and b binary mode + if not file then return nil end + local content = file:read "*a" -- *a or *all reads the whole file + file:close() + return content +end + +local function load_site_specific_hooks() + -- This function will be run after the EESSI hooks are registered + -- It will load a local SitePackage.lua that is architecture independent (if it exists) from e.g. + -- /cvmfs/software.eessi.io/host_injections/2023.06/.lmod/SitePackage.lua + -- That can define a new hook + -- + -- function site_specific_load_hook(t) + -- + -- end + -- + -- And the either append to the existing hook: + -- + -- local function final_load_hook(t) + -- eessi_load_hook(t) + -- site_specific_load_hook(t) + -- end + -- + -- Over overwrite the EESSI hook entirely: + -- + -- hook.register("load", final_load_hook) + -- + -- Note that the appending procedure can be simplified once we have an lmod >= 8.7.36 + -- See https://github.com/TACC/Lmod/pull/696#issuecomment-1998765722 + -- + -- Subsequently, this function will look for an architecture-specific SitePackage.lua, e.g. from + -- /cvmfs/software.eessi.io/host_injections/2023.06/software/linux/x86_64/amd/zen2/.lmod/SitePackage.lua + -- This can then register an additional hook, e.g. + -- + -- function arch_specific_load_hook(t) + -- + -- end + -- + -- local function final_load_hook(t) + -- eessi_load_hook(t) + -- site_specific_load_hook(t) + -- arch_specific_load_hook(t) + -- end + -- + -- hook.register("load", final_load_hook) + -- + -- Again, the host site could also decide to overwrite by simply doing + -- + -- hook.register("load", arch_specific_load_hook) + + -- get path to to architecture independent SitePackage.lua + local prefixHostInjections = string.gsub(os.getenv('EESSI_PREFIX') or "", 'versions', 'host_injections') + local hostSitePackage = prefixHostInjections .. "/.lmod/SitePackage.lua" + + -- If the file exists, run it + if isFile(hostSitePackage) then + dofile(hostSitePackage) + end + + -- build the full architecture specific path in host_injections + local archHostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') + local archSitePackage = archHostInjections .. "/.lmod/SitePackage.lua" + + -- If the file exists, run it + if isFile(archSitePackage) then + dofile(archSitePackage) + end + +end + + +local function eessi_cuda_enabled_load_hook(t) + local frameStk = require("FrameStk"):singleton() + local mt = frameStk:mt() + local simpleName = string.match(t.modFullName, "(.-)/") + -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections. 
+ -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse + -- to load the CUDA module and print an informative message on how to set up GPU support for EESSI + local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" + if simpleName == 'CUDA' then + -- get the full host_injections path + local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') + -- build final path where the CUDA software should be installed + local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" + local cudaDirExists = isDir(cudaEasyBuildDir) + if not cudaDirExists then + local advice = "but while the module file exists, the actual software is not entirely shipped with EESSI " + advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where EESSI " + advice = advice .. "can find it.\\n" + advice = advice .. refer_to_docs + LmodError("\\nYou requested to load ", simpleName, " ", advice) + end + end + -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the EESSI linker, + -- otherwise, refuse to load the requested module and print error message + local haveGpu = mt:haveProperty(simpleName,"arch","gpu") + if haveGpu then + local arch = os.getenv("EESSI_CPU_FAMILY") or "" + local cudaVersionFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt" + local cudaDriverFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/libcuda.so" + local cudaDriverExists = isFile(cudaDriverFile) + local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so") + if not (cudaDriverExists or singularityCudaExists) then + local advice = "which relies on the CUDA runtime environment and driver libraries. " + advice = advice .. "In order to be able to use the module, you will need " + advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system.\\n" + advice = advice .. refer_to_docs + LmodError("\\nYou requested to load ", simpleName, " ", advice) + else + -- CUDA driver exists, now we check its version to see if an update is needed + if cudaDriverExists then + local cudaVersion = read_file(cudaVersionFile) + local cudaVersion_req = os.getenv("EESSICUDAVERSION") + -- driver CUDA versions don't give a patch version for CUDA + local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)") + local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)") + local driver_libs_need_update = false + if major < major_req then + driver_libs_need_update = true + elseif major == major_req then + if minor < minor_req then + driver_libs_need_update = true + end + end + if driver_libs_need_update == true then + local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". " + advice = advice .. "Please update your CUDA driver libraries and then " + advice = advice .. "let EESSI know about the update.\\n" + advice = advice .. 
refer_to_docs + LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice) + end + end + end + end +end + +-- Combine both functions into a single one, as we can only register one function as load hook in lmod +-- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed +function eessi_load_hook(t) + eessi_cuda_enabled_load_hook(t) +end + + +hook.register("load", eessi_load_hook) + +-- Note that this needs to happen at the end, so that any EESSI specific hooks can be overwritten by the site +load_site_specific_hooks() +""" + +def error(msg): + sys.stderr.write("ERROR: %s\n" % msg) + sys.exit(1) + + +if len(sys.argv) != 2: + error("Usage: %s " % sys.argv[0]) + +prefix = sys.argv[1] + +if not os.path.exists(prefix): + error("Prefix directory %s does not exist!" % prefix) + +sitepackage_path = os.path.join(prefix, DOT_LMOD, 'SitePackage.lua') +try: + os.makedirs(os.path.dirname(sitepackage_path), exist_ok=True) + with open(sitepackage_path, 'w') as fp: + fp.write(hook_txt) + +except (IOError, OSError) as err: + error("Failed to create %s: %s" % (sitepackage_path, err)) + +print(sitepackage_path) diff --git a/create_tarball.sh b/create_tarball.sh index b6c72b341d..2d77acfc43 100755 --- a/create_tarball.sh +++ b/create_tarball.sh @@ -5,11 +5,11 @@ set -e base_dir=$(dirname $(realpath $0)) if [ $# -ne 4 ]; then - echo "ERROR: Usage: $0 " >&2 + echo "ERROR: Usage: $0 " >&2 exit 1 fi eessi_tmpdir=$1 -pilot_version=$2 +eessi_version=$2 cpu_arch_subdir=$3 target_tgz=$4 @@ -20,7 +20,7 @@ os="linux" source ${base_dir}/init/eessi_defaults cvmfs_repo=${EESSI_CVMFS_REPO} -software_dir="${cvmfs_repo}/versions/${pilot_version}/software/${os}/${cpu_arch_subdir}" +software_dir="${cvmfs_repo}/versions/${eessi_version}/software/${os}/${cpu_arch_subdir}" if [ ! -d ${software_dir} ]; then echo "Software directory ${software_dir} does not exist?!" >&2 exit 2 @@ -28,7 +28,7 @@ fi overlay_upper_dir="${eessi_tmpdir}/overlay-upper" -software_dir_overlay="${overlay_upper_dir}/versions/${pilot_version}/software/${os}/${cpu_arch_subdir}" +software_dir_overlay="${overlay_upper_dir}/versions/${eessi_version}/software/${os}/${cpu_arch_subdir}" if [ ! -d ${software_dir_overlay} ]; then echo "Software directory overlay ${software_dir_overlay} does not exist?!" >&2 exit 3 @@ -40,22 +40,34 @@ echo ">> Collecting list of files/directories to include in tarball via ${PWD}.. files_list=${tmpdir}/files.list.txt module_files_list=${tmpdir}/module_files.list.txt -if [ -d ${pilot_version}/software/${os}/${cpu_arch_subdir}/.lmod ]; then +if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod ]; then # include Lmod cache and configuration file (lmodrc.lua), # skip whiteout files and backup copies of Lmod cache (spiderT.old.*) - find ${pilot_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' > ${files_list} + find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' >> ${files_list} fi -if [ -d ${pilot_version}/software/${os}/${cpu_arch_subdir}/modules ]; then + +# include scripts that were copied by install_scripts.sh, which we want to ship in EESSI repository +if [ -d ${eessi_version}/scripts ]; then + find ${eessi_version}/scripts -type f | grep -v '/\.wh\.' >> ${files_list} +fi + +# also include init, which is also copied by install_scripts.sh +if [ -d ${eessi_version}/init ]; then + find ${eessi_version}/init -type f | grep -v '/\.wh\.' 
>> ${files_list} +fi + +if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules ]; then # module files - find ${pilot_version}/software/${os}/${cpu_arch_subdir}/modules -type f | grep -v '/\.wh\.' >> ${files_list} + find ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules -type f | grep -v '/\.wh\.' >> ${files_list} # module symlinks - find ${pilot_version}/software/${os}/${cpu_arch_subdir}/modules -type l | grep -v '/\.wh\.' >> ${files_list} + find ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules -type l | grep -v '/\.wh\.' >> ${files_list} # module files and symlinks - find ${pilot_version}/software/${os}/${cpu_arch_subdir}/modules/all -type f -o -type l \ - | grep -v '/\.wh\.' | sed -e 's/.lua$//' | sed -e 's@.*/modules/all/@@g' | sort -u \ + find ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules/all -type f -o -type l \ + | grep -v '/\.wh\.' | grep -v '/\.modulerc\.lua' | sed -e 's/.lua$//' | sed -e 's@.*/modules/all/@@g' | sort -u \ >> ${module_files_list} fi -if [ -d ${pilot_version}/software/${os}/${cpu_arch_subdir}/software -a -r ${module_files_list} ]; then + +if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/software -a -r ${module_files_list} ]; then # installation directories but only those for which module files were created # Note, we assume that module names (as defined by 'PACKAGE_NAME/VERSION.lua' # using EasyBuild's standard module naming scheme) match the name of the @@ -64,7 +76,7 @@ if [ -d ${pilot_version}/software/${os}/${cpu_arch_subdir}/software -a -r ${modu # installation directories), the procedure will likely not work. for package_version in $(cat ${module_files_list}); do echo "handling ${package_version}" - ls -d ${pilot_version}/software/${os}/${cpu_arch_subdir}/software/${package_version} \ + ls -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/software/${package_version} \ | grep -v '/\.wh\.' >> ${files_list} done fi diff --git a/easystacks/software.eessi.io/2023.06/README.md b/easystacks/software.eessi.io/2023.06/README.md new file mode 100644 index 0000000000..733ebf9475 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/README.md @@ -0,0 +1,7 @@ +File naming matters, since it determines the order in which easystack files are processed. + +Software installed with system toolchain should be installed first, +this includes EasyBuild itself, see `eessi-2023.06-eb-4.8.2-001-system.yml` . + +CUDA installations must be done before CUDA is required as dependency for something +built with a non-system toolchain, see `eessi-2023.06-eb-4.8.2-010-CUDA.yml` . 
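To make the effect of the naming scheme described in this README concrete, here is a minimal sketch (an illustration only, not part of the patch, assuming the easystack files are simply picked up via a plain shell glob, which expands in lexicographic order):

    # minimal sketch, assuming easystack files are picked up via a shell glob (lexicographic order);
    # the numeric infix (001, 010, ...) then determines when each file is processed
    for easystack_file in easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-*.yml; do
        echo "processing ${easystack_file}"
    done
    # expected order for the 4.8.2 files added in this change:
    #   eessi-2023.06-eb-4.8.2-001-system.yml   (EasyBuild itself, built with the system toolchain)
    #   eessi-2023.06-eb-4.8.2-010-CUDA.yml     (CUDA, before anything that needs it as a dependency)
    #   eessi-2023.06-eb-4.8.2-2022b.yml
    #   eessi-2023.06-eb-4.8.2-2023a.yml
    #   ...

Under this assumption, renaming or renumbering a file is enough to reorder when it gets processed; the actual driver script may differ, but the lexicographic sort of the numeric infixes is what the README relies on.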
diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-001-system.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-001-system.yml new file mode 100644 index 0000000000..f02b9f2802 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-001-system.yml @@ -0,0 +1,7 @@ +easyconfigs: + - EasyBuild-4.8.2.eb: + options: + from-pr: 19105 + - Nextflow-23.10.0.eb: + options: + from-pr: 19172 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-010-CUDA.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-010-CUDA.yml new file mode 100644 index 0000000000..dda274b8db --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-010-CUDA.yml @@ -0,0 +1,5 @@ +easyconfigs: + - CUDA-12.1.1.eb: + options: + include-easyblocks-from-pr: 3045 + accept-eula-for: CUDA diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2022b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2022b.yml new file mode 100644 index 0000000000..fd88fafb0c --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2022b.yml @@ -0,0 +1,7 @@ +easyconfigs: + - foss-2022b.eb + - HarfBuzz-5.3.1-GCCcore-12.2.0.eb: + options: + from-pr: 19339 + - Qt5-5.15.7-GCCcore-12.2.0.eb + - QuantumESPRESSO-7.2-foss-2022b.eb diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml new file mode 100644 index 0000000000..7244219dc3 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml @@ -0,0 +1,54 @@ +easyconfigs: + - GCC-12.3.0.eb + - Rust-1.70.0-GCCcore-12.3.0.eb: + # fix build of Rust 1.70.0 by disabling download of pre-built LLVM; + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3038 + options: + include-easyblocks-from-pr: 3038 + - foss-2023a.eb + - pybind11-2.11.1-GCCcore-12.3.0.eb: + # avoid indirect dependency on old CMake version built with GCCcore/10.2.0 via Catch2 build dependency; + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19270 + options: + from-pr: 19270 + - SciPy-bundle-2023.07-gfbf-2023a.eb + - TensorFlow-2.13.0-foss-2023a.eb: + # patch setup.py for grpcio extension in TensorFlow 2.13.0 easyconfigs to take into account alternate sysroot; + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19268 + options: + from-pr: 19268 + - X11-20230603-GCCcore-12.3.0.eb + - HarfBuzz-5.3.1-GCCcore-12.3.0.eb: + options: + from-pr: 19339 + - Qt5-5.15.10-GCCcore-12.3.0.eb + - OSU-Micro-Benchmarks-7.1-1-gompi-2023a.eb + - LHAPDF-6.5.4-GCC-12.3.0.eb: + options: + from-pr: 19363 + - LoopTools-2.15-GCC-12.3.0.eb: + options: + from-pr: 19397 + - R-4.3.2-gfbf-2023a.eb: + options: + from-pr: 19185 + - Boost-1.82.0-GCC-12.3.0.eb + - netCDF-4.9.2-gompi-2023a.eb + - FFmpeg-6.0-GCCcore-12.3.0.eb + - CUDA-Samples-12.1-GCC-12.3.0-CUDA-12.1.1.eb: + # use easyconfig that only install subset of CUDA samples, + # to circumvent problem with nvcc linking to glibc of host OS, + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19189; + # and where additional samples are excluded because they fail to build on aarch64, + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451; + options: + from-pr: 19451 + - ALL-0.9.2-foss-2023a.eb: + options: + from-pr: 19455 + - CDO-2.2.2-gompi-2023a.eb: + options: + from-pr: 19735 + - BWA-0.7.17-20220923-GCCcore-12.3.0.eb: + options: + from-pr: 19820 diff 
--git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-001-system.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-001-system.yml new file mode 100644 index 0000000000..25c13e49c9 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-001-system.yml @@ -0,0 +1,5 @@ +easyconfigs: + - EasyBuild-4.9.0.eb: + options: + from-pr: 19464 + - ReFrame-4.3.3.eb diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2022b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2022b.yml new file mode 100644 index 0000000000..3962f63bda --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2022b.yml @@ -0,0 +1,13 @@ +easyconfigs: + - SciPy-bundle-2023.02-gfbf-2022b.eb + - GDAL-3.6.2-foss-2022b.eb + - waLBerla-6.1-foss-2022b.eb: + options: + from-pr: 19324 + - WRF-4.4.1-foss-2022b-dmpar.eb + - ImageMagick-7.1.0-53-GCCcore-12.2.0.eb: + options: + from-pr: 20086 + - R-4.2.2-foss-2022b.eb: + options: + from-pr: 20238 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml new file mode 100644 index 0000000000..44952e2765 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml @@ -0,0 +1,73 @@ +easyconfigs: + - OpenFOAM-11-foss-2023a.eb: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19545 + options: + from-pr: 19545 + - at-spi2-core-2.49.91-GCCcore-12.3.0.eb + - ESPResSo-4.2.1-foss-2023a.eb: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19592 + options: + from-pr: 19592 + - Rivet-3.1.9-gompi-2023a-HepMC3-3.2.6.eb: + options: + from-pr: 19679 + - PyTorch-2.1.2-foss-2023a.eb: + options: + from-pr: 19573 + - scikit-learn-1.3.1-gfbf-2023a.eb + - snakemake-8.4.2-foss-2023a.eb: + options: + from-pr: 19646 + - LAMMPS-2Aug2023_update2-foss-2023a-kokkos.eb: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19471 + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3036 + options: + from-pr: 19471 + include-easyblocks-from-pr: 3036 + - matplotlib-3.7.2-gfbf-2023a.eb + - PyQt5-5.15.10-GCCcore-12.3.0.eb: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19554 + options: + from-pr: 19554 + - Pillow-SIMD-9.5.0-GCCcore-12.3.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19996 + from-pr: 19996 + - dask-2023.9.2-foss-2023a.eb + - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb + - OSU-Micro-Benchmarks-7.2-gompi-2023b.eb + - JupyterNotebook-7.0.2-GCCcore-12.3.0.eb + - ImageMagick-7.1.1-15-GCCcore-12.3.0.eb: + options: + from-pr: 20086 + - Z3-4.12.2-GCCcore-12.3.0.eb: + options: + # The Z3 dependency of PyTorch had it's versionsuffix removed + # and we need to workaround the problem this creates, + # see https://github.com/EESSI/software-layer/pull/501 for details + from-pr: 20050 + - PyOpenGL-3.1.7-GCCcore-12.3.0.eb: + options: + from-pr: 20007 + - ipympl-0.9.3-foss-2023a.eb: + options: + from-pr: 20126 + - OpenJPEG-2.5.0-GCCcore-12.3.0.eb + - OpenFOAM-10-foss-2023a.eb + - Highway-1.0.4-GCCcore-12.3.0.eb + - ELPA-2023.05.001-foss-2023a.eb + - libxc-6.2.2-GCC-12.3.0.eb + - SuperLU_DIST-8.1.2-foss-2023a.eb: + options: + from-pr: 20162 + - PETSc-3.20.3-foss-2023a.eb: + options: + include-easyblocks-from-pr: 3086 + from-pr: 19686 + - MODFLOW-6.4.4-foss-2023a.eb: + options: + from-pr: 20142 + - R-bundle-CRAN-2023.12-foss-2023a.eb: + options: + # see 
https://github.com/easybuilders/easybuild-easyconfigs/pull/20238 + from-pr: 20238 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023b.yml new file mode 100644 index 0000000000..86c5106c85 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023b.yml @@ -0,0 +1,59 @@ +easyconfigs: + - GCC-13.2.0.eb + - foss-2023b.eb + - SciPy-bundle-2023.11-gfbf-2023b.eb + - netCDF-4.9.2-gompi-2023b.eb: + options: + from-pr: 19534 + - matplotlib-3.8.2-gfbf-2023b.eb: + options: + from-pr: 19552 + - AOFlagger-3.4.0-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - arpack-ng-3.9.0-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - Armadillo-12.8.0-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - casacore-3.5.0-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - IDG-1.2.0-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - EveryBeam-0.5.2-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - DP3-6.0-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - WSClean-3.4-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - CDO-2.2.2-gompi-2023b.eb: + options: + from-pr: 19792 + - python-casacore-3.5.2-foss-2023b.eb: + options: + from-pr: 20089 + - libspatialindex-1.9.3-GCCcore-13.2.0.eb: + options: + from-pr: 19922 + - LittleCMS-2.15-GCCcore-13.2.0.eb + - giflib-5.2.1-GCCcore-13.2.0.eb + - OpenJPEG-2.5.0-GCCcore-13.2.0.eb + - libwebp-1.3.2-GCCcore-13.2.0.eb + - Wayland-1.22.0-GCCcore-13.2.0.eb + - Qt5-5.15.13-GCCcore-13.2.0.eb: + options: + from-pr: 20201 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml new file mode 100644 index 0000000000..c5a08b5209 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml @@ -0,0 +1,4 @@ +easyconfigs: + - EasyBuild-4.9.1.eb: + options: + from-pr: 20299 diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240301-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240301-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml new file mode 100644 index 0000000000..042d0a214c --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240301-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml @@ -0,0 +1,15 @@ +# 2024-03-01 +# Rebuild all OpenMPI 4.1.x versions due to an issue with smcuda: +# https://github.com/open-mpi/ompi/issues/12270 +# https://github.com/open-mpi/ompi/pull/12344 +# https://github.com/easybuilders/easybuild-easyconfigs/pull/19940 +easyconfigs: + - OpenMPI-4.1.4-GCC-12.2.0.eb: + options: + from-pr: 19940 + - OpenMPI-4.1.5-GCC-12.3.0: + options: + from-pr: 19940 + - OpenMPI-4.1.6-GCC-13.2.0: + options: + from-pr: 19940 diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240328-eb-4.9.0-GCCcore-fix-aarch64-vectorization.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240328-eb-4.9.0-GCCcore-fix-aarch64-vectorization.yml new file mode 100644 index 0000000000..f63aa421f5 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240328-eb-4.9.0-GCCcore-fix-aarch64-vectorization.yml @@ -0,0 +1,15 @@ +# 2024-03-28 +# Rebuild GCCcore to fix a compiler bug in the tree-vectorizer +# We encountered it in 
https://github.com/EESSI/software-layer/pull/479#issuecomment-1957091774 +# and https://github.com/EESSI/software-layer/pull/507#issuecomment-2011724613 +# Upstream issue: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111478 +# Upstream fix: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=e5f1956498251a4973d52c8aad3faf34d0443169 +# Fix in EasyBuild https://github.com/easybuilders/easybuild-easyconfigs/pull/19974 +# https://github.com/easybuilders/easybuild-easyconfigs/pull/20218 +easyconfigs: + - GCCcore-12.3.0.eb: + options: + from-pr: 20218 + - GCCcore-13.2.0.eb: + options: + from-pr: 19974 diff --git a/eb_hooks.py b/eb_hooks.py index 2fba925b01..20b5f76cfc 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -3,16 +3,31 @@ import os import re +import easybuild.tools.environment as env from easybuild.easyblocks.generic.configuremake import obtain_config_guess +from easybuild.framework.easyconfig.constants import EASYCONFIG_CONSTANTS from easybuild.tools.build_log import EasyBuildError, print_msg from easybuild.tools.config import build_option, update_build_option -from easybuild.tools.filetools import apply_regex_substitutions, copy_file, which +from easybuild.tools.filetools import apply_regex_substitutions, copy_file, remove_file, symlink, which from easybuild.tools.run import run_cmd from easybuild.tools.systemtools import AARCH64, POWER, X86_64, get_cpu_architecture, get_cpu_features from easybuild.tools.toolchain.compiler import OPTARCH_GENERIC +# prefer importing LooseVersion from easybuild.tools, but fall back to distuils in case EasyBuild <= 4.7.0 is used +try: + from easybuild.tools import LooseVersion +except ImportError: + from distutils.version import LooseVersion + + +CPU_TARGET_NEOVERSE_N1 = 'aarch64/neoverse_n1' +CPU_TARGET_NEOVERSE_V1 = 'aarch64/neoverse_v1' +CPU_TARGET_AARCH64_GENERIC = 'aarch64/generic' + EESSI_RPATH_OVERRIDE_ATTR = 'orig_rpath_override_dirs' +SYSTEM = EASYCONFIG_CONSTANTS['SYSTEM'][0] + def get_eessi_envvar(eessi_envvar): """Get an EESSI environment variable from the environment""" @@ -55,12 +70,23 @@ def parse_hook(ec, *args, **kwargs): if ec.name in PARSE_HOOKS: PARSE_HOOKS[ec.name](ec, eprefix) + # inject the GPU property (if required) + ec = inject_gpu_property(ec) -def pre_configure_hook(self, *args, **kwargs): - """Main pre-configure hook: trigger custom functions based on software name.""" - if self.name in PRE_CONFIGURE_HOOKS: - PRE_CONFIGURE_HOOKS[self.name](self, *args, **kwargs) +def post_ready_hook(self, *args, **kwargs): + """ + Post-ready hook: limit parallellism for selected builds, because they require a lot of memory per used core. + """ + # 'parallel' easyconfig parameter is set via EasyBlock.set_parallel in ready step based on available cores. 
+ # here we reduce parallellism to only use half of that for selected software, + # to avoid failing builds/tests due to out-of-memory problems + if self.name in ['TensorFlow', 'libxc']: + parallel = self.cfg['parallel'] + if parallel > 1: + self.cfg['parallel'] = parallel // 2 + msg = "limiting parallelism to %s (was %s) for %s to avoid out-of-memory failures during building/testing" + print_msg(msg % (self.cfg['parallel'], parallel, self.name), log=self.log) def pre_prepare_hook(self, *args, **kwargs): @@ -90,8 +116,11 @@ def pre_prepare_hook(self, *args, **kwargs): print_msg("Updated rpath_override_dirs (to allow overriding MPI family %s): %s", mpi_family, rpath_override_dirs) + if self.name in PRE_PREPARE_HOOKS: + PRE_PREPARE_HOOKS[self.name](self, *args, **kwargs) -def gcc_postprepare(self, *args, **kwargs): + +def post_prepare_hook_gcc_prefixed_ld_rpath_wrapper(self, *args, **kwargs): """ Post-configure hook for GCCcore: - copy RPATH wrapper script for linker commands to also have a wrapper in place with system type prefix like 'x86_64-pc-linux-gnu' @@ -121,6 +150,7 @@ def gcc_postprepare(self, *args, **kwargs): else: raise EasyBuildError("GCCcore-specific hook triggered for non-GCCcore easyconfig?!") + def post_prepare_hook(self, *args, **kwargs): """Main post-prepare hook: trigger custom functions.""" @@ -134,7 +164,33 @@ def post_prepare_hook(self, *args, **kwargs): POST_PREPARE_HOOKS[self.name](self, *args, **kwargs) -def cgal_toolchainopts_precise(ec, eprefix): +def parse_hook_casacore_disable_vectorize(ec, eprefix): + """ + Disable 'vectorize' toolchain option for casacore 3.5.0 on aarch64/neoverse_v1 + Compiling casacore 3.5.0 with GCC 13.2.0 (foss-2023b) gives an error when building for aarch64/neoverse_v1. + See also, https://github.com/EESSI/software-layer/pull/479 + """ + if ec.name == 'casacore': + tcname, tcversion = ec['toolchain']['name'], ec['toolchain']['version'] + if ( + LooseVersion(ec.version) == LooseVersion('3.5.0') and + tcname == 'foss' and tcversion == '2023b' + ): + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + if cpu_target == CPU_TARGET_NEOVERSE_V1: + if not hasattr(ec, 'toolchainopts'): + ec['toolchainopts'] = {} + ec['toolchainopts']['vectorize'] = False + print_msg("Changed toochainopts for %s: %s", ec.name, ec['toolchainopts']) + else: + print_msg("Not changing option vectorize for %s on non-neoverse_v1", ec.name) + else: + print_msg("Not changing option vectorize for %s %s %s", ec.name, ec.version, ec.toolchain) + else: + raise EasyBuildError("casacore-specific hook triggered for non-casacore easyconfig?!") + + +def parse_hook_cgal_toolchainopts_precise(ec, eprefix): """Enable 'precise' rather than 'strict' toolchain option for CGAL on POWER.""" if ec.name == 'CGAL': if get_cpu_architecture() == POWER: @@ -147,7 +203,7 @@ def cgal_toolchainopts_precise(ec, eprefix): raise EasyBuildError("CGAL-specific hook triggered for non-CGAL easyconfig?!") -def fontconfig_add_fonts(ec, eprefix): +def parse_hook_fontconfig_add_fonts(ec, eprefix): """Inject --with-add-fonts configure option for fontconfig.""" if ec.name == 'fontconfig': # make fontconfig aware of fonts included with compat layer @@ -158,7 +214,59 @@ def fontconfig_add_fonts(ec, eprefix): raise EasyBuildError("fontconfig-specific hook triggered for non-fontconfig easyconfig?!") -def ucx_eprefix(ec, eprefix): +def parse_hook_openblas_relax_lapack_tests_num_errors(ec, eprefix): + """Relax number of failing numerical LAPACK tests for aarch64/neoverse_v1 CPU target for OpenBLAS < 0.3.23""" + 
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + if ec.name == 'OpenBLAS': + if LooseVersion(ec.version) < LooseVersion('0.3.23'): + # relax maximum number of failed numerical LAPACK tests for aarch64/neoverse_v1 CPU target + # since the default setting of 150 that works well on other aarch64 targets and x86_64 is a bit too strict + # See https://github.com/EESSI/software-layer/issues/314 + cfg_option = 'max_failing_lapack_tests_num_errors' + if cpu_target == CPU_TARGET_NEOVERSE_V1: + orig_value = ec[cfg_option] + ec[cfg_option] = 400 + print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)", + ec.name, ec[cfg_option], orig_value) + else: + print_msg("Not changing option %s for %s on non-AARCH64", cfg_option, ec.name) + else: + raise EasyBuildError("OpenBLAS-specific hook triggered for non-OpenBLAS easyconfig?!") + + +def parse_hook_pybind11_replace_catch2(ec, eprefix): + """ + Replace Catch2 build dependency in pybind11 easyconfigs with one that doesn't use system toolchain. + cfr. https://github.com/easybuilders/easybuild-easyconfigs/pull/19270 + """ + # this is mainly necessary to avoid that --missing keeps reporting Catch2/2.13.9 is missing, + # and to avoid that we need to use "--from-pr 19270" for every easyconfigs that (indirectly) depends on pybind11 + if ec.name == 'pybind11' and ec.version in ['2.10.3', '2.11.1']: + build_deps = ec['builddependencies'] + catch2_build_dep = None + catch2_name, catch2_version = ('Catch2', '2.13.9') + for idx, build_dep in enumerate(build_deps): + if build_dep[0] == catch2_name and build_dep[1] == catch2_version: + catch2_build_dep = build_dep + break + if catch2_build_dep and len(catch2_build_dep) == 4 and catch2_build_dep[3] == SYSTEM: + build_deps[idx] = (catch2_name, catch2_version) + + +def parse_hook_qt5_check_qtwebengine_disable(ec, eprefix): + """ + Disable check for QtWebEngine in Qt5 as workaround for problem with determining glibc version. 
+ """ + if ec.name == 'Qt5': + # workaround for glibc version being reported as "UNKNOWN" in Gentoo Prefix environment by EasyBuild v4.7.2, + # see also https://github.com/easybuilders/easybuild-framework/pull/4290 + ec['check_qtwebengine'] = False + print_msg("Checking for QtWebEgine in Qt5 installation has been disabled") + else: + raise EasyBuildError("Qt5-specific hook triggered for non-Qt5 easyconfig?!") + + +def parse_hook_ucx_eprefix(ec, eprefix): """Make UCX aware of compatibility layer via additional configuration options.""" if ec.name == 'UCX': ec.update('configopts', '--with-sysroot=%s' % eprefix) @@ -168,13 +276,85 @@ def ucx_eprefix(ec, eprefix): raise EasyBuildError("UCX-specific hook triggered for non-UCX easyconfig?!") +def parse_hook_lammps_remove_deps_for_CI_aarch64(ec, *args, **kwargs): + """ + Remove x86_64 specific dependencies for the CI to pass on aarch64 + """ + if ec.name == 'LAMMPS' and ec.version in ('2Aug2023_update2',): + if os.getenv('EESSI_CPU_FAMILY') == 'aarch64': + # ScaFaCoS and tbb are not compatible with aarch64/* CPU targets, + # so remove them as dependencies for LAMMPS (they're optional); + # see also https://github.com/easybuilders/easybuild-easyconfigs/pull/19164 + + # https://github.com/easybuilders/easybuild-easyconfigs/pull/19000; + # we need this hook because we check for missing installations for all CPU targets + # on an x86_64 VM in GitHub Actions (so condition based on ARCH in LAMMPS easyconfig is always true) + ec['dependencies'] = [dep for dep in ec['dependencies'] if dep[0] not in ('ScaFaCoS', 'tbb')] + else: + raise EasyBuildError("LAMMPS-specific hook triggered for non-LAMMPS easyconfig?!") + + +def pre_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwargs): + """ + Solve issues with compiling or running the tests on both + neoverse_n1 and neoverse_v1 with Highway 1.0.4 and GCC 12.3.0: + - for neoverse_n1 we set optarch to GENERIC + - for neoverse_v1 we completely disable the tests + cfr. https://github.com/EESSI/software-layer/issues/469 + """ + if self.name == 'Highway': + tcname, tcversion = self.toolchain.name, self.toolchain.version + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + # note: keep condition in sync with the one used in + # post_prepare_hook_highway_handle_test_compilation_issues + if self.version in ['1.0.4'] and tcname == 'GCCcore' and tcversion == '12.3.0': + if cpu_target == CPU_TARGET_NEOVERSE_V1: + self.cfg.update('configopts', '-DHWY_ENABLE_TESTS=OFF') + if cpu_target == CPU_TARGET_NEOVERSE_N1: + self.orig_optarch = build_option('optarch') + update_build_option('optarch', OPTARCH_GENERIC) + else: + raise EasyBuildError("Highway-specific hook triggered for non-Highway easyconfig?!") + + +def post_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwargs): + """ + Post-prepare hook for Highway to reset optarch build option. 
+ """ + if self.name == 'Highway': + tcname, tcversion = self.toolchain.name, self.toolchain.version + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + # note: keep condition in sync with the one used in + # pre_prepare_hook_highway_handle_test_compilation_issues + if self.version in ['1.0.4'] and tcname == 'GCCcore' and tcversion == '12.3.0': + if cpu_target == CPU_TARGET_NEOVERSE_N1: + update_build_option('optarch', self.orig_optarch) + def pre_configure_hook(self, *args, **kwargs): """Main pre-configure hook: trigger custom functions based on software name.""" if self.name in PRE_CONFIGURE_HOOKS: PRE_CONFIGURE_HOOKS[self.name](self, *args, **kwargs) -def libfabric_disable_psm3_x86_64_generic(self, *args, **kwargs): +def pre_configure_hook_openblas_optarch_generic(self, *args, **kwargs): + """ + Pre-configure hook for OpenBLAS: add DYNAMIC_ARCH=1 to build/test/install options when using --optarch=GENERIC + """ + if self.name == 'OpenBLAS': + if build_option('optarch') == OPTARCH_GENERIC: + for step in ('build', 'test', 'install'): + self.cfg.update(f'{step}opts', "DYNAMIC_ARCH=1") + + # use -mtune=generic rather than -mcpu=generic in $CFLAGS on aarch64, + # because -mcpu=generic implies a particular -march=armv* which clashes with those used by OpenBLAS + # when building with DYNAMIC_ARCH=1 + if get_cpu_architecture() == AARCH64: + cflags = os.getenv('CFLAGS').replace('-mcpu=generic', '-mtune=generic') + env.setvar('CFLAGS', cflags) + else: + raise EasyBuildError("OpenBLAS-specific hook triggered for non-OpenBLAS easyconfig?!") + + +def pre_configure_hook_libfabric_disable_psm3_x86_64_generic(self, *args, **kwargs): """Add --disable-psm3 to libfabric configure options when building with --optarch=GENERIC on x86_64.""" if self.name == 'libfabric': if get_cpu_architecture() == X86_64: @@ -187,7 +367,7 @@ def libfabric_disable_psm3_x86_64_generic(self, *args, **kwargs): raise EasyBuildError("libfabric-specific hook triggered for non-libfabric easyconfig?!") -def metabat_preconfigure(self, *args, **kwargs): +def pre_configure_hook_metabat_filtered_zlib_dep(self, *args, **kwargs): """ Pre-configure hook for MetaBAT: - take into account that zlib is a filtered dependency, @@ -201,7 +381,7 @@ def metabat_preconfigure(self, *args, **kwargs): raise EasyBuildError("MetaBAT-specific hook triggered for non-MetaBAT easyconfig?!") -def wrf_preconfigure(self, *args, **kwargs): +def pre_configure_hook_wrf_aarch64(self, *args, **kwargs): """ Pre-configure hook for WRF: - patch arch/configure_new.defaults so building WRF with foss toolchain works on aarch64 @@ -210,24 +390,307 @@ def wrf_preconfigure(self, *args, **kwargs): if get_cpu_architecture() == AARCH64: pattern = "Linux x86_64 ppc64le, gfortran" repl = "Linux x86_64 aarch64 ppc64le, gfortran" - self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure_new.defaults && " % (pattern, repl)) - print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) + if LooseVersion(self.version) <= LooseVersion('3.9.0'): + self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure_new.defaults && " % (pattern, repl)) + print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) + + if LooseVersion('4.0.0') <= LooseVersion(self.version) <= LooseVersion('4.2.1'): + self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure.defaults && " % (pattern, repl)) + print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) else: raise 
EasyBuildError("WRF-specific hook triggered for non-WRF easyconfig?!") +def pre_configure_hook_atspi2core_filter_ld_library_path(self, *args, **kwargs): + """ + pre-configure hook for at-spi2-core: + - instruct GObject-Introspection's g-ir-scanner tool to not set $LD_LIBRARY_PATH + when EasyBuild is configured to filter it, see: + https://github.com/EESSI/software-layer/issues/196 + """ + if self.name == 'at-spi2-core': + if build_option('filter_env_vars') and 'LD_LIBRARY_PATH' in build_option('filter_env_vars'): + sed_cmd = 'sed -i "s/gir_extra_args = \[/gir_extra_args = \[\\n \'--lib-dirs-envvar=FILTER_LD_LIBRARY_PATH\',/g" %(start_dir)s/atspi/meson.build && ' + self.cfg.update('preconfigopts', sed_cmd) + else: + raise EasyBuildError("at-spi2-core-specific hook triggered for non-at-spi2-core easyconfig?!") + + +def pre_test_hook(self,*args, **kwargs): + """Main pre-test hook: trigger custom functions based on software name.""" + if self.name in PRE_TEST_HOOKS: + PRE_TEST_HOOKS[self.name](self, *args, **kwargs) + + +def pre_test_hook_exclude_failing_test_Highway(self, *args, **kwargs): + """ + Pre-test hook for Highway: exclude failing TestAllShiftRightLanes/SVE_256 test on neoverse_v1 + cfr. https://github.com/EESSI/software-layer/issues/469 + """ + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + if self.name == 'Highway' and self.version in ['1.0.3'] and cpu_target == CPU_TARGET_NEOVERSE_V1: + self.cfg['runtest'] += ' ARGS="-E TestAllShiftRightLanes/SVE_256"' + + +def pre_test_hook_ignore_failing_tests_ESPResSo(self, *args, **kwargs): + """ + Pre-test hook for ESPResSo: skip failing tests, tests frequently timeout due to known bugs in ESPResSo v4.2.1 + cfr. https://github.com/EESSI/software-layer/issues/363 + """ + if self.name == 'ESPResSo' and self.version == '4.2.1': + self.cfg['testopts'] = "|| echo 'ignoring failing tests (probably due to timeouts)'" + + +def pre_test_hook_ignore_failing_tests_FFTWMPI(self, *args, **kwargs): + """ + Pre-test hook for FFTW.MPI: skip failing tests for FFTW.MPI 3.3.10 on neoverse_v1 + cfr. https://github.com/EESSI/software-layer/issues/325 + """ + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + if self.name == 'FFTW.MPI' and self.version == '3.3.10' and cpu_target == CPU_TARGET_NEOVERSE_V1: + self.cfg['testopts'] = "|| echo ignoring failing tests" + + +def pre_test_hook_ignore_failing_tests_SciPybundle(self, *args, **kwargs): + """ + Pre-test hook for SciPy-bundle: skip failing tests for selected SciPy-bundle versions + In version 2021.10, 2 failing tests in scipy 1.6.3: + FAILED optimize/tests/test_linprog.py::TestLinprogIPSparse::test_bug_6139 - A... 
+ FAILED optimize/tests/test_linprog.py::TestLinprogIPSparsePresolve::test_bug_6139 + = 2 failed, 30554 passed, 2064 skipped, 10992 deselected, 76 xfailed, 7 xpassed, 40 warnings in 380.27s (0:06:20) = + In versions 2023.02, 2023.07, and 2023.11, 2 failing tests in scipy (versions 1.10.1, 1.11.1, 1.11.4): + FAILED scipy/spatial/tests/test_distance.py::TestPdist::test_pdist_correlation_iris + FAILED scipy/spatial/tests/test_distance.py::TestPdist::test_pdist_correlation_iris_float32 + = 2 failed, 54409 passed, 3016 skipped, 223 xfailed, 13 xpassed, 10917 warnings in 892.04s (0:14:52) = + In previous versions we were not as strict yet on the numpy/SciPy tests + """ + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + scipy_bundle_versions = ('2021.10', '2023.02', '2023.07', '2023.11') + if self.name == 'SciPy-bundle' and self.version in scipy_bundle_versions and cpu_target == CPU_TARGET_NEOVERSE_V1: + self.cfg['testopts'] = "|| echo ignoring failing tests" + +def pre_test_hook_ignore_failing_tests_netCDF(self, *args, **kwargs): + """ + Pre-test hook for netCDF: skip failing tests for selected netCDF versions on neoverse_v1 + cfr. https://github.com/EESSI/software-layer/issues/425 + The following tests are problematic: + 163 - nc_test4_run_par_test (Timeout) + 190 - h5_test_run_par_tests (Timeout) + A few other tests are skipped in the easyconfig and patches for similar issues, see above issue for details. + """ + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + if self.name == 'netCDF' and self.version == '4.9.2' and cpu_target == CPU_TARGET_NEOVERSE_V1: + self.cfg['testopts'] = "|| echo ignoring failing tests" + +def pre_test_hook_increase_max_failed_tests_arm_PyTorch(self, *args, **kwargs): + """ + Pre-test hook for PyTorch: increase max failing tests for ARM for PyTorch 2.1.2 + See https://github.com/EESSI/software-layer/pull/444#issuecomment-1890416171 + """ + if self.name == 'PyTorch' and self.version == '2.1.2' and get_cpu_architecture() == AARCH64: + self.cfg['max_failed_tests'] = 10 + + +def pre_single_extension_hook(ext, *args, **kwargs): + """Main pre-extension: trigger custom functions based on software name.""" + if ext.name in PRE_SINGLE_EXTENSION_HOOKS: + PRE_SINGLE_EXTENSION_HOOKS[ext.name](ext, *args, **kwargs) + + +def post_single_extension_hook(ext, *args, **kwargs): + """Main post-extension hook: trigger custom functions based on software name.""" + if ext.name in POST_SINGLE_EXTENSION_HOOKS: + POST_SINGLE_EXTENSION_HOOKS[ext.name](ext, *args, **kwargs) + + +def pre_single_extension_isoband(ext, *args, **kwargs): + """ + Pre-extension hook for isoband R package, to fix build on top of recent glibc. + """ + if ext.name == 'isoband' and LooseVersion(ext.version) < LooseVersion('0.2.5'): + # use constant value instead of SIGSTKSZ for stack size in vendored testthat included in isoband sources, + # cfr. https://github.com/r-lib/isoband/commit/6984e6ce8d977f06e0b5ff73f5d88e5c9a44c027 + ext.cfg['preinstallopts'] = "sed -i 's/SIGSTKSZ/32768/g' src/testthat/vendor/catch.h && " + + +def pre_single_extension_numpy(ext, *args, **kwargs): + """ + Pre-extension hook for numpy, to change -march=native to -march=armv8.4-a for numpy 1.24.2 + when building for aarch64/neoverse_v1 CPU target. 
+ """ + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + if ext.name == 'numpy' and ext.version == '1.24.2' and cpu_target == CPU_TARGET_NEOVERSE_V1: + # note: this hook is called before build environment is set up (by calling toolchain.prepare()), + # so environment variables like $CFLAGS are not defined yet + # unsure which of these actually matter for numpy, so changing all of them + ext.orig_optarch = build_option('optarch') + update_build_option('optarch', 'march=armv8.4-a') + + +def post_single_extension_numpy(ext, *args, **kwargs): + """ + Post-extension hook for numpy, to reset 'optarch' build option. + """ + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + if ext.name == 'numpy' and ext.version == '1.24.2' and cpu_target == CPU_TARGET_NEOVERSE_V1: + update_build_option('optarch', ext.orig_optarch) + + +def pre_single_extension_testthat(ext, *args, **kwargs): + """ + Pre-extension hook for testthat R package, to fix build on top of recent glibc. + """ + if ext.name == 'testthat' and LooseVersion(ext.version) < LooseVersion('3.1.0'): + # use constant value instead of SIGSTKSZ for stack size, + # cfr. https://github.com/r-lib/testthat/issues/1373 + https://github.com/r-lib/testthat/pull/1403 + ext.cfg['preinstallopts'] = "sed -i 's/SIGSTKSZ/32768/g' inst/include/testthat/vendor/catch.h && " + + +def post_sanitycheck_hook(self, *args, **kwargs): + """Main post-sanity-check hook: trigger custom functions based on software name.""" + if self.name in POST_SANITYCHECK_HOOKS: + POST_SANITYCHECK_HOOKS[self.name](self, *args, **kwargs) + + +def post_sanitycheck_cuda(self, *args, **kwargs): + """ + Remove files from CUDA installation that we are not allowed to ship, + and replace them with a symlink to a corresponding installation under host_injections. + """ + if self.name == 'CUDA': + print_msg("Replacing files in CUDA installation that we can not ship with symlinks to host_injections...") + + # read CUDA EULA, construct allowlist based on section 2.6 that specifies list of files that can be shipped + eula_path = os.path.join(self.installdir, 'EULA.txt') + relevant_eula_lines = [] + with open(eula_path) as infile: + copy = False + for line in infile: + if line.strip() == "2.6. Attachment A": + copy = True + continue + elif line.strip() == "2.7. Attachment B": + copy = False + continue + elif copy: + relevant_eula_lines.append(line) + + # create list without file extensions, they're not really needed and they only complicate things + allowlist = ['EULA', 'README'] + file_extensions = ['.so', '.a', '.h', '.bc'] + for line in relevant_eula_lines: + for word in line.split(): + if any(ext in word for ext in file_extensions): + allowlist.append(os.path.splitext(word)[0]) + allowlist = sorted(set(allowlist)) + self.log.info("Allowlist for files in CUDA installation that can be redistributed: " + ', '.join(allowlist)) + + # Do some quick sanity checks for things we should or shouldn't have in the list + if 'nvcc' in allowlist: + raise EasyBuildError("Found 'nvcc' in allowlist: %s" % allowlist) + if 'libcudart' not in allowlist: + raise EasyBuildError("Did not find 'libcudart' in allowlist: %s" % allowlist) + + # iterate over all files in the CUDA installation directory + for dir_path, _, files in os.walk(self.installdir): + for filename in files: + full_path = os.path.join(dir_path, filename) + # we only really care about real files, i.e. 
not symlinks + if not os.path.islink(full_path): + # check if the current file is part of the allowlist + basename = os.path.splitext(filename)[0] + if basename in allowlist: + self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) + else: + self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s", + basename, full_path) + # if it is not in the allowlist, delete the file and create a symlink to host_injections + host_inj_path = full_path.replace('versions', 'host_injections') + # make sure source and target of symlink are not the same + if full_path == host_inj_path: + raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " + "are using this hook for an EESSI installation?", + full_path, host_inj_path) + remove_file(full_path) + symlink(host_inj_path, full_path) + else: + raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!") + + +def inject_gpu_property(ec): + """ + Add 'gpu' property, via modluafooter easyconfig parameter + """ + ec_dict = ec.asdict() + # Check if CUDA is in the dependencies, if so add the 'gpu' Lmod property + if ('CUDA' in [dep[0] for dep in iter(ec_dict['dependencies'])]): + ec.log.info("Injecting gpu as Lmod arch property and envvar with CUDA version") + key = 'modluafooter' + value = 'add_property("arch","gpu")' + cuda_version = 0 + for dep in iter(ec_dict['dependencies']): + # Make CUDA a build dependency only (rpathing saves us from link errors) + if 'CUDA' in dep[0]: + cuda_version = dep[1] + ec_dict['dependencies'].remove(dep) + if dep not in ec_dict['builddependencies']: + ec_dict['builddependencies'].append(dep) + value = '\n'.join([value, 'setenv("EESSICUDAVERSION","%s")' % cuda_version]) + if key in ec_dict: + if not value in ec_dict[key]: + ec[key] = '\n'.join([ec_dict[key], value]) + else: + ec[key] = value + return ec + + PARSE_HOOKS = { - 'CGAL': cgal_toolchainopts_precise, - 'fontconfig': fontconfig_add_fonts, - 'UCX': ucx_eprefix, + 'casacore': parse_hook_casacore_disable_vectorize, + 'CGAL': parse_hook_cgal_toolchainopts_precise, + 'fontconfig': parse_hook_fontconfig_add_fonts, + 'LAMMPS': parse_hook_lammps_remove_deps_for_CI_aarch64, + 'OpenBLAS': parse_hook_openblas_relax_lapack_tests_num_errors, + 'pybind11': parse_hook_pybind11_replace_catch2, + 'Qt5': parse_hook_qt5_check_qtwebengine_disable, + 'UCX': parse_hook_ucx_eprefix, +} + +PRE_PREPARE_HOOKS = { + 'Highway': pre_prepare_hook_highway_handle_test_compilation_issues, } POST_PREPARE_HOOKS = { - 'GCCcore': gcc_postprepare, + 'GCCcore': post_prepare_hook_gcc_prefixed_ld_rpath_wrapper, + 'Highway': post_prepare_hook_highway_handle_test_compilation_issues, } PRE_CONFIGURE_HOOKS = { - 'libfabric': libfabric_disable_psm3_x86_64_generic, - 'MetaBAT': metabat_preconfigure, - 'WRF': wrf_preconfigure, + 'libfabric': pre_configure_hook_libfabric_disable_psm3_x86_64_generic, + 'MetaBAT': pre_configure_hook_metabat_filtered_zlib_dep, + 'OpenBLAS': pre_configure_hook_openblas_optarch_generic, + 'WRF': pre_configure_hook_wrf_aarch64, + 'at-spi2-core': pre_configure_hook_atspi2core_filter_ld_library_path, +} + +PRE_TEST_HOOKS = { + 'ESPResSo': pre_test_hook_ignore_failing_tests_ESPResSo, + 'FFTW.MPI': pre_test_hook_ignore_failing_tests_FFTWMPI, + 'Highway': pre_test_hook_exclude_failing_test_Highway, + 'SciPy-bundle': pre_test_hook_ignore_failing_tests_SciPybundle, + 'netCDF': pre_test_hook_ignore_failing_tests_netCDF, + 'PyTorch': pre_test_hook_increase_max_failed_tests_arm_PyTorch, +} + 
+PRE_SINGLE_EXTENSION_HOOKS = { + 'isoband': pre_single_extension_isoband, + 'numpy': pre_single_extension_numpy, + 'testthat': pre_single_extension_testthat, +} + +POST_SINGLE_EXTENSION_HOOKS = { + 'numpy': post_single_extension_numpy, +} + +POST_SANITYCHECK_HOOKS = { + 'CUDA': post_sanitycheck_cuda, } diff --git a/eessi-2021.06.yml b/eessi-2021.06.yml deleted file mode 100644 index 3587827746..0000000000 --- a/eessi-2021.06.yml +++ /dev/null @@ -1,53 +0,0 @@ -software: - R-bundle-Bioconductor: - toolchains: - foss-2020a: - versions: - '3.11': - versionsuffix: -R-4.0.0 - GROMACS: - toolchains: - foss-2020a: - versions: - '2020.1': - versionsuffix: -Python-3.8.2 - '2020.4': - versionsuffix: -Python-3.8.2 - Horovod: - toolchains: - foss-2020a: - versions: - '0.21.3': - versionsuffix: -TensorFlow-2.3.1-Python-3.8.2 - OpenFOAM: - toolchains: - foss-2020a: - versions: ['8', 'v2006'] - OSU-Micro-Benchmarks: - toolchains: - gompi-2020a: - versions: ['5.6.3'] - QuantumESPRESSO: - toolchains: - foss-2020a: - versions: ['6.6'] - TensorFlow: - toolchains: - foss-2020a: - versions: - '2.3.1': - versionsuffix: -Python-3.8.2 - RStudio-Server: - toolchains: - foss-2020a: - versions: - '1.3.1093': - versionsuffix: -Java-11-R-4.0.0 - ReFrame: - toolchains: - SYSTEM: - versions: '3.6.2' - code-server: - toolchains: - SYSTEM: - versions: '3.7.3' diff --git a/eessi-2021.12.yml b/eessi-2021.12.yml deleted file mode 100644 index 210bbb2845..0000000000 --- a/eessi-2021.12.yml +++ /dev/null @@ -1,69 +0,0 @@ -software: - code-server: - toolchains: - SYSTEM: - versions: '3.7.3' - GROMACS: - toolchains: - foss-2020a: - versions: - '2020.1': - versionsuffix: -Python-3.8.2 - '2020.4': - versionsuffix: -Python-3.8.2 - Horovod: - toolchains: - foss-2020a: - versions: - '0.21.3': - versionsuffix: -TensorFlow-2.3.1-Python-3.8.2 - Nextflow: - toolchains: - SYSTEM: - versions: '22.10.1' - OpenFOAM: - toolchains: - foss-2020a: - versions: ['8', 'v2006'] - OSU-Micro-Benchmarks: - toolchains: - gompi-2020a: - versions: ['5.6.3'] - gompi-2021a: - versions: ['5.7.1'] - QuantumESPRESSO: - toolchains: - foss-2020a: - versions: ['6.6'] - R: - toolchains: - foss-2021a: - versions: '4.1.0' - R-bundle-Bioconductor: - toolchains: - foss-2020a: - versions: - '3.11': - versionsuffix: -R-4.0.0 - RStudio-Server: - toolchains: - foss-2020a: - versions: - '1.3.1093': - versionsuffix: -Java-11-R-4.0.0 - SciPy-bundle: - toolchains: - foss-2021a: - versions: ['2021.05'] - TensorFlow: - toolchains: - foss-2020a: - versions: - '2.3.1': - versionsuffix: -Python-3.8.2 - WRF: - toolchains: - foss-2020a: - versions: - '3.9.1.1': - versionsuffix: -dmpar diff --git a/eessi-2023.06-known-issues.yml b/eessi-2023.06-known-issues.yml new file mode 100644 index 0000000000..011cb2dc08 --- /dev/null +++ b/eessi-2023.06-known-issues.yml @@ -0,0 +1,48 @@ +- aarch64/generic: + - PyTorch-2.1.2-foss-2023a: + - issue: https://github.com/EESSI/software-layer/issues/461 + - info: "8 failing tests (out of 209539) on aarch64/*" +- aarch64/neoverse_n1: + - Highway-1.0.4-GCCcore-12.3.0.eb: + - issue: https://github.com/EESSI/software-layer/issues/469 + - info: "failing to build the tests" + - PyTorch-2.1.2-foss-2023a: + - issue: https://github.com/EESSI/software-layer/issues/461 + - info: "8 failing tests (out of 209539) on aarch64/*" +- aarch64/neoverse_v1: + - ESPResSo-4.2.1-foss-2023a: + - issue: https://github.com/EESSI/software-layer/issues/363 + - info: "ESPResSo tests failing due to timeouts" + - FFTW.MPI-3.3.10-gompi-2023a: + - issue: 
https://github.com/EESSI/software-layer/issues/325 + - info: "Flaky FFTW tests, random failures" + - FFTW.MPI-3.3.10-gompi-2023b: + - issue: https://github.com/EESSI/software-layer/issues/325 + - info: "Flaky FFTW tests, random failures" + - Highway-1.0.3-GCCcore-12.2.0.eb: + - issue: https://github.com/EESSI/software-layer/issues/469 + - info: "failing SVE test due to wrong expected value" + - Highway-1.0.4-GCCcore-12.3.0.eb: + - issue: https://github.com/EESSI/software-layer/issues/469 + - info: "failing to build the tests" + - netCDF-4.9.2-gompi-2023a.eb: + - issue: https://github.com/EESSI/software-layer/issues/425 + - info: "netCDF intermittent test failures" + - netCDF-4.9.2-gompi-2023b.eb: + - issue: https://github.com/EESSI/software-layer/issues/425 + - info: "netCDF intermittent test failures" + - OpenBLAS-0.3.21-GCC-12.2.0: + - issue: https://github.com/EESSI/software-layer/issues/314 + - info: "Increased number of numerical errors in OpenBLAS test suite (344 vs max. 150 on x86_64/*)" + - PyTorch-2.1.2-foss-2023a: + - issue: https://github.com/EESSI/software-layer/issues/461 + - info: "8 failing tests (out of 209539) on aarch64/*" + - SciPy-bundle-2023.02-gfbf-2022b: + - issue: https://github.com/EESSI/software-layer/issues/318 + - info: "numpy built with -march=armv8.4-a instead of -mcpu=native (no SVE) + 2 failing tests (vs 50005 passed) in scipy test suite" + - SciPy-bundle-2023.07-gfbf-2023a: + - issue: https://github.com/EESSI/software-layer/issues/318 + - info: "2 failing tests (vs 54409 passed) in scipy test suite" + - SciPy-bundle-2023.11-gfbf-2023b: + - issue: https://github.com/EESSI/software-layer/issues/318 + - info: "2 failing tests (vs 54876 passed) in scipy test suite" diff --git a/eessi_container.sh b/eessi_container.sh index 48c4653ba9..7d00d1400c 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -30,8 +30,8 @@ # -. initial settings & exit codes TOPDIR=$(dirname $(realpath $0)) -source ${TOPDIR}/scripts/utils.sh -source ${TOPDIR}/scripts/cfg_files.sh +source "${TOPDIR}"/scripts/utils.sh +source "${TOPDIR}"/scripts/cfg_files.sh # exit codes: bitwise shift codes to allow for combination of exit codes # ANY_ERROR_EXITCODE is sourced from ${TOPDIR}/scripts/utils.sh @@ -46,6 +46,7 @@ SAVE_ERROR_EXITCODE=$((${ANY_ERROR_EXITCODE} << 8)) HTTP_PROXY_ERROR_EXITCODE=$((${ANY_ERROR_EXITCODE} << 9)) HTTPS_PROXY_ERROR_EXITCODE=$((${ANY_ERROR_EXITCODE} << 10)) RUN_SCRIPT_MISSING_EXITCODE=$((${ANY_ERROR_EXITCODE} << 11)) +NVIDIA_MODE_UNKNOWN_EXITCODE=$((${ANY_ERROR_EXITCODE} << 12)) # CernVM-FS settings CVMFS_VAR_LIB="var-lib-cvmfs" @@ -72,14 +73,20 @@ display_help() { echo " -a | --access {ro,rw} - ro (read-only), rw (read & write) [default: ro]" echo " -c | --container IMG - image file or URL defining the container to use" echo " [default: docker://ghcr.io/eessi/build-node:debian11]" - echo " -h | --help - display this usage information [default: false]" + echo " -f | --fakeroot - run the container with --fakeroot [default: false]" echo " -g | --storage DIR - directory space on host machine (used for" echo " temporary data) [default: 1. TMPDIR, 2. 
/tmp]" + echo " -h | --help - display this usage information [default: false]" + echo " -i | --host-injections - directory to link to for host_injections " + echo " [default: /..storage../opt-eessi]" echo " -l | --list-repos - list available repository identifiers [default: false]" echo " -m | --mode MODE - with MODE==shell (launch interactive shell) or" echo " MODE==run (run a script or command) [default: shell]" + echo " -n | --nvidia MODE - configure the container to work with NVIDIA GPUs," + echo " MODE==install for a CUDA installation, MODE==run to" + echo " attach a GPU, MODE==all for both [default: false]" echo " -r | --repository CFG - configuration file or identifier defining the" - echo " repository to use [default: EESSI-pilot via" + echo " repository to use [default: EESSI via" echo " default container, see --container]" echo " -u | --resume DIR/TGZ - resume a previous run from a directory or tarball," echo " where DIR points to a previously used tmp directory" @@ -107,11 +114,13 @@ display_help() { ACCESS="ro" CONTAINER="docker://ghcr.io/eessi/build-node:debian11" #DRY_RUN=0 +FAKEROOT=0 VERBOSE=0 STORAGE= LIST_REPOS=0 MODE="shell" -REPOSITORY="EESSI-pilot" +SETUP_NVIDIA=0 +REPOSITORY="EESSI" RESUME= SAVE= HTTP_PROXY=${http_proxy:-} @@ -133,6 +142,10 @@ while [[ $# -gt 0 ]]; do # DRY_RUN=1 # shift 1 # ;; + -f|--fakeroot) + FAKEROOT=1 + shift 1 + ;; -g|--storage) STORAGE="$2" shift 2 @@ -141,6 +154,10 @@ while [[ $# -gt 0 ]]; do display_help exit 0 ;; + -i|--host-injections) + USER_HOST_INJECTIONS="$2" + shift 2 + ;; -l|--list-repos) LIST_REPOS=1 shift 1 @@ -149,6 +166,11 @@ while [[ $# -gt 0 ]]; do MODE="$2" shift 2 ;; + -n|--nvidia) + SETUP_NVIDIA=1 + NVIDIA_MODE="$2" + shift 2 + ;; -r|--repository) REPOSITORY="$2" shift 2 @@ -194,7 +216,7 @@ set -- "${POSITIONAL_ARGS[@]}" if [[ ${LIST_REPOS} -eq 1 ]]; then echo "Listing available repositories with format 'name [source]':" - echo " EESSI-pilot [default]" + echo " EESSI [default]" if [[ -r ${EESSI_REPOS_CFG_FILE} ]]; then cfg_load ${EESSI_REPOS_CFG_FILE} sections=$(cfg_sections) @@ -224,9 +246,16 @@ if [[ "${MODE}" != "shell" && "${MODE}" != "run" ]]; then fatal_error "unknown execution mode '${MODE}'" "${MODE_UNKNOWN_EXITCODE}" fi +# Also validate the NVIDIA GPU mode (if present) +if [[ ${SETUP_NVIDIA} -eq 1 ]]; then + if [[ "${NVIDIA_MODE}" != "run" && "${NVIDIA_MODE}" != "install" && "${NVIDIA_MODE}" != "all" ]]; then + fatal_error "unknown NVIDIA mode '${NVIDIA_MODE}'" "${NVIDIA_MODE_UNKNOWN_EXITCODE}" + fi +fi + # TODO (arg -r|--repository) check if repository is known # REPOSITORY_ERROR_EXITCODE -if [[ ! -z "${REPOSITORY}" && "${REPOSITORY}" != "EESSI-pilot" && ! -r ${EESSI_REPOS_CFG_FILE} ]]; then +if [[ ! -z "${REPOSITORY}" && "${REPOSITORY}" != "EESSI" && ! 
-r ${EESSI_REPOS_CFG_FILE} ]]; then fatal_error "arg '--repository ${REPOSITORY}' requires a cfg file at '${EESSI_REPOS_CFG_FILE}'" "${REPOSITORY_ERROR_EXITCODE}" fi @@ -310,12 +339,25 @@ fi # |-overlay-work # |-home # |-repos_cfg +# |-opt-eessi (unless otherwise specificed for host_injections) # tmp dir for EESSI EESSI_TMPDIR=${EESSI_HOST_STORAGE} mkdir -p ${EESSI_TMPDIR} [[ ${VERBOSE} -eq 1 ]] && echo "EESSI_TMPDIR=${EESSI_TMPDIR}" +# Set host_injections directory and ensure it is a writable directory (if user provided) +if [ -z ${USER_HOST_INJECTIONS+x} ]; then + # Not set, so use our default + HOST_INJECTIONS=${EESSI_TMPDIR}/opt-eessi + mkdir -p $HOST_INJECTIONS +else + # Make sure the host_injections directory specified exists and is a folder + mkdir -p ${USER_HOST_INJECTIONS} || fatal_error "host_injections directory ${USER_HOST_INJECTIONS} is either not a directory or cannot be created" + HOST_INJECTIONS=${USER_HOST_INJECTIONS} +fi +[[ ${VERBOSE} -eq 1 ]] && echo "HOST_INJECTIONS=${HOST_INJECTIONS}" + # configure Singularity: if SINGULARITY_CACHEDIR is already defined, use that # a global SINGULARITY_CACHEDIR would ensure that we don't consume # storage space again and again for the container & also speed-up @@ -394,16 +436,51 @@ fi [[ ${VERBOSE} -eq 1 ]] && echo "SINGULARITY_HOME=${SINGULARITY_HOME}" # define paths to add to SINGULARITY_BIND (added later when all BIND mounts are defined) -BIND_PATHS="${EESSI_CVMFS_VAR_LIB}:/var/lib/cvmfs,${EESSI_CVMFS_VAR_RUN}:/var/run/cvmfs" +BIND_PATHS="${EESSI_CVMFS_VAR_LIB}:/var/lib/cvmfs,${EESSI_CVMFS_VAR_RUN}:/var/run/cvmfs,${HOST_INJECTIONS}:/opt/eessi" # provide a '/tmp' inside the container BIND_PATHS="${BIND_PATHS},${EESSI_TMPDIR}:${TMP_IN_CONTAINER}" [[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}" +declare -a ADDITIONAL_CONTAINER_OPTIONS=() + +# Configure anything we need for NVIDIA GPUs and CUDA installation +if [[ ${SETUP_NVIDIA} -eq 1 ]]; then + if [[ "${NVIDIA_MODE}" == "run" || "${NVIDIA_MODE}" == "all" ]]; then + # Give singularity the appropriate flag + ADDITIONAL_CONTAINER_OPTIONS+=("--nv") + [[ ${VERBOSE} -eq 1 ]] && echo "ADDITIONAL_CONTAINER_OPTIONS=${ADDITIONAL_CONTAINER_OPTIONS[@]}" + fi + if [[ "${NVIDIA_MODE}" == "install" || "${NVIDIA_MODE}" == "all" ]]; then + # Add additional bind mounts to allow CUDA to install within a container + # (Experience tells us that these are necessary, but we don't know _why_ + # as the CUDA installer is a black box. 
The suspicion is that the CUDA + # installer gets confused by the permissions on these directories when + # inside a container) + EESSI_VAR_LOG=${EESSI_TMPDIR}/var-log + EESSI_USR_LOCAL_CUDA=${EESSI_TMPDIR}/usr-local-cuda + mkdir -p ${EESSI_VAR_LOG} + mkdir -p ${EESSI_USR_LOCAL_CUDA} + BIND_PATHS="${BIND_PATHS},${EESSI_VAR_LOG}:/var/log,${EESSI_USR_LOCAL_CUDA}:/usr/local/cuda" + [[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}" + if [[ "${NVIDIA_MODE}" == "install" ]] ; then + # We need to "trick" our LMOD_RC file to allow us to load CUDA modules even without a CUDA driver + # (this works because we build within a container and the LMOD_RC recognises that) + touch ${EESSI_TMPDIR}/libcuda.so + export SINGULARITY_CONTAINLIBS="${EESSI_TMPDIR}/libcuda.so" + fi + fi +fi + +# Configure the fakeroot setting for the container +if [[ ${FAKEROOT} -eq 1 ]]; then + ADDITIONAL_CONTAINER_OPTIONS+=("--fakeroot") +fi + # set up repository config (always create directory repos_cfg and populate it with info when # arg -r|--repository is used) mkdir -p ${EESSI_TMPDIR}/repos_cfg -if [[ "${REPOSITORY}" == "EESSI-pilot" ]]; then +if [[ "${REPOSITORY}" == "EESSI" ]]; then # need to source defaults as late as possible (see other sourcing below) source ${TOPDIR}/init/eessi_defaults @@ -427,7 +504,7 @@ else # map { local_filepath -> container_filepath } # # repo_name_domain is the domain part of the repo_name, e.g., - # eessi-hpc.org for pilot.eessi-hpc.org + # eessi.io for software.eessi.io # # where config bundle includes the files (-> target location in container) # - default.local -> /etc/cvmfs/default.local @@ -479,7 +556,7 @@ else target=${cfg_file_map[${src}]} BIND_PATHS="${BIND_PATHS},${EESSI_TMPDIR}/repos_cfg/${src}:${target}" done - export EESSI_PILOT_VERSION_OVERRIDE=${repo_version} + export EESSI_VERSION_OVERRIDE=${repo_version} export EESSI_CVMFS_REPO_OVERRIDE="/cvmfs/${repo_name}" # need to source defaults as late as possible (after *_OVERRIDEs) source ${TOPDIR}/init/eessi_defaults @@ -513,10 +590,14 @@ fi # 4. 
set up vars and dirs specific to a scenario declare -a EESSI_FUSE_MOUNTS=() + +# always mount cvmfs-config repo (to get access to software.eessi.io) +EESSI_FUSE_MOUNTS+=("--fusemount" "container:cvmfs2 cvmfs-config.cern.ch /cvmfs/cvmfs-config.cern.ch") + if [[ "${ACCESS}" == "ro" ]]; then - export EESSI_PILOT_READONLY="container:cvmfs2 ${repo_name} /cvmfs/${repo_name}" + export EESSI_READONLY="container:cvmfs2 ${repo_name} /cvmfs/${repo_name}" - EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_PILOT_READONLY}") + EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") export EESSI_FUSE_MOUNTS fi @@ -525,18 +606,18 @@ if [[ "${ACCESS}" == "rw" ]]; then mkdir -p ${EESSI_TMPDIR}/overlay-work # set environment variables for fuse mounts in Singularity container - export EESSI_PILOT_READONLY="container:cvmfs2 ${repo_name} /cvmfs_ro/${repo_name}" + export EESSI_READONLY="container:cvmfs2 ${repo_name} /cvmfs_ro/${repo_name}" - EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_PILOT_READONLY}") + EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") - EESSI_PILOT_WRITABLE_OVERLAY="container:fuse-overlayfs" - EESSI_PILOT_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${repo_name}" - EESSI_PILOT_WRITABLE_OVERLAY+=" -o upperdir=${TMP_IN_CONTAINER}/overlay-upper" - EESSI_PILOT_WRITABLE_OVERLAY+=" -o workdir=${TMP_IN_CONTAINER}/overlay-work" - EESSI_PILOT_WRITABLE_OVERLAY+=" ${EESSI_CVMFS_REPO}" - export EESSI_PILOT_WRITABLE_OVERLAY + EESSI_WRITABLE_OVERLAY="container:fuse-overlayfs" + EESSI_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${repo_name}" + EESSI_WRITABLE_OVERLAY+=" -o upperdir=${TMP_IN_CONTAINER}/overlay-upper" + EESSI_WRITABLE_OVERLAY+=" -o workdir=${TMP_IN_CONTAINER}/overlay-work" + EESSI_WRITABLE_OVERLAY+=" ${EESSI_CVMFS_REPO}" + export EESSI_WRITABLE_OVERLAY - EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_PILOT_WRITABLE_OVERLAY}") + EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_WRITABLE_OVERLAY}") export EESSI_FUSE_MOUNTS fi @@ -558,8 +639,8 @@ if [ ! -z ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} ]; then fi echo "Launching container with command (next line):" -echo "singularity ${RUN_QUIET} ${MODE} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@" -singularity ${RUN_QUIET} ${MODE} "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@" +echo "singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_CONTAINER_OPTIONS[@]} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@" +singularity ${RUN_QUIET} ${MODE} "${ADDITIONAL_CONTAINER_OPTIONS[@]}" "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@" exit_code=$? # 6. save tmp if requested (arg -s|--save) diff --git a/init/Magic_Castle/bash b/init/Magic_Castle/bash index 85e4d54241..dbe1772d3a 100644 --- a/init/Magic_Castle/bash +++ b/init/Magic_Castle/bash @@ -6,7 +6,7 @@ EESSI_SILENT=1 source $(dirname "$BASH_SOURCE")/../eessi_environment_variables # Don't change the default prompt -# export PS1="[EESSI pilot $EESSI_PILOT_VERSION] $ " +# export PS1="[EESSI $EESSI_VERSION] $ " # Provide a clean MODULEPATH export MODULEPATH_ROOT=$EESSI_MODULEPATH @@ -36,4 +36,4 @@ else module reload fi -echo "Environment set up to use EESSI pilot software stack (${EESSI_PILOT_VERSION}), have fun!" +echo "Environment set up to use EESSI (${EESSI_VERSION}), have fun!" 
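As a usage illustration for the eessi_container.sh options introduced above, a few plausible invocations are sketched below; the host directory given to --host-injections is only an example, not a value prescribed by this patch.

# interactive shell in the build container with the host GPU attached (adds --nv to the singularity command)
./eessi_container.sh --nvidia run --mode shell

# writable session prepared for a CUDA installation, keeping host_injections in a persistent host directory
./eessi_container.sh --nvidia install --host-injections /opt/eessi --access rw --mode shell

# run with --fakeroot when the build needs (fake) root privileges
./eessi_container.sh --fakeroot --mode shell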
diff --git a/init/Magic_Castle/eessi_pilot_python3 b/init/Magic_Castle/eessi_python3 similarity index 73% rename from init/Magic_Castle/eessi_pilot_python3 rename to init/Magic_Castle/eessi_python3 index a762ac7b84..b2f7fd4d66 100755 --- a/init/Magic_Castle/eessi_pilot_python3 +++ b/init/Magic_Castle/eessi_python3 @@ -12,10 +12,10 @@ export EESSI_SILENT=1 # for MacOS due to the use of `readlink`) source $(dirname "$(readlink -f "$BASH_SOURCE")")/../eessi_environment_variables -eessi_pilot_python=$(ls ${EESSI_SOFTWARE_PATH}/software/Python/3*GCCcore*/bin/python | sed 1q) -if [ -f "$eessi_pilot_python" ]; then - $eessi_pilot_python "$@" +eessi_python=$(ls ${EESSI_SOFTWARE_PATH}/software/Python/3*GCCcore*/bin/python | sed 1q) +if [ -f "$eessi_python" ]; then + $eessi_python "$@" else - echo "ERROR: No EESSI pilot python 3 available." + echo "ERROR: No EESSI Python 3 available." false fi diff --git a/init/arch_specs/eessi_arch_arm_part.spec b/init/arch_specs/eessi_arch_arm_part.spec new file mode 100755 index 0000000000..e059f5206f --- /dev/null +++ b/init/arch_specs/eessi_arch_arm_part.spec @@ -0,0 +1,5 @@ +# ARM CPU architecture specifications +# Software path in EESSI | Vendor ID | CPU part (ARM uarch identification) +"aarch64/cortex_a72" "" "0xd08" # Raspberry Pi 4 +"aarch64/neoverse_n1" "" "0xd0c" # AWS Graviton2 +"aarch64/neoverse_v1" "" "0xd40" # AWS Graviton3 diff --git a/init/bash b/init/bash index ea605db0b5..0029f91454 100644 --- a/init/bash +++ b/init/bash @@ -1,10 +1,10 @@ -# Allow for a silent mode -if [[ -v EESSI_SILENT ]]; then - # EESSI_SILENT set - output=/dev/null -else - output=/dev/stdout -fi +function show_msg { + # only echo msg if EESSI_SILENT is unset + msg=$1 + if [[ ! -v EESSI_SILENT ]]; then + echo "$msg" + fi +} # The following method should be safe, but might break if file is a symlink # (could switch to $(dirname "$(readlink -f "$BASH_SOURCE")") in that case) @@ -13,27 +13,27 @@ source $(dirname "$BASH_SOURCE")/eessi_environment_variables # only continue if setting EESSI environment variables worked fine if [ $? -eq 0 ]; then - export PS1="[EESSI pilot $EESSI_PILOT_VERSION] $ " + export PS1="{EESSI $EESSI_VERSION} $PS1" # add location of commands provided by compat layer to $PATH; # see https://github.com/EESSI/software-layer/issues/52 export PATH=$EPREFIX/usr/bin:$EPREFIX/bin:$PATH # init Lmod - echo "Initializing Lmod..." >> $output + show_msg "Initializing Lmod..." source $EESSI_EPREFIX/usr/share/Lmod/init/bash # prepend location of modules for EESSI software stack to $MODULEPATH - echo "Prepending $EESSI_MODULEPATH to \$MODULEPATH..." >> $output + show_msg "Prepending $EESSI_MODULEPATH to \$MODULEPATH..." module use $EESSI_MODULEPATH - #echo >> $output - #echo "*** Known problems in the ${EESSI_PILOT_VERSION} pilot software stack ***" >> $output - #echo >> $output - #echo "1) ..." >> $output - #echo >> $output - #echo >> $output + #show_msg "" + #show_msg "*** Known problems in the ${EESSI_VERSION} software stack ***" + #show_msg "" + #show_msg "1) ..." + #show_msg "" + #show_msg "" - echo "Environment set up to use EESSI pilot software stack, have fun!" >> $output + echo "Environment set up to use EESSI (${EESSI_VERSION}), have fun!" 
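For context on the show_msg/EESSI_SILENT mechanism used above, a minimal usage sketch; the repository path and version follow the defaults set in init/eessi_defaults elsewhere in this patch.

# initialise the EESSI environment quietly; leave EESSI_SILENT unset to see the informational messages
export EESSI_SILENT=1
source /cvmfs/software.eessi.io/versions/2023.06/init/bash
module avail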
fi diff --git a/init/eessi_archdetect.sh b/init/eessi_archdetect.sh index 58dd99eb6b..693334716f 100755 --- a/init/eessi_archdetect.sh +++ b/init/eessi_archdetect.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash VERSION="1.1.0" -# Logging -LOG_LEVEL="INFO" +# default log level: only emit warnings or errors +LOG_LEVEL="WARN" # Default result type is a best match CPUPATH_RESULT="best" @@ -69,8 +69,8 @@ check_allinfirst(){ cpupath(){ # If EESSI_SOFTWARE_SUBDIR_OVERRIDE is set, use it - log "DEBUG" "cpupath: Override variable set as '$EESI_SOFTWARE_SUBDIR_OVERRIDE' " - [ $EESI_SOFTWARE_SUBDIR_OVERRIDE ] && echo ${EESI_SOFTWARE_SUBDIR_OVERRIDE} && exit + log "DEBUG" "cpupath: Override variable set as '$EESSI_SOFTWARE_SUBDIR_OVERRIDE' " + [ $EESSI_SOFTWARE_SUBDIR_OVERRIDE ] && echo ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} && exit # Identify the best matching CPU architecture from a list of supported specifications for the host CPU # Return the path to the installation files in EESSI of the best matching architecture @@ -80,27 +80,38 @@ cpupath(){ local machine_type=${EESSI_MACHINE_TYPE:-$(uname -m)} log "DEBUG" "cpupath: Host CPU architecture identified as '$machine_type'" + # Identify the host CPU vendor + local cpu_vendor_tag="vendor[ _]id" + local cpu_vendor=$(get_cpuinfo "$cpu_vendor_tag") + log "DEBUG" "cpupath: CPU vendor of host system: '$cpu_vendor'" + + # Identify the host CPU part + local cpu_part_tag="cpu[ _]part" + local cpu_part=$(get_cpuinfo "$cpu_part_tag") + log "DEBUG" "cpupath: CPU part of host system: '$cpu_part'" + # Populate list of supported specs for this architecture case $machine_type in "x86_64") local spec_file="eessi_arch_x86.spec";; - "aarch64") local spec_file="eessi_arch_arm.spec";; + "aarch64") + # look for ARM core identification through cpu part when available, otherwise fall back to features + local spec_file="eessi_arch_arm.spec" + [ ! "${cpu_part}x" == "x" ] && local spec_file="eessi_arch_arm_part.spec" + ;; "ppc64le") local spec_file="eessi_arch_ppc.spec";; *) log "ERROR" "cpupath: Unsupported CPU architecture $machine_type" esac + # spec files are located in a subfolder with this script local base_dir=$(dirname $(realpath $0)) update_arch_specs "$base_dir/arch_specs/${spec_file}" - # Identify the host CPU vendor - local cpu_vendor_tag="vendor[ _]id" - local cpu_vendor=$(get_cpuinfo "$cpu_vendor_tag") - log "DEBUG" "cpupath: CPU vendor of host system: '$cpu_vendor'" - # Identify the host CPU flags or features local cpu_flag_tag='flags' # cpuinfo systems print different line identifiers, eg features, instead of flags [ "${cpu_vendor}" == "ARM" ] && cpu_flag_tag='flags' [ "${machine_type}" == "aarch64" ] && [ "${cpu_vendor}x" == "x" ] && cpu_flag_tag='features' + [ "${machine_type}" == "aarch64" ] && [ ! 
"${cpu_part}x" == "x" ] && cpu_flag_tag='cpu part' [ "${machine_type}" == "ppc64le" ] && cpu_flag_tag='cpu' local cpu_flags=$(get_cpuinfo "$cpu_flag_tag") diff --git a/init/eessi_defaults b/init/eessi_defaults index f482cbc269..d1779a36ae 100644 --- a/init/eessi_defaults +++ b/init/eessi_defaults @@ -8,5 +8,8 @@ # license: GPLv2 # -export EESSI_CVMFS_REPO="${EESSI_CVMFS_REPO_OVERRIDE:=/cvmfs/pilot.eessi-hpc.org}" -export EESSI_PILOT_VERSION="${EESSI_PILOT_VERSION_OVERRIDE:=2021.12}" +export EESSI_CVMFS_REPO="${EESSI_CVMFS_REPO_OVERRIDE:=/cvmfs/software.eessi.io}" +export EESSI_VERSION="${EESSI_VERSION_OVERRIDE:=2023.06}" +# use archdetect by default, unless otherwise specified +export EESSI_USE_ARCHDETECT="${EESSI_USE_ARCHDETECT:=1}" +export EESSI_USE_ARCHSPEC="${EESSI_USE_ARCHSPEC:=0}" diff --git a/init/eessi_environment_variables b/init/eessi_environment_variables index 10ac1926f4..5450b2bfb4 100644 --- a/init/eessi_environment_variables +++ b/init/eessi_environment_variables @@ -2,24 +2,24 @@ # $BASH_SOURCE points to correct path, see also http://mywiki.wooledge.org/BashFAQ/028 EESSI_INIT_DIR_PATH=$(dirname $(realpath $BASH_SOURCE)) -# Allow for a silent mode -if [[ -v EESSI_SILENT ]]; then - # EESSI_SILENT set - output=/dev/null -else - output=/dev/stdout -fi - function error() { echo -e "\e[31mERROR: $1\e[0m" >&2 false } -# set up minimal environment: $EESSI_PREFIX, $EESSI_PILOT_VERSION, $EESSI_OS_TYPE, $EESSI_CPU_FAMILY, $EPREFIX +function show_msg { + # only echo msg if EESSI_SILENT is unset + msg=$1 + if [[ ! -v EESSI_SILENT ]]; then + echo "$msg" + fi +} + +# set up minimal environment: $EESSI_PREFIX, $EESSI_VERSION, $EESSI_OS_TYPE, $EESSI_CPU_FAMILY, $EPREFIX source $EESSI_INIT_DIR_PATH/minimal_eessi_env if [ -d $EESSI_PREFIX ]; then - echo "Found EESSI pilot repo @ $EESSI_PREFIX!" >> $output + show_msg "Found EESSI repo @ $EESSI_PREFIX!" export EESSI_EPREFIX=$EPREFIX if [ -d $EESSI_EPREFIX ]; then @@ -27,20 +27,49 @@ if [ -d $EESSI_PREFIX ]; then # determine subdirectory in software layer if [ "$EESSI_USE_ARCHDETECT" == "1" ]; then # if archdetect is enabled, use internal code - export EESSI_SOFTWARE_SUBDIR=$(${EESSI_INIT_DIR_PATH}/eessi_archdetect.sh cpupath) - echo "archdetect says ${EESSI_SOFTWARE_SUBDIR}" >> $output - else + all_cpupaths=$(${EESSI_INIT_DIR_PATH}/eessi_archdetect.sh -a cpupath) + # iterate over colon-separated list verifying if the architecture is present + # under $EESSI_PREFIX/software/$EESSI_OS_TYPE; if so use the architecture as best match + IFS=: read -r -a archs <<< "${all_cpupaths}" + for arch in "${archs[@]}"; do + if [ -d ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${arch} ]; then + export EESSI_SOFTWARE_SUBDIR=${arch} + show_msg "archdetect says ${EESSI_SOFTWARE_SUBDIR}" + break + fi + done + elif [ "$EESSI_USE_ARCHSPEC" == "1" ]; then # note: eessi_software_subdir_for_host.py will pick up value from $EESSI_SOFTWARE_SUBDIR_OVERRIDE if it's defined! export EESSI_EPREFIX_PYTHON=$EESSI_EPREFIX/usr/bin/python3 export EESSI_SOFTWARE_SUBDIR=$($EESSI_EPREFIX_PYTHON ${EESSI_INIT_DIR_PATH}/eessi_software_subdir_for_host.py $EESSI_PREFIX) - echo "archspec says ${EESSI_SOFTWARE_SUBDIR}" >> $output + show_msg "archspec says ${EESSI_SOFTWARE_SUBDIR}" + else + error "Don't know how to detect host CPU, giving up!" fi if [ ! -z $EESSI_SOFTWARE_SUBDIR ]; then - echo "Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory." >> $output + show_msg "Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory." 
export EESSI_SOFTWARE_PATH=$EESSI_PREFIX/software/$EESSI_OS_TYPE/$EESSI_SOFTWARE_SUBDIR + + # Configure our LMOD + export LMOD_CONFIG_DIR="$EESSI_SOFTWARE_PATH/.lmod" + lmod_rc_file="$LMOD_CONFIG_DIR/lmodrc.lua" + if [ -f $lmod_rc_file ]; then + show_msg "Found Lmod configuration file at $lmod_rc_file" + else + error "Lmod configuration file not found at $lmod_rc_file" + fi + + export LMOD_PACKAGE_PATH="$EESSI_SOFTWARE_PATH/.lmod" + lmod_sitepackage_file="$LMOD_PACKAGE_PATH/SitePackage.lua" + if [ -f $lmod_sitepackage_file ]; then + show_msg "Found Lmod SitePackage.lua file at $lmod_sitepackage_file" + else + error "Lmod SitePackage.lua file not found at $lmod_sitepackage_file" + fi + if [ ! -z $EESSI_BASIC_ENV ]; then - echo "Only setting up basic environment, so we're done" >> $output + show_msg "Only setting up basic environment, so we're done" elif [ -d $EESSI_SOFTWARE_PATH ]; then # Allow for the use of a custom MNS if [ -z ${EESSI_CUSTOM_MODULEPATH+x} ]; then @@ -53,24 +82,18 @@ if [ -d $EESSI_PREFIX ]; then fi EESSI_MODULEPATH=$EESSI_SOFTWARE_PATH/$EESSI_MODULE_SUBDIR else - echo "Using defined environment variable \$EESSI_CUSTOM_MODULEPATH to set EESSI_MODULEPATH." >> $output + show_msg "Using defined environment variable \$EESSI_CUSTOM_MODULEPATH to set EESSI_MODULEPATH." EESSI_MODULEPATH=$EESSI_CUSTOM_MODULEPATH fi if [ -d $EESSI_MODULEPATH ]; then export EESSI_MODULEPATH=$EESSI_MODULEPATH - echo "Using ${EESSI_MODULEPATH} as the directory to be added to MODULEPATH." >> $output + show_msg "Using ${EESSI_MODULEPATH} as the directory to be added to MODULEPATH." else error "EESSI module path at $EESSI_MODULEPATH not found!" false fi - export LMOD_RC="$EESSI_SOFTWARE_PATH/.lmod/lmodrc.lua" - if [ -f $LMOD_RC ]; then - echo "Found Lmod configuration file at $LMOD_RC" >> $output - else - error "Lmod configuration file not found at $LMOD_RC" - fi else error "EESSI software layer at $EESSI_SOFTWARE_PATH not found!" @@ -82,5 +105,5 @@ if [ -d $EESSI_PREFIX ]; then error "Compatibility layer directory $EESSI_EPREFIX not found!" fi else - error "EESSI pilot repository at $EESSI_PREFIX not found!" + error "EESSI repository at $EESSI_PREFIX not found!" 
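To make the best-match selection over archdetect's colon-separated output (used above in eessi_environment_variables) concrete, a stripped-down sketch with a hypothetical cpupath list:

# hypothetical output of 'eessi_archdetect.sh -a cpupath' on an AMD zen3 host
all_cpupaths="x86_64/amd/zen3:x86_64/amd/zen2:x86_64/generic"
IFS=: read -r -a archs <<< "${all_cpupaths}"
for arch in "${archs[@]}"; do
    # use the first architecture that is actually present in the software layer
    if [ -d "${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${arch}" ]; then
        export EESSI_SOFTWARE_SUBDIR=${arch}
        break
    fi
done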
fi diff --git a/init/minimal_eessi_env b/init/minimal_eessi_env index b7cb7c5e9e..5b3562068d 100644 --- a/init/minimal_eessi_env +++ b/init/minimal_eessi_env @@ -4,11 +4,11 @@ # $BASH_SOURCE points to correct path, see also http://mywiki.wooledge.org/BashFAQ/028 EESSI_INIT_DIR_PATH=$(dirname $(realpath $BASH_SOURCE)) -# set up defaults: EESSI_CVMFS_REPO, EESSI_PILOT_VERSION +# set up defaults: EESSI_CVMFS_REPO, EESSI_VERSION # script takes *_OVERRIDEs into account source ${EESSI_INIT_DIR_PATH}/eessi_defaults -export EESSI_PREFIX=$EESSI_CVMFS_REPO/versions/$EESSI_PILOT_VERSION +export EESSI_PREFIX=$EESSI_CVMFS_REPO/versions/$EESSI_VERSION if [[ $(uname -s) == 'Linux' ]]; then export EESSI_OS_TYPE='linux' diff --git a/install_apptainer_ubuntu.sh b/install_apptainer_ubuntu.sh index c35c34cda6..5c4f37ac2d 100755 --- a/install_apptainer_ubuntu.sh +++ b/install_apptainer_ubuntu.sh @@ -5,8 +5,9 @@ set -e # see https://github.com/apptainer/singularity/issues/5390#issuecomment-899111181 sudo apt-get install alien alien --version -apptainer_rpm=$(curl --silent -L https://dl.fedoraproject.org/pub/epel/8/Everything/x86_64/Packages/a/ | grep 'apptainer-[0-9]' | sed 's/.*\(apptainer[0-9._a-z-]*.rpm\).*/\1/g') -curl -OL https://dl.fedoraproject.org/pub/epel/8/Everything/x86_64/Packages/a/${apptainer_rpm} +epel_subdir="pub/epel/8" +apptainer_rpm=$(curl --silent -L https://dl.fedoraproject.org/${epel_subdir}/Everything/x86_64/Packages/a/ | grep 'apptainer-[0-9]' | sed 's/.*\(apptainer[0-9._a-z-]*.rpm\).*/\1/g') +curl -OL https://dl.fedoraproject.org/${epel_subdir}/Everything/x86_64/Packages/a/${apptainer_rpm} sudo alien -d ${apptainer_rpm} sudo apt install ./apptainer*.deb apptainer --version diff --git a/install_scripts.sh b/install_scripts.sh new file mode 100755 index 0000000000..508735975c --- /dev/null +++ b/install_scripts.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# +# Script to install scripts from the software-layer repo into the EESSI software stack + +display_help() { + echo "usage: $0 [OPTIONS]" + echo " -p | --prefix - prefix to copy the scripts to" + echo " -h | --help - display this usage information" +} + +compare_and_copy() { + if [ "$#" -ne 2 ]; then + echo "Usage of function: compare_and_copy " + return 1 + fi + + source_file="$1" + destination_file="$2" + + if [ ! -f "$destination_file" ] || ! diff -q "$source_file" "$destination_file" ; then + cp "$source_file" "$destination_file" + echo "File $1 copied to $2" + else + echo "Files $1 and $2 are identical. No copy needed." + fi +} + +copy_files_by_list() { +# Compares and copies listed files from a source to a target directory + if [ ! "$#" -ge 3 ]; then + echo "Usage of function: copy_files_by_list " + echo "Here, file_list is an (expanded) bash array" + echo "Example:" + echo "my_files=(file1 file2)" + echo 'copy_files_by_list /my/source /my/target "${my_files[@]}"' + return 1 + fi + source_dir="$1" + target_dir="$2" + # Need to shift all arguments to the left twice. 
Then, rebuild the array with the rest of the arguments + shift + shift + file_list=("$@") + + # Create target dir + mkdir -p ${target_dir} + + # Copy from source to target + echo "Copying files: ${file_list[@]}" + echo "From directory: ${source_dir}" + echo "To directory: ${target_dir}" + + for file in ${file_list[@]}; do + compare_and_copy ${source_dir}/${file} ${target_dir}/${file} + done +} + +POSITIONAL_ARGS=() + +while [[ $# -gt 0 ]]; do + case $1 in + -p|--prefix) + INSTALL_PREFIX="$2" + shift 2 + ;; + -h|--help) + display_help # Call your function + # no shifting needed here, we're done. + exit 0 + ;; + -*|--*) + echo "Error: Unknown option: $1" >&2 + exit 1 + ;; + *) # No more options + POSITIONAL_ARGS+=("$1") # save positional arg + shift + ;; + esac +done + +set -- "${POSITIONAL_ARGS[@]}" + +TOPDIR=$(dirname $(realpath $0)) + +# Copy for init directory +init_files=( + bash eessi_archdetect.sh eessi_defaults eessi_environment_variables eessi_software_subdir_for_host.py + minimal_eessi_env README.md test.py +) +copy_files_by_list ${TOPDIR}/init ${INSTALL_PREFIX}/init "${init_files[@]}" + +# Copy for the init/arch_specs directory +arch_specs_files=( + eessi_arch_arm.spec eessi_arch_ppc.spec eessi_arch_x86.spec +) +copy_files_by_list ${TOPDIR}/init/arch_specs ${INSTALL_PREFIX}/init/arch_specs "${arch_specs_files[@]}" + +# Copy for init/Magic_castle directory +mc_files=( + bash eessi_python3 +) +copy_files_by_list ${TOPDIR}/init/Magic_Castle ${INSTALL_PREFIX}/init/Magic_Castle "${mc_files[@]}" + +# Copy for the scripts directory +script_files=( + utils.sh +) +copy_files_by_list ${TOPDIR}/scripts ${INSTALL_PREFIX}/scripts "${script_files[@]}" + +# Copy files for the scripts/gpu_support/nvidia directory +nvidia_files=( + install_cuda_host_injections.sh link_nvidia_host_libraries.sh +) +copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia ${INSTALL_PREFIX}/scripts/gpu_support/nvidia "${nvidia_files[@]}" diff --git a/install_software_layer.sh b/install_software_layer.sh index bf3006a4a0..82ca70b73f 100755 --- a/install_software_layer.sh +++ b/install_software_layer.sh @@ -1,4 +1,4 @@ #!/bin/bash base_dir=$(dirname $(realpath $0)) source ${base_dir}/init/eessi_defaults -./run_in_compat_layer_env.sh ./EESSI-pilot-install-software.sh "$@" +./run_in_compat_layer_env.sh ./EESSI-install-software.sh "$@" diff --git a/licenses/README.md b/licenses/README.md new file mode 100644 index 0000000000..36a7615b21 --- /dev/null +++ b/licenses/README.md @@ -0,0 +1,3 @@ +see https://spdx.org/licenses + +Python function to download SPDX list of licenses is available in `spdx.py` diff --git a/licenses/licenses.json b/licenses/licenses.json new file mode 100644 index 0000000000..8831ed368c --- /dev/null +++ b/licenses/licenses.json @@ -0,0 +1,10 @@ +{ + "EasyBuild": { + "spdx": "GPL-2.0-only", + "license_url": "https://easybuild.io" + }, + "GCCcore": { + "spdx": "GPL-2.0-with-GCC-exception", + "license_url": "https://github.com/gcc-mirror/gcc/blob/master/COPYING" + } +} diff --git a/licenses/spdx.py b/licenses/spdx.py new file mode 100644 index 0000000000..06c3edb4e6 --- /dev/null +++ b/licenses/spdx.py @@ -0,0 +1,100 @@ +import json +import logging +import sys +import urllib.request + +SPDX_LICENSE_LIST_URL = 'https://raw.githubusercontent.com/spdx/license-list-data/main/json/licenses.json' + +LICENSE_URL = 'license_url' +SPDX = 'spdx' + +spdx_license_list = None + +# Configure the logging module +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") + + +def 
get_spdx_license_list(): + """ + Download JSON file with current list of SPDX licenses, parse it, and return it as a Python dictionary. + """ + global spdx_license_list + + if spdx_license_list is None: + with urllib.request.urlopen(SPDX_LICENSE_LIST_URL) as fp: + spdx_license_list = json.load(fp) + version, release_date = spdx_license_list['licenseListVersion'], spdx_license_list['releaseDate'] + logging.info(f"Downloaded version {version} of SPDX license list (release date: {release_date})") + licenses = spdx_license_list['licenses'] + logging.info(f"Found info on {len(licenses)} licenses!") + + return spdx_license_list + + +def license_info(spdx_id): + """Find license with specified SPDX identifier.""" + + spdx_license_list = get_spdx_license_list() + + licenses = spdx_license_list['licenses'] + for lic in licenses: + if lic['licenseId'] == spdx_id: + return lic + + # if no match is found, return None as result + return None + + +def read_licenses(path): + """ + Read software project to license mapping from specified path + """ + with open(path) as fp: + licenses = json.loads(fp.read()) + + return licenses + + +def check_licenses(licenses): + """ + Check mapping of software licenses: make sure SPDX identifiers are valid. + """ + faulty_licenses = {} + + for software_name in licenses: + spdx_lic_id = licenses[software_name][SPDX] + lic_info = license_info(spdx_lic_id) + if lic_info: + lic_url = licenses[software_name][LICENSE_URL] + logging.info(f"License for software '{software_name}': {lic_info['name']} (see {lic_url})") + else: + logging.warning(f"Found faulty SPDX license ID for {software_name}: {spdx_lic_id}") + faulty_licenses[software_name] = spdx_lic_id + + if faulty_licenses: + logging.warning(f"Found {len(faulty_licenses)} faulty SPDIX license IDs (out of {len(licenses)})!") + result = False + else: + logging.info(f"License check passed for {len(licenses)} licenses!") + result = True + + return result + + +def main(args): + if len(args) == 1: + licenses_path = args[0] + else: + logging.error("Usage: python spdx.py ") + sys.exit(1) + + licenses = read_licenses(licenses_path) + if check_licenses(licenses): + logging.info("All license checks PASSED!") + else: + logging.error("One or more licence checks failed!") + sys.exit(2) + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/load_easybuild_module.sh b/load_easybuild_module.sh index 4ff2a3c37c..d1bfd18bb5 100755 --- a/load_easybuild_module.sh +++ b/load_easybuild_module.sh @@ -23,14 +23,14 @@ fi EB_VERSION=${1} # make sure that environment variables that we expect to be set are indeed set -if [ -z "${TMPDIR}" ]; then +if [ -z "${TMPDIR}" ]; then echo "\$TMPDIR is not set" >&2 exit 2 fi # ${EB} is used to specify which 'eb' command should be used; # can potentially be more than just 'eb', for example when using 'eb --optarch=GENERIC' -if [ -z "${EB}" ]; then +if [ -z "${EB}" ]; then echo "\$EB is not set" >&2 exit 2 fi @@ -71,21 +71,28 @@ else check_exit_code $? "${ok_msg}" "${fail_msg}" # maybe the module obtained with --install-latest-eb-release is exactly the EasyBuild version we wanted? + IGNORE_CACHE='' module avail 2>&1 | grep -i easybuild/${EB_VERSION} &> ${ml_av_easybuild_out} if [[ $? -eq 0 ]]; then echo_green ">> Module for EasyBuild v${EB_VERSION} found!" else - eb_ec=EasyBuild-${EB_VERSION}.eb - echo_yellow ">> Still no module for EasyBuild v${EB_VERSION}, trying with easyconfig ${eb_ec}..." 
- ${EB} --search ${eb_ec} | grep ${eb_ec} > /dev/null + module --ignore_cache avail 2>&1 | grep -i easybuild/${EB_VERSION} &> ${ml_av_easybuild_out} if [[ $? -eq 0 ]]; then - echo "Easyconfig ${eb_ec} found for EasyBuild v${EB_VERSION}, so installing it..." - ok_msg="EasyBuild v${EB_VERSION} installed, alright!" - fail_msg="Installing EasyBuild v${EB_VERSION}, yikes! (output: ${eb_install_out})" - ${EB} EasyBuild-${EB_VERSION}.eb 2>&1 | tee -a ${eb_install_out} - check_exit_code $? "${ok_msg}" "${fail_msg}" + echo_green ">> Module for EasyBuild v${EB_VERSION} found!" + IGNORE_CACHE='--ignore_cache' else - fatal_error "No easyconfig found for EasyBuild v${EB_VERSION}" + eb_ec=EasyBuild-${EB_VERSION}.eb + echo_yellow ">> Still no module for EasyBuild v${EB_VERSION}, trying with easyconfig ${eb_ec}..." + ${EB} --search ${eb_ec} | grep ${eb_ec} > /dev/null + if [[ $? -eq 0 ]]; then + echo "Easyconfig ${eb_ec} found for EasyBuild v${EB_VERSION}, so installing it..." + ok_msg="EasyBuild v${EB_VERSION} installed, alright!" + fail_msg="Installing EasyBuild v${EB_VERSION}, yikes! (output: ${eb_install_out})" + ${EB} EasyBuild-${EB_VERSION}.eb 2>&1 | tee -a ${eb_install_out} + check_exit_code $? "${ok_msg}" "${fail_msg}" + else + fatal_error "No easyconfig found for EasyBuild v${EB_VERSION}" + fi fi fi @@ -103,7 +110,7 @@ else fi echo ">> Loading EasyBuild v${EB_VERSION} module..." -module load EasyBuild/${EB_VERSION} +module ${IGNORE_CACHE} load EasyBuild/${EB_VERSION} eb_show_system_info_out=${TMPDIR}/eb_show_system_info.out ${EB} --show-system-info > ${eb_show_system_info_out} if [[ $? -eq 0 ]]; then diff --git a/reframe_config_bot.py.tmpl b/reframe_config_bot.py.tmpl new file mode 100644 index 0000000000..0cc3e9f530 --- /dev/null +++ b/reframe_config_bot.py.tmpl @@ -0,0 +1,59 @@ +# WARNING: this file is intended as template and the __X__ template variables need to be replaced +# before it can act as a configuration file +# Once replaced, this is a config file for running tests after the build phase, by the bot + +from eessi.testsuite.common_config import common_logging_config +from eessi.testsuite.constants import * # noqa: F403 + + +site_configuration = { + 'systems': [ + { + 'name': 'BotBuildTests', + 'descr': 'Software-layer bot', + 'hostnames': ['.*'], + 'modules_system': 'lmod', + 'partitions': [ + { + 'name': 'default', + 'scheduler': 'local', + 'launcher': 'mpirun', + 'environs': ['default'], + 'features': [ + FEATURES[CPU] + ] + list(SCALES.keys()), + 'processor': { + 'num_cpus': __NUM_CPUS__, + 'num_sockets': __NUM_SOCKETS__, + 'num_cpus_per_core': __NUM_CPUS_PER_CORE__, + 'num_cpus_per_socket': __NUM_CPUS_PER_SOCKET__, + }, + 'resources': [ + { + 'name': 'memory', + 'options': ['--mem={size}'], + } + ], + 'max_jobs': 1 + } + ] + } + ], + 'environments': [ + { + 'name': 'default', + 'cc': 'cc', + 'cxx': '', + 'ftn': '' + } + ], + 'general': [ + { + 'purge_environment': True, + 'resolve_module_conflicts': False, # avoid loading the module before submitting the job + # disable automatic detection of CPU architecture (since we're using local scheduler) + 'remote_detect': False, + } + ], + 'logging': common_logging_config(), +} diff --git a/run_in_compat_layer_env.sh b/run_in_compat_layer_env.sh index c70077bf15..f57c4d0749 100755 --- a/run_in_compat_layer_env.sh +++ b/run_in_compat_layer_env.sh @@ -3,25 +3,28 @@ base_dir=$(dirname $(realpath $0)) source ${base_dir}/init/eessi_defaults -if [ -z $EESSI_PILOT_VERSION ]; then - echo "ERROR: \$EESSI_PILOT_VERSION must be set!" 
>&2 +if [ -z $EESSI_VERSION ]; then + echo "ERROR: \$EESSI_VERSION must be set!" >&2 exit 1 fi -EESSI_COMPAT_LAYER_DIR="${EESSI_CVMFS_REPO}/versions/${EESSI_PILOT_VERSION}/compat/linux/$(uname -m)" +EESSI_COMPAT_LAYER_DIR="${EESSI_CVMFS_REPO}/versions/${EESSI_VERSION}/compat/linux/$(uname -m)" if [ ! -d ${EESSI_COMPAT_LAYER_DIR} ]; then echo "ERROR: ${EESSI_COMPAT_LAYER_DIR} does not exist!" >&2 exit 1 fi INPUT=$(echo "$@") +if [ ! -z ${SLURM_JOB_ID} ]; then + INPUT="export SLURM_JOB_ID=${SLURM_JOB_ID}; ${INPUT}" +fi if [ ! -z ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} ]; then INPUT="export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE}; ${INPUT}" fi if [ ! -z ${EESSI_CVMFS_REPO_OVERRIDE} ]; then INPUT="export EESSI_CVMFS_REPO_OVERRIDE=${EESSI_CVMFS_REPO_OVERRIDE}; ${INPUT}" fi -if [ ! -z ${EESSI_PILOT_VERSION_OVERRIDE} ]; then - INPUT="export EESSI_PILOT_VERSION_OVERRIDE=${EESSI_PILOT_VERSION_OVERRIDE}; ${INPUT}" +if [ ! -z ${EESSI_VERSION_OVERRIDE} ]; then + INPUT="export EESSI_VERSION_OVERRIDE=${EESSI_VERSION_OVERRIDE}; ${INPUT}" fi if [ ! -z ${http_proxy} ]; then INPUT="export http_proxy=${http_proxy}; ${INPUT}" @@ -30,5 +33,5 @@ if [ ! -z ${https_proxy} ]; then INPUT="export https_proxy=${https_proxy}; ${INPUT}" fi -echo "Running '${INPUT}' in EESSI (${EESSI_CVMFS_REPO}) ${EESSI_PILOT_VERSION} compatibility layer environment..." +echo "Running '${INPUT}' in EESSI (${EESSI_CVMFS_REPO}) ${EESSI_VERSION} compatibility layer environment..." ${EESSI_COMPAT_LAYER_DIR}/startprefix <<< "${INPUT}" diff --git a/run_tests.sh b/run_tests.sh new file mode 100755 index 0000000000..1dbb47db9d --- /dev/null +++ b/run_tests.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# +# This script gets invoked by the bot/test.sh script to run within the EESSI container +# Thus, this script defines all of the steps that should run for the tests. +# Note that, unless we have good reason, we don't run test steps in the prefix environment: +# users also typically don't run in the prefix environment, and we want to check if the +# software works well in that specific setup. +# +# This script is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Caspar van Leeuwen (@casparvl) +# +# license: GPLv2 +# + +base_dir=$(dirname $(realpath $0)) +source ${base_dir}/init/eessi_defaults + +# Git clone has to be run in compat layer, to make the git command available +./run_in_compat_layer_env.sh "git clone https://github.com/EESSI/test-suite EESSI-test-suite" + +# Run the test suite +./test_suite.sh "$@" diff --git a/scripts/gpu_support/nvidia/install_cuda_host_injections.sh b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh new file mode 100755 index 0000000000..a9310d817a --- /dev/null +++ b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh @@ -0,0 +1,211 @@ +#!/usr/bin/env bash + +# This script can be used to install CUDA under the `.../host_injections` directory. +# This provides the parts of the CUDA installation that cannot be redistributed as +# part of EESSI due to license limitations. While GPU-based software from EESSI will +# _run_ without these, installation of additional CUDA software requires the CUDA +# installation(s) under `host_injections` to be present. +# +# The `host_injections` directory is a variant symlink that by default points to +# `/opt/eessi`, unless otherwise defined in the local CVMFS configuration (see +# https://cvmfs.readthedocs.io/en/stable/cpt-repo.html#variant-symlinks). 
For the +# installation to be successful, this directory needs to be writeable by the user +# executing this script. + +# Initialise our bash functions +TOPDIR=$(dirname $(realpath $BASH_SOURCE)) +source "$TOPDIR"/../../utils.sh + +# Function to display help message +show_help() { + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " --help Display this help message" + echo " --accept-cuda-eula You _must_ accept the CUDA EULA to install" + echo " CUDA, see the EULA at" + echo " https://docs.nvidia.com/cuda/eula/index.html" + echo " -c, --cuda-version CUDA_VERSION Specify a version o CUDA to install (must" + echo " have a corresponding easyconfig in the" + echo " EasyBuild release)" + echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" + echo " storage during the CUDA install" + echo " (must have >10GB available)" +} + +# Initialize variables +install_cuda_version="" +eula_accepted=0 + +# Parse command-line options +while [[ $# -gt 0 ]]; do + case "$1" in + --help) + show_help + exit 0 + ;; + -c|--cuda-version) + if [ -n "$2" ]; then + install_cuda_version="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + --accept-cuda-eula) + eula_accepted=1 + shift 1 + ;; + -t|--temp-dir) + if [ -n "$2" ]; then + CUDA_TEMP_DIR="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + *) + show_help + fatal_error "Error: Unknown option: $1" + ;; + esac +done + +# Make sure EESSI is initialised +check_eessi_initialised + +# Make sure the CUDA version supplied is a semantic version +is_semantic_version() { + local version=$1 + local regex='^[0-9]+\.[0-9]+\.[0-9]+$' + + if [[ $version =~ $regex ]]; then + return 0 # Return success (0) if it's a semantic version + else + return 1 # Return failure (1) if it's not a semantic version + fi +} +if ! is_semantic_version "$install_cuda_version"; then + show_help + error="\nYou must provide a semantic version for CUDA (e.g., 12.1.1) via the appropriate\n" + error="${error}command line option. This script is intended for use with EESSI so the 'correct'\n" + error="${error}version to provide is probably one of those available under\n" + error="${error}$EESSI_SOFTWARE_PATH/software/CUDA\n" + fatal_error "${error}" +fi + +# Make sure they have accepted the CUDA EULA +if [ "$eula_accepted" -ne 1 ]; then + show_help + error="\nYou _must_ accept the CUDA EULA via the appropriate command line option.\n" + fatal_error "${error}" +fi + +# As an installation location just use $EESSI_SOFTWARE_PATH but replacing `versions` with `host_injections` +# (CUDA is a binary installation so no need to worry too much about the EasyBuild setup) +cuda_install_parent=${EESSI_SOFTWARE_PATH/versions/host_injections} + +# Only install CUDA if specified version is not found. +# (existence of easybuild subdir implies a successful install) +if [ -d "${cuda_install_parent}"/software/CUDA/"${install_cuda_version}"/easybuild ]; then + echo_green "CUDA software found! No need to install CUDA again." +else + # We need to be able write to the installation space so let's make sure we can + if ! create_directory_structure "${cuda_install_parent}"/software/CUDA ; then + fatal_error "No write permissions to directory ${cuda_install_parent}/software/CUDA" + fi + + # we need a directory we can use for temporary storage + if [[ -z "${CUDA_TEMP_DIR}" ]]; then + tmpdir=$(mktemp -d) + else + tmpdir="${CUDA_TEMP_DIR}"/temp + if ! 
mkdir "$tmpdir" ; then + fatal_error "Could not create directory ${tmpdir}" + fi + fi + + required_space_in_tmpdir=50000 + # Let's see if we have sources and build locations defined if not, we use the temporary space + if [[ -z "${EASYBUILD_BUILDPATH}" ]]; then + export EASYBUILD_BUILDPATH=${tmpdir}/build + required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000)) + fi + if [[ -z "${EASYBUILD_SOURCEPATH}" ]]; then + export EASYBUILD_SOURCEPATH=${tmpdir}/sources + required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000)) + fi + + # The install is pretty fat, you need lots of space for download/unpack/install (~3*5GB), + # need to do a space check before we proceed + avail_space=$(df --output=avail "${cuda_install_parent}"/ | tail -n 1 | awk '{print $1}') + if (( avail_space < 5000000 )); then + fatal_error "Need at least 5GB disk space to install CUDA under ${cuda_install_parent}, exiting now..." + fi + avail_space=$(df --output=avail "${tmpdir}"/ | tail -n 1 | awk '{print $1}') + if (( avail_space < required_space_in_tmpdir )); then + error="Need at least ${required_space_in_tmpdir} disk space under ${tmpdir}.\n" + error="${error}Set the environment variable CUDA_TEMP_DIR to a location with adequate space to pass this check." + error="${error}You can alternatively set EASYBUILD_BUILDPATH and/or EASYBUILD_SOURCEPATH " + error="${error}to reduce this requirement. Exiting now..." + fatal_error "${error}" + fi + + if ! command -v "eb" &>/dev/null; then + echo_yellow "Attempting to load an EasyBuild module to do actual install" + module load EasyBuild + # There are some scenarios where this may fail + if [ $? -ne 0 ]; then + error="'eb' command not found in your environment and\n" + error="${error} module load EasyBuild\n" + error="${error}failed for some reason.\n" + error="${error}Please re-run this script with the 'eb' command available." + fatal_error "${error}" + fi + fi + + cuda_easyconfig="CUDA-${install_cuda_version}.eb" + + # Check the easyconfig file is available in the release + # (eb search always returns 0, so we need a grep to ensure a usable exit code) + eb --search ^${cuda_easyconfig}|grep CUDA > /dev/null 2>&1 + # Check the exit code + if [ $? -ne 0 ]; then + eb_version=$(eb --version) + available_cuda_easyconfigs=$(eb --search ^CUDA-*.eb|grep CUDA) + + error="The easyconfig ${cuda_easyconfig} was not found in EasyBuild version:\n" + error="${error} ${eb_version}\n" + error="${error}You either need to give a different version of CUDA to install _or_ \n" + error="${error}use a different version of EasyBuild for the installation.\n" + error="${error}\nThe versions of available with the current eb command are:\n" + error="${error}${available_cuda_easyconfigs}" + fatal_error "${error}" + fi + + # We need the --rebuild option, as the CUDA module may or may not be on the + # `MODULEPATH` yet. Even if it is, we still want to redo this installation + # since it will provide the symlinked targets for the parts of the CUDA + # installation in the `.../versions/...` prefix + # We install the module in our `tmpdir` since we do not need the modulefile, + # we only care about providing the targets for the symlinks. 
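Putting the options parsed above together, a typical invocation of install_cuda_host_injections.sh might look like the sketch below; the temporary directory is an arbitrary example, and 12.1.1 is simply the CUDA version used as an example in the script's help text.

# install CUDA under the host_injections prefix, explicitly accepting the CUDA EULA
./scripts/gpu_support/nvidia/install_cuda_host_injections.sh \
    --cuda-version 12.1.1 \
    --accept-cuda-eula \
    --temp-dir /tmp/$USER/cuda-install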
+ extra_args="--rebuild --installpath-modules=${tmpdir}" + + # We don't want hooks used in this install, we need a vanilla CUDA installation + touch "$tmpdir"/none.py + # shellcheck disable=SC2086 # Intended splitting of extra_args + eb --prefix="$tmpdir" ${extra_args} --accept-eula-for=CUDA --hooks="$tmpdir"/none.py --installpath="${cuda_install_parent}"/ "${cuda_easyconfig}" + ret=$? + if [ $ret -ne 0 ]; then + eb_last_log=$(unset EB_VERBOSE; eb --last-log) + cp -a ${eb_last_log} . + fatal_error "CUDA installation failed, please check EasyBuild logs $(basename ${eb_last_log})..." + else + echo_green "CUDA installation at ${cuda_install_parent}/software/CUDA/${install_cuda_version} succeeded!" + fi + # clean up tmpdir + rm -rf "${tmpdir}" +fi diff --git a/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh new file mode 100755 index 0000000000..e8d7f0d0a7 --- /dev/null +++ b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -0,0 +1,144 @@ +#!/bin/bash + +# This script links host libraries related to GPU drivers to a location where +# they can be found by the EESSI linker + +# Initialise our bash functions +TOPDIR=$(dirname $(realpath $BASH_SOURCE)) +source "$TOPDIR"/../../utils.sh + +# We rely on ldconfig to give us the location of the libraries on the host +command_name="ldconfig" +# We cannot use a version of ldconfig that's being shipped under CVMFS +exclude_prefix="/cvmfs" + +found_paths=() +# Always attempt to use /sbin/ldconfig +if [ -x "/sbin/$command_name" ]; then + found_paths+=("/sbin/$command_name") +fi +IFS=':' read -ra path_dirs <<< "$PATH" +for dir in "${path_dirs[@]}"; do + if [ "$dir" = "/sbin" ]; then + continue # we've already checked for $command_name in /sbin, don't need to do it twice + fi + if [[ ! "$dir" =~ ^$exclude_prefix ]]; then + if [ -x "$dir/$command_name" ]; then + found_paths+=("$dir/$command_name") + fi + fi +done + +if [ ${#found_paths[@]} -gt 0 ]; then + echo "Found $command_name in the following locations:" + printf -- "- %s\n" "${found_paths[@]}" + echo "Using first version" + host_ldconfig=${found_paths[0]} +else + error="$command_name not found in PATH or only found in paths starting with $exclude_prefix." + fatal_error "$error" +fi + +# Make sure EESSI is initialised (doesn't matter what version) +check_eessi_initialised + +# Find the CUDA version of the host CUDA drivers +# (making sure that this can still work inside prefix environment inside a container) +export LD_LIBRARY_PATH=/.singularity.d/libs:$LD_LIBRARY_PATH +nvidia_smi_command="nvidia-smi --query-gpu=driver_version --format=csv,noheader" +if $nvidia_smi_command > /dev/null; then + host_driver_version=$($nvidia_smi_command | tail -n1) + echo_green "Found NVIDIA GPU driver version ${host_driver_version}" + # If the first worked, this should work too + host_cuda_version=$(nvidia-smi -q --display=COMPUTE | grep CUDA | awk 'NF>1{print $NF}') + echo_green "Found host CUDA version ${host_cuda_version}" +else + error="Failed to successfully execute\n $nvidia_smi_command\n" + fatal_error "$error" +fi + +# Let's make sure the driver libraries are not already in place +link_drivers=1 + +# first make sure that target of host_injections variant symlink is an existing directory +host_injections_target=$(realpath -m ${EESSI_CVMFS_REPO}/host_injections) +if [ ! 
-d ${host_injections_target} ]; then + create_directory_structure ${host_injections_target} +fi + +host_injections_nvidia_dir="${EESSI_CVMFS_REPO}/host_injections/nvidia/${EESSI_CPU_FAMILY}" +host_injection_driver_dir="${host_injections_nvidia_dir}/host" +host_injection_driver_version_file="$host_injection_driver_dir/driver_version.txt" +if [ -e "$host_injection_driver_version_file" ]; then + if grep -q "$host_driver_version" "$host_injection_driver_version_file"; then + echo_green "The host GPU driver libraries (v${host_driver_version}) have already been linked! (based on ${host_injection_driver_version_file})" + link_drivers=0 + else + # There's something there but it is out of date + echo_yellow "Cleaning out outdated symlinks" + rm $host_injection_driver_dir/* + if [ $? -ne 0 ]; then + error="Unable to remove files under '$host_injection_driver_dir'." + fatal_error "$error" + fi + fi +fi + +drivers_linked=0 +if [ "$link_drivers" -eq 1 ]; then + if ! create_directory_structure "${host_injection_driver_dir}" ; then + fatal_error "No write permissions to directory ${host_injection_driver_dir}" + fi + cd ${host_injection_driver_dir} + # Need a small temporary space to hold a couple of files + temp_dir=$(mktemp -d) + + # Gather libraries on the host (_must_ be host ldconfig) + $host_ldconfig -p | awk '{print $NF}' > "$temp_dir"/libs.txt + # Allow for the fact that we may be in a container so the CUDA libs might be in there + ls /.singularity.d/libs/* >> "$temp_dir"/libs.txt 2>/dev/null + + # Leverage singularity to find the full list of libraries we should be linking to + echo_yellow "Downloading latest version of nvliblist.conf from Apptainer to ${temp_dir}/nvliblist.conf" + curl --silent --output "$temp_dir"/nvliblist.conf https://raw.githubusercontent.com/apptainer/apptainer/main/etc/nvliblist.conf + + # Make symlinks to all the interesting libraries + grep '.so$' "$temp_dir"/nvliblist.conf | xargs -i grep {} "$temp_dir"/libs.txt | xargs -i ln -s {} + + # Inject driver and CUDA versions into dir + echo $host_driver_version > driver_version.txt + echo $host_cuda_version > cuda_version.txt + drivers_linked=1 + + # Remove the temporary directory when done + rm -r "$temp_dir" +fi + +# Make latest symlink for NVIDIA drivers +cd $host_injections_nvidia_dir +symlink="latest" +if [ -L "$symlink" ]; then + # Unless the drivers have been installed, leave the symlink alone + if [ "$drivers_linked" -eq 1 ]; then + ln -sf host latest + fi +else + # No link exists yet + ln -s host latest +fi + +# Make sure the libraries can be found by the EESSI linker +host_injection_linker_dir=${EESSI_EPREFIX/versions/host_injections} +if [ -L "$host_injection_linker_dir/lib" ]; then + target_path=$(readlink -f "$host_injection_linker_dir/lib") + if [ "$target_path" != "$$host_injections_nvidia_dir/latest" ]; then + cd $host_injection_linker_dir + ln -sf $host_injections_nvidia_dir/latest lib + fi +else + create_directory_structure $host_injection_linker_dir + cd $host_injection_linker_dir + ln -s $host_injections_nvidia_dir/latest lib +fi + +echo_green "Host NVIDIA GPU drivers linked successfully for EESSI" diff --git a/scripts/utils.sh b/scripts/utils.sh index d0da95e87f..b2be3f6221 100644 --- a/scripts/utils.sh +++ b/scripts/utils.sh @@ -14,7 +14,7 @@ ANY_ERROR_EXITCODE=1 function fatal_error() { echo_red "ERROR: $1" >&2 if [[ $# -gt 1 ]]; then - exit $2 + exit "$2" else exit "${ANY_ERROR_EXITCODE}" fi @@ -32,11 +32,57 @@ function check_exit_code { fi } +function check_eessi_initialised() { + if [[ -z 
"${EESSI_SOFTWARE_PATH}" ]]; then + fatal_error "EESSI has not been initialised!" + else + return 0 + fi +} + +function check_in_prefix_shell() { + # Make sure EPREFIX is defined + if [[ -z "${EPREFIX}" ]]; then + fatal_error "This script cannot be used without having first defined EPREFIX" + fi + if [[ ! ${SHELL} = ${EPREFIX}/bin/bash ]]; then + fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!" + fi +} + +function create_directory_structure() { + # Ensure we are given a single path argument + if [ $# -ne 1 ]; then + echo_red "Function requires a single (relative or absolute) path argument" >&2 + return $ANY_ERROR_EXITCODE + fi + dir_structure="$1" + + # Attempt to create the directory structure + error_message=$(mkdir -p "$dir_structure" 2>&1) + return_code=$? + # If it fails be explicit about the error + if [ ${return_code} -ne 0 ]; then + real_dir=$(realpath -m "$dir_structure") + echo_red "Creating ${dir_structure} (real path ${real_dir}) failed with:\n ${error_message}" >&2 + else + # If we're creating it, our use case is that we want to be able to write there + # (this is a check in case the directory already existed) + if [ ! -w "${dir_structure}" ]; then + real_dir=$(realpath -m "$dir_structure") + echo_red "You do not have (required) write permissions to ${dir_structure} (real path ${real_dir})!" + return_code=$ANY_ERROR_EXITCODE + fi + fi + + return $return_code +} + function get_path_for_tool { tool_name=$1 tool_envvar_name=$2 - which_out=$(which ${tool_name} 2>&1) + which_out=$(which "${tool_name}" 2>&1) exit_code=$? if [[ ${exit_code} -eq 0 ]]; then echo "INFO: found tool ${tool_name} in PATH (${which_out})" >&2 @@ -68,7 +114,7 @@ function get_host_from_url { url=$1 re="(http|https)://([^/:]+)" if [[ $url =~ $re ]]; then - echo ${BASH_REMATCH[2]} + echo "${BASH_REMATCH[2]}" return 0 else echo "" @@ -80,7 +126,7 @@ function get_port_from_url { url=$1 re="(http|https)://[^:]+:([0-9]+)" if [[ $url =~ $re ]]; then - echo ${BASH_REMATCH[2]} + echo "${BASH_REMATCH[2]}" return 0 else echo "" @@ -90,7 +136,7 @@ function get_port_from_url { function get_ipv4_address { hname=$1 - hipv4=$(grep ${hname} /etc/hosts | grep -v '^[[:space:]]*#' | cut -d ' ' -f 1) + hipv4=$(grep "${hname}" /etc/hosts | grep -v '^[[:space:]]*#' | cut -d ' ' -f 1) # TODO try other methods if the one above does not work --> tool that verifies # what method can be used? echo "${hipv4}" diff --git a/test_suite.sh b/test_suite.sh new file mode 100755 index 0000000000..95eb9daa2a --- /dev/null +++ b/test_suite.sh @@ -0,0 +1,200 @@ +#!/bin/bash +# +# This script creates a ReFrame config file from a template, in which CPU properties get replaced +# based on where this script is run (typically: a build node). Then, it runs the EESSI test suite. 
+#
+# This script is part of the EESSI software layer, see
+# https://github.com/EESSI/software-layer.git
+#
+# author: Caspar van Leeuwen (@casparvl)
+#
+# license: GPLv2
+
+display_help() {
+  echo "usage: $0 [OPTIONS]"
+  echo "  -g | --generic         -  instructs script to test for generic architecture target"
+  echo "  -h | --help            -  display this usage information"
+  echo "  -x | --http-proxy URL  -  provides URL for the environment variable http_proxy"
+  echo "  -y | --https-proxy URL -  provides URL for the environment variable https_proxy"
+}
+
+POSITIONAL_ARGS=()
+
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    -g|--generic)
+      DETECTION_PARAMETERS="--generic"
+      shift
+      ;;
+    -h|--help)
+      display_help
+      # no shifting needed here, we're done.
+      exit 0
+      ;;
+    -x|--http-proxy)
+      export http_proxy="$2"
+      shift 2
+      ;;
+    -y|--https-proxy)
+      export https_proxy="$2"
+      shift 2
+      ;;
+    --build-logs-dir)
+      export build_logs_dir="${2}"
+      shift 2
+      ;;
+    --shared-fs-path)
+      export shared_fs_path="${2}"
+      shift 2
+      ;;
+    -*|--*)
+      echo "Error: Unknown option: $1" >&2
+      exit 1
+      ;;
+    *)  # No more options
+      POSITIONAL_ARGS+=("$1")  # save positional arg
+      shift
+      ;;
+  esac
+done
+
+set -- "${POSITIONAL_ARGS[@]}"
+
+TOPDIR=$(dirname $(realpath $0))
+
+source $TOPDIR/scripts/utils.sh
+
+# honor $TMPDIR if it is already defined, use /tmp otherwise
+if [ -z "${TMPDIR}" ]; then
+    export WORKDIR=/tmp/$USER
+else
+    export WORKDIR=$TMPDIR/$USER
+fi
+
+TMPDIR=$(mktemp -d)
+
+echo ">> Setting up environment..."
+module --force purge
+export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS)
+
+source $TOPDIR/init/bash
+
+# Load the ReFrame module
+# Currently, we load the default version. Maybe we should somehow make this configurable in the future?
+module load ReFrame
+if [[ $? -eq 0 ]]; then
+    echo_green ">> Loaded ReFrame module"
+else
+    fatal_error "Failed to load the ReFrame module"
+fi
+
+# Check that a system python3 is available
+python3_found=$(command -v python3)
+if [ -z "${python3_found}" ]; then
+    fatal_error "No system python3 found"
+else
+    echo_green "System python3 found:"
+    python3 -V
+fi
+
+# Check that ReFrame came with the hpctestlib and that we can import it
+reframe_import="hpctestlib.sciapps.gromacs"
+python3 -c "import ${reframe_import}"
+if [[ $? -eq 0 ]]; then
+    echo_green "Successfully found and imported ${reframe_import}"
+else
+    fatal_error "Failed to import ${reframe_import}"
+fi
+
+# Cloning should already be done in run_tests.sh before test_suite.sh is invoked
+# Check if that succeeded
+export TESTSUITEPREFIX=$PWD/EESSI-test-suite
+if [ -d $TESTSUITEPREFIX ]; then
+    echo_green "Clone of the test suite $TESTSUITEPREFIX available, OK!"
+else
+    fatal_error "Clone of the test suite $TESTSUITEPREFIX is not available!"
+fi
+export PYTHONPATH=$TESTSUITEPREFIX:$PYTHONPATH
+
+# Check that we can import from the test suite
+testsuite_import="eessi.testsuite"
+python3 -c "import ${testsuite_import}"
+if [[ $? -eq 0 ]]; then
+    echo_green "Successfully found and imported ${testsuite_import}"
+else
+    fatal_error "Failed to import ${testsuite_import}"
+fi
+
+# Configure ReFrame, see https://www.eessi.io/docs/test-suite/installation-configuration
+export RFM_CONFIG_FILES=$TOPDIR/reframe_config_bot.py
+export RFM_CONFIG_FILE_TEMPLATE=$TOPDIR/reframe_config_bot.py.tmpl
+export RFM_CHECK_SEARCH_PATH=$TESTSUITEPREFIX/eessi/testsuite/tests
+export RFM_CHECK_SEARCH_RECURSIVE=1
+export RFM_PREFIX=$PWD/reframe_runs
+
+echo "Configured ReFrame with the following environment variables:"
+env | grep "RFM_"
+
+# Inject correct CPU properties into the ReFrame config file
+cpuinfo=$(lscpu)
+if [[ "${cpuinfo}" =~ CPU\(s\):[^0-9]*([0-9]+) ]]; then
+    cpu_count=${BASH_REMATCH[1]}
+else
+    fatal_error "Failed to get the number of CPUs for the current test hardware with lscpu."
+fi
+if [[ "${cpuinfo}" =~ Socket\(s\):[^0-9]*([0-9]+) ]]; then
+    socket_count=${BASH_REMATCH[1]}
+else
+    fatal_error "Failed to get the number of sockets for the current test hardware with lscpu."
+fi
+if [[ "${cpuinfo}" =~ (Thread\(s\) per core:[^0-9]*([0-9]+)) ]]; then
+    threads_per_core=${BASH_REMATCH[2]}
+else
+    fatal_error "Failed to get the number of threads per core for the current test hardware with lscpu."
+fi
+if [[ "${cpuinfo}" =~ (Core\(s\) per socket:[^0-9]*([0-9]+)) ]]; then
+    cores_per_socket=${BASH_REMATCH[2]}
+else
+    fatal_error "Failed to get the number of cores per socket for the current test hardware with lscpu."
+fi
+cp ${RFM_CONFIG_FILE_TEMPLATE} ${RFM_CONFIG_FILES}
+sed -i "s/__NUM_CPUS__/${cpu_count}/g" $RFM_CONFIG_FILES
+sed -i "s/__NUM_SOCKETS__/${socket_count}/g" $RFM_CONFIG_FILES
+sed -i "s/__NUM_CPUS_PER_CORE__/${threads_per_core}/g" $RFM_CONFIG_FILES
+sed -i "s/__NUM_CPUS_PER_SOCKET__/${cores_per_socket}/g" $RFM_CONFIG_FILES
+
+# Workaround for https://github.com/EESSI/software-layer/pull/467#issuecomment-1973341966
+export PSM3_DEVICES='self,shm'  # this is enough, since we only run single node for now
+
+# Check that we can run reframe
+reframe --version
+if [[ $? -eq 0 ]]; then
+    echo_green "Successfully ran 'reframe --version'"
+else
+    fatal_error "Failed to run 'reframe --version'"
+fi
+
+# List the tests we want to run
+export REFRAME_ARGS='--tag CI --tag 1_node --nocolor'
+echo "Listing tests: reframe ${REFRAME_ARGS} --list"
+reframe ${REFRAME_ARGS} --list
+if [[ $? -eq 0 ]]; then
+    echo_green "Successfully listed ReFrame tests with command: reframe ${REFRAME_ARGS} --list"
+else
+    fatal_error "Failed to list ReFrame tests with command: reframe ${REFRAME_ARGS} --list"
+fi
+
+# Run all tests
+echo "Running tests: reframe ${REFRAME_ARGS} --run"
+reframe ${REFRAME_ARGS} --run
+reframe_exit_code=$?
+if [[ ${reframe_exit_code} -eq 0 ]]; then
+    echo_green "ReFrame runtime ran successfully with command: reframe ${REFRAME_ARGS} --run."
+else
+    fatal_error "ReFrame runtime failed to run with command: reframe ${REFRAME_ARGS} --run."
+fi
+
+echo ">> Cleaning up ${TMPDIR}..."
+rm -r ${TMPDIR}
+
+exit ${reframe_exit_code}
diff --git a/tests/archdetect/aarch64/cortex_a72/debian-rpi4.all.output b/tests/archdetect/aarch64/cortex_a72/debian-rpi4.all.output
new file mode 100644
index 0000000000..fff57f72ef
--- /dev/null
+++ b/tests/archdetect/aarch64/cortex_a72/debian-rpi4.all.output
@@ -0,0 +1 @@
+aarch64/cortex_a72:aarch64/generic
diff --git a/tests/archdetect/aarch64/cortex_a72/debian-rpi4.cpuinfo b/tests/archdetect/aarch64/cortex_a72/debian-rpi4.cpuinfo
new file mode 100644
index 0000000000..045264e878
--- /dev/null
+++ b/tests/archdetect/aarch64/cortex_a72/debian-rpi4.cpuinfo
@@ -0,0 +1,8 @@
+processor	: 0
+BogoMIPS	: 108.00
+Features	: fp asimd evtstrm crc32 cpuid
+CPU implementer	: 0x41
+CPU architecture: 8
+CPU variant	: 0x0
+CPU part	: 0xd08
+CPU revision	: 3
diff --git a/tests/archdetect/aarch64/cortex_a72/debian-rpi4.output b/tests/archdetect/aarch64/cortex_a72/debian-rpi4.output
new file mode 100644
index 0000000000..1282f7c0c2
--- /dev/null
+++ b/tests/archdetect/aarch64/cortex_a72/debian-rpi4.output
@@ -0,0 +1 @@
+aarch64/cortex_a72