From 53661bf07c9d240da40a12ae81bb02703c6bd4df Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Thu, 19 Sep 2024 08:05:49 +0200 Subject: [PATCH 1/9] Fix issue with grep not returning anything on the CPU prefix --- create_tarball.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/create_tarball.sh b/create_tarball.sh index e70a9b18d6..9c212681a5 100755 --- a/create_tarball.sh +++ b/create_tarball.sh @@ -64,9 +64,9 @@ for subdir in ${cpu_arch_subdir} ${cpu_arch_subdir}/accel/${accel_subdir}; do if [ -d ${eessi_version}/software/${os}/${subdir}/modules ]; then # module files - find ${eessi_version}/software/${os}/${subdir}/modules -type f | grep -v '/\.wh\.' >> ${files_list} + find ${eessi_version}/software/${os}/${subdir}/modules -type f | grep -v '/\.wh\.' >> ${files_list} || true # Make sure we don't exit because of set -e if grep doesn't return a match # module symlinks - find ${eessi_version}/software/${os}/${subdir}/modules -type l | grep -v '/\.wh\.' >> ${files_list} + find ${eessi_version}/software/${os}/${subdir}/modules -type l | grep -v '/\.wh\.' >> ${files_list} || true # Make sure we don't exit because of set -e if grep doesn't return a match # module files and symlinks find ${eessi_version}/software/${os}/${subdir}/modules/all -type f -o -type l \ | grep -v '/\.wh\.' | grep -v '/\.modulerc\.lua' | sed -e 's/.lua$//' | sed -e 's@.*/modules/all/@@g' | sort -u \ @@ -83,7 +83,7 @@ for subdir in ${cpu_arch_subdir} ${cpu_arch_subdir}/accel/${accel_subdir}; do for package_version in $(cat ${module_files_list}); do echo "handling ${package_version}" ls -d ${eessi_version}/software/${os}/${subdir}/software/${package_version} \ - | grep -v '/\.wh\.' >> ${files_list} + | grep -v '/\.wh\.' >> ${files_list} || true # Make sure we don't exit because of set -e if grep doesn't return a match done fi done From ce09733386a5a9b99695c548731c4989bd0041c4 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Thu, 19 Sep 2024 08:41:25 +0200 Subject: [PATCH 2/9] Readd cuda reinstall --- .../20240918-eb-4.9.3-CUDA-12.1.1-in-accel-prefix.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 easystacks/software.eessi.io/2023.06/rebuilds/20240918-eb-4.9.3-CUDA-12.1.1-in-accel-prefix.yml diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240918-eb-4.9.3-CUDA-12.1.1-in-accel-prefix.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240918-eb-4.9.3-CUDA-12.1.1-in-accel-prefix.yml new file mode 100644 index 0000000000..755bea096e --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240918-eb-4.9.3-CUDA-12.1.1-in-accel-prefix.yml @@ -0,0 +1,7 @@ +# 2024.09.18 +# We need to reinstall CUDA in the accelerator prefixes +# See https://github.com/EESSI/software-layer/pull/720 +easyconfigs: + - CUDA-12.1.1.eb: + options: + accept-eula-for: CUDA From 8fa60259ea3dda1bef61de33dad61f9c966595f5 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Tue, 24 Sep 2024 12:51:22 +0200 Subject: [PATCH 3/9] Limit CUDA hook to EESSI installs only, and remove duplication when creating symlinks --- eb_hooks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index b3e457cfe3..e3c6c4faeb 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -684,7 +684,8 @@ def post_sanitycheck_cuda(self, *args, **kwargs): Remove files from CUDA installation that we are not allowed to ship, and replace them with a symlink to a corresponding installation under host_injections. """ - if self.name == 'CUDA': + # Make sure we only do this for CUDA and only if we are doing a CVMFS installation + if self.name == 'CUDA' and self.installdir.startswith('/cvmfs/software.eessi.io/versions'): print_msg("Replacing files in CUDA installation that we can not ship with symlinks to host_injections...") # read CUDA EULA, construct allowlist based on section 2.6 that specifies list of files that can be shipped @@ -733,6 +734,9 @@ def post_sanitycheck_cuda(self, *args, **kwargs): basename, full_path) # if it is not in the allowlist, delete the file and create a symlink to host_injections host_inj_path = full_path.replace('versions', 'host_injections') + # CUDA itself doesn't care about compute capability so remove this duplication from + # under host_injections + host_inj_path = re.sub(r"accel/nvidia/cc\d+/", '', host_inj_path) # make sure source and target of symlink are not the same if full_path == host_inj_path: raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " From 087b7d7d42436b07c035b3433d03c9318fd9ec4c Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Tue, 24 Sep 2024 15:46:08 +0200 Subject: [PATCH 4/9] Use EESSI_ACCELERATOR_TARGET rather than regex --- eb_hooks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index e3c6c4faeb..0a42a91d49 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -736,7 +736,9 @@ def post_sanitycheck_cuda(self, *args, **kwargs): host_inj_path = full_path.replace('versions', 'host_injections') # CUDA itself doesn't care about compute capability so remove this duplication from # under host_injections - host_inj_path = re.sub(r"accel/nvidia/cc\d+/", '', host_inj_path) + accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET") + if accel_subdir: + host_inj_path = host_inj_path.replace('/accel/%s' % accel_subdir, 'host_injections') # make sure source and target of symlink are not the same if full_path == host_inj_path: raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " From b8555d1ee64330a0a43df838f4d1ec58a916a4b5 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Tue, 24 Sep 2024 16:00:32 +0200 Subject: [PATCH 5/9] Ensure we are making an EESSI install when using the CUDA hook --- eb_hooks.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 0a42a91d49..4d94357e8c 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -685,7 +685,12 @@ def post_sanitycheck_cuda(self, *args, **kwargs): and replace them with a symlink to a corresponding installation under host_injections. """ # Make sure we only do this for CUDA and only if we are doing a CVMFS installation - if self.name == 'CUDA' and self.installdir.startswith('/cvmfs/software.eessi.io/versions'): + is_eessi_install = ( + self.installdir.startswith("/cvmfs/software.eessi.io/versions") + and not build_option("sanity_check_only") + and not build_option("module_only") + ) + if self.name == 'CUDA' and is_eessi_install: print_msg("Replacing files in CUDA installation that we can not ship with symlinks to host_injections...") # read CUDA EULA, construct allowlist based on section 2.6 that specifies list of files that can be shipped From fb2c0858a6a5c42d6056b019c62e043b8172ad92 Mon Sep 17 00:00:00 2001 From: ocaisa Date: Tue, 24 Sep 2024 16:07:16 +0200 Subject: [PATCH 6/9] Sloppy copy/paste --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 4d94357e8c..96fa48e129 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -743,7 +743,7 @@ def post_sanitycheck_cuda(self, *args, **kwargs): # under host_injections accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET") if accel_subdir: - host_inj_path = host_inj_path.replace('/accel/%s' % accel_subdir, 'host_injections') + host_inj_path = host_inj_path.replace('/accel/%s' % accel_subdir, '') # make sure source and target of symlink are not the same if full_path == host_inj_path: raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " From cf63589f1c5a0f0f08a4eb44384ee0a920754848 Mon Sep 17 00:00:00 2001 From: ocaisa Date: Tue, 24 Sep 2024 16:08:37 +0200 Subject: [PATCH 7/9] Update eb_hooks.py --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 96fa48e129..ed006bae1c 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -743,7 +743,7 @@ def post_sanitycheck_cuda(self, *args, **kwargs): # under host_injections accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET") if accel_subdir: - host_inj_path = host_inj_path.replace('/accel/%s' % accel_subdir, '') + host_inj_path = host_inj_path.replace("/accel/%s" % accel_subdir, '') # make sure source and target of symlink are not the same if full_path == host_inj_path: raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " From d37a958d008ba679af7872b4bb9038df74338842 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Wed, 25 Sep 2024 10:54:02 +0200 Subject: [PATCH 8/9] Move CUDA hook to post-install, allow hook to trigger for any EESSI distributed repo (but always make symlinks to software.eessi.io) --- eb_hooks.py | 76 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index 4d94357e8c..788a0419ba 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -131,7 +131,8 @@ def pre_prepare_hook(self, *args, **kwargs): def post_prepare_hook_gcc_prefixed_ld_rpath_wrapper(self, *args, **kwargs): """ Post-configure hook for GCCcore: - - copy RPATH wrapper script for linker commands to also have a wrapper in place with system type prefix like 'x86_64-pc-linux-gnu' + - copy RPATH wrapper script for linker commands to also have a wrapper in + place with system type prefix like 'x86_64-pc-linux-gnu' """ if self.name == 'GCCcore': config_guess = obtain_config_guess() @@ -279,10 +280,10 @@ def parse_hook_qt5_check_qtwebengine_disable(ec, eprefix): Disable check for QtWebEngine in Qt5 as workaround for problem with determining glibc version. """ if ec.name == 'Qt5': - # workaround for glibc version being reported as "UNKNOWN" in Gentoo Prefix environment by EasyBuild v4.7.2, - # see also https://github.com/easybuilders/easybuild-framework/pull/4290 - ec['check_qtwebengine'] = False - print_msg("Checking for QtWebEgine in Qt5 installation has been disabled") + # workaround for glibc version being reported as "UNKNOWN" in Gentoo Prefix environment by EasyBuild v4.7.2, + # see also https://github.com/easybuilders/easybuild-framework/pull/4290 + ec['check_qtwebengine'] = False + print_msg("Checking for QtWebEgine in Qt5 installation has been disabled") else: raise EasyBuildError("Qt5-specific hook triggered for non-Qt5 easyconfig?!") @@ -341,7 +342,7 @@ def pre_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwarg if self.name == 'Highway': tcname, tcversion = self.toolchain.name, self.toolchain.version cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') - # note: keep condition in sync with the one used in + # note: keep condition in sync with the one used in # post_prepare_hook_highway_handle_test_compilation_issues if self.version in ['1.0.4'] and tcname == 'GCCcore' and tcversion == '12.3.0': if cpu_target in [CPU_TARGET_A64FX, CPU_TARGET_NEOVERSE_V1]: @@ -360,12 +361,13 @@ def post_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwar if self.name == 'Highway': tcname, tcversion = self.toolchain.name, self.toolchain.version cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') - # note: keep condition in sync with the one used in + # note: keep condition in sync with the one used in # pre_prepare_hook_highway_handle_test_compilation_issues if self.version in ['1.0.4'] and tcname == 'GCCcore' and tcversion == '12.3.0': if cpu_target == CPU_TARGET_NEOVERSE_N1: update_build_option('optarch', self.orig_optarch) + def pre_configure_hook(self, *args, **kwargs): """Main pre-configure hook: trigger custom functions based on software name.""" if self.name in PRE_CONFIGURE_HOOKS: @@ -389,6 +391,7 @@ def pre_configure_hook_BLIS_a64fx(self, *args, **kwargs): else: raise EasyBuildError("BLIS-specific hook triggered for non-BLIS easyconfig?!") + def pre_configure_hook_extrae(self, *args, **kwargs): """ Pre-configure hook for Extrae @@ -414,7 +417,11 @@ def pre_configure_hook_extrae(self, *args, **kwargs): # replace use of 'which' with 'command -v', since 'which' is broken in EESSI build container; # this must be done *after* running configure script, because initial configuration re-writes configure script, # and problem due to use of which only pops up when running make ?! - self.cfg.update('prebuildopts', "cp config/mpi-macros.m4 config/mpi-macros.m4.orig && sed -i 's/`which /`command -v /g' config/mpi-macros.m4 && ") + self.cfg.update( + 'prebuildopts', + "cp config/mpi-macros.m4 config/mpi-macros.m4.orig &&" + "sed -i 's/`which /`command -v /g' config/mpi-macros.m4 && " + ) else: raise EasyBuildError("Extrae-specific hook triggered for non-Extrae easyconfig?!") @@ -445,7 +452,10 @@ def pre_configure_hook_gromacs(self, *args, **kwargs): cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') if LooseVersion(self.version) <= LooseVersion('2024.1') and cpu_target == CPU_TARGET_NEOVERSE_V1: self.cfg.update('configopts', '-DGMX_SIMD=ARM_NEON_ASIMD') - print_msg("Avoiding use of SVE instructions for GROMACS %s by using ARM_NEON_ASIMD as GMX_SIMD value", self.version) + print_msg( + "Avoiding use of SVE instructions for GROMACS %s by using ARM_NEON_ASIMD as GMX_SIMD value", + self.version + ) else: raise EasyBuildError("GROMACS-specific hook triggered for non-GROMACS easyconfig?!") @@ -506,12 +516,12 @@ def pre_configure_hook_wrf_aarch64(self, *args, **kwargs): pattern = "Linux x86_64 ppc64le, gfortran" repl = "Linux x86_64 aarch64 ppc64le, gfortran" if LooseVersion(self.version) <= LooseVersion('3.9.0'): - self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure_new.defaults && " % (pattern, repl)) - print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) + self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure_new.defaults && " % (pattern, repl)) + print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) if LooseVersion('4.0.0') <= LooseVersion(self.version) <= LooseVersion('4.2.1'): - self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure.defaults && " % (pattern, repl)) - print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) + self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure.defaults && " % (pattern, repl)) + print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) else: raise EasyBuildError("WRF-specific hook triggered for non-WRF easyconfig?!") @@ -533,7 +543,7 @@ def pre_configure_hook_LAMMPS_zen4(self, *args, **kwargs): raise EasyBuildError("LAMMPS-specific hook triggered for non-LAMMPS easyconfig?!") -def pre_test_hook(self,*args, **kwargs): +def pre_test_hook(self, *args, **kwargs): """Main pre-test hook: trigger custom functions based on software name.""" if self.name in PRE_TEST_HOOKS: PRE_TEST_HOOKS[self.name](self, *args, **kwargs) @@ -596,6 +606,7 @@ def pre_test_hook_ignore_failing_tests_SciPybundle(self, *args, **kwargs): elif cpu_target == CPU_TARGET_A64FX and self.version in scipy_bundle_versions_a64fx: self.cfg['testopts'] = "|| echo ignoring failing tests" + def pre_test_hook_ignore_failing_tests_netCDF(self, *args, **kwargs): """ Pre-test hook for netCDF: skip failing tests for selected netCDF versions on neoverse_v1 @@ -609,6 +620,7 @@ def pre_test_hook_ignore_failing_tests_netCDF(self, *args, **kwargs): if self.name == 'netCDF' and self.version == '4.9.2' and cpu_target == CPU_TARGET_NEOVERSE_V1: self.cfg['testopts'] = "|| echo ignoring failing tests" + def pre_test_hook_increase_max_failed_tests_arm_PyTorch(self, *args, **kwargs): """ Pre-test hook for PyTorch: increase max failing tests for ARM for PyTorch 2.1.2 @@ -673,24 +685,24 @@ def pre_single_extension_testthat(ext, *args, **kwargs): ext.cfg['preinstallopts'] = "sed -i 's/SIGSTKSZ/32768/g' inst/include/testthat/vendor/catch.h && " -def post_sanitycheck_hook(self, *args, **kwargs): - """Main post-sanity-check hook: trigger custom functions based on software name.""" - if self.name in POST_SANITYCHECK_HOOKS: - POST_SANITYCHECK_HOOKS[self.name](self, *args, **kwargs) +def post_postproc_hook(self, *args, **kwargs): + """Main post-postprocessing hook: trigger custom functions based on software name.""" + if self.name in POST_POSTPROC_HOOKS: + POST_POSTPROC_HOOKS[self.name](self, *args, **kwargs) -def post_sanitycheck_cuda(self, *args, **kwargs): +def post_postproc_cuda(self, *args, **kwargs): """ Remove files from CUDA installation that we are not allowed to ship, and replace them with a symlink to a corresponding installation under host_injections. """ - # Make sure we only do this for CUDA and only if we are doing a CVMFS installation - is_eessi_install = ( - self.installdir.startswith("/cvmfs/software.eessi.io/versions") - and not build_option("sanity_check_only") - and not build_option("module_only") - ) - if self.name == 'CUDA' and is_eessi_install: + + # We need to check if we are doing an EESSI-distributed installation + eessi_pattern = r"^/cvmfs/[^/]*.eessi.io/versions/" + host_injections_location = "/cvmfs/software.eessi.io/host_injections/" + eessi_installation = bool(re.search(eessi_pattern, self.installdir)) + + if self.name == 'CUDA' and eessi_installation: print_msg("Replacing files in CUDA installation that we can not ship with symlinks to host_injections...") # read CUDA EULA, construct allowlist based on section 2.6 that specifies list of files that can be shipped @@ -738,12 +750,14 @@ def post_sanitycheck_cuda(self, *args, **kwargs): self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s", basename, full_path) # if it is not in the allowlist, delete the file and create a symlink to host_injections - host_inj_path = full_path.replace('versions', 'host_injections') + + # the host_injections path is under a fixed repo/location for CUDA + host_inj_path = re.sub(eessi_pattern, host_injections_location, full_path) # CUDA itself doesn't care about compute capability so remove this duplication from # under host_injections accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET") if accel_subdir: - host_inj_path = host_inj_path.replace('/accel/%s' % accel_subdir, 'host_injections') + host_inj_path = host_inj_path.replace('/accel/%s' % accel_subdir, '') # make sure source and target of symlink are not the same if full_path == host_inj_path: raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " @@ -775,7 +789,7 @@ def inject_gpu_property(ec): ec_dict['builddependencies'].append(dep) value = '\n'.join([value, 'setenv("EESSICUDAVERSION","%s")' % cuda_version]) if key in ec_dict: - if not value in ec_dict[key]: + if value not in ec_dict[key]: ec[key] = '\n'.join([ec_dict[key], value]) else: ec[key] = value @@ -835,6 +849,6 @@ def inject_gpu_property(ec): 'numpy': post_single_extension_numpy, } -POST_SANITYCHECK_HOOKS = { - 'CUDA': post_sanitycheck_cuda, +POST_POSTPROC_HOOKS = { + 'CUDA': post_postproc_cuda, } From c58033861c94c4b01dca443ef7a672b63da274fa Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Wed, 25 Sep 2024 11:14:29 +0200 Subject: [PATCH 9/9] Address review comments --- eb_hooks.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index f9ecefe1f6..9b0e9c8dcb 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -32,6 +32,9 @@ SYSTEM = EASYCONFIG_CONSTANTS['SYSTEM'][0] +EESSI_INSTALLATION_REGEX = r"^/cvmfs/[^/]*.eessi.io/versions/" +HOST_INJECTIONS_LOCATION = "/cvmfs/software.eessi.io/host_injections/" + def get_eessi_envvar(eessi_envvar): """Get an EESSI environment variable from the environment""" @@ -419,7 +422,7 @@ def pre_configure_hook_extrae(self, *args, **kwargs): # and problem due to use of which only pops up when running make ?! self.cfg.update( 'prebuildopts', - "cp config/mpi-macros.m4 config/mpi-macros.m4.orig &&" + "cp config/mpi-macros.m4 config/mpi-macros.m4.orig && " "sed -i 's/`which /`command -v /g' config/mpi-macros.m4 && " ) else: @@ -698,9 +701,7 @@ def post_postproc_cuda(self, *args, **kwargs): """ # We need to check if we are doing an EESSI-distributed installation - eessi_pattern = r"^/cvmfs/[^/]*.eessi.io/versions/" - host_injections_location = "/cvmfs/software.eessi.io/host_injections/" - eessi_installation = bool(re.search(eessi_pattern, self.installdir)) + eessi_installation = bool(re.search(EESSI_INSTALLATION_REGEX, self.installdir)) if self.name == 'CUDA' and eessi_installation: print_msg("Replacing files in CUDA installation that we can not ship with symlinks to host_injections...") @@ -752,12 +753,12 @@ def post_postproc_cuda(self, *args, **kwargs): # if it is not in the allowlist, delete the file and create a symlink to host_injections # the host_injections path is under a fixed repo/location for CUDA - host_inj_path = re.sub(eessi_pattern, host_injections_location, full_path) + host_inj_path = re.sub(EESSI_INSTALLATION_REGEX, HOST_INJECTIONS_LOCATION, full_path) # CUDA itself doesn't care about compute capability so remove this duplication from - # under host_injections + # under host_injections (symlink to a single CUDA installation for all compute + # capabilities) accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET") if accel_subdir: - host_inj_path = host_inj_path.replace('/accel/%s' % accel_subdir, '') host_inj_path = host_inj_path.replace("/accel/%s" % accel_subdir, '') # make sure source and target of symlink are not the same if full_path == host_inj_path: