Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{numlib}[NVHPC/24.1-CUDA-12.4.0] NVHPC v24.1, nvofbf v2023b, nvompi v2023b, ... w/ CUDA 12.4.0, fb #21890

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from

Conversation

tanmoy1989
Copy link
Contributor

@tanmoy1989 tanmoy1989 commented Nov 20, 2024

(created using eb --new-pr)
The main focus was to update nvofbf to 2023b; the other dependencies therefore also needed to be updated, similar to what was done for nvofbf/2023a (see: #21530).

…i-2023b.eb, OpenMPI-4.1.6-NVHPC-24.1-CUDA-12.4.0-CUDA-12.4.0.eb, OpenBLAS-0.3.24-NVHPC-24.1-CUDA-12.4.0.eb, FlexiBLAS-3.3.1-NVHPC-24.1-CUDA-12.4.0.eb, FFTW-3.3.10-NVHPC-24.1-CUDA-12.4.0.eb, FFTW.MPI-3.3.10-nvompi-2023b.eb, ScaLAPACK-2.2.0-nvompi-2023b-fb.eb, UCC-CUDA-1.2.0-GCCcore-13.2.0-CUDA-12.4.0.eb
Copy link

Updated software FFTW.MPI-3.3.10-nvompi-2023b.eb

Diff against FFTW.MPI-3.3.10-gompi-2024a.eb

easybuild/easyconfigs/f/FFTW.MPI/FFTW.MPI-3.3.10-gompi-2024a.eb

diff --git a/easybuild/easyconfigs/f/FFTW.MPI/FFTW.MPI-3.3.10-gompi-2024a.eb b/easybuild/easyconfigs/f/FFTW.MPI/FFTW.MPI-3.3.10-nvompi-2023b.eb
index 2ed420c412..21ac22e854 100644
--- a/easybuild/easyconfigs/f/FFTW.MPI/FFTW.MPI-3.3.10-gompi-2024a.eb
+++ b/easybuild/easyconfigs/f/FFTW.MPI/FFTW.MPI-3.3.10-nvompi-2023b.eb
@@ -5,7 +5,7 @@ homepage = 'https://www.fftw.org'
 description = """FFTW is a C subroutine library for computing the discrete Fourier transform (DFT)
 in one or more dimensions, of arbitrary input size, and of both real and complex data."""
 
-toolchain = {'name': 'gompi', 'version': '2024a'}
+toolchain = {'name': 'nvompi', 'version': '2023b'}
 toolchainopts = {'pic': True}
 
 source_urls = [homepage]
Diff against FFTW.MPI-3.3.10-gmpich-2024.06.eb

easybuild/easyconfigs/f/FFTW.MPI/FFTW.MPI-3.3.10-gmpich-2024.06.eb

diff --git a/easybuild/easyconfigs/f/FFTW.MPI/FFTW.MPI-3.3.10-gmpich-2024.06.eb b/easybuild/easyconfigs/f/FFTW.MPI/FFTW.MPI-3.3.10-nvompi-2023b.eb
index 4b79e9e7c2..21ac22e854 100644
--- a/easybuild/easyconfigs/f/FFTW.MPI/FFTW.MPI-3.3.10-gmpich-2024.06.eb
+++ b/easybuild/easyconfigs/f/FFTW.MPI/FFTW.MPI-3.3.10-nvompi-2023b.eb
@@ -5,7 +5,7 @@ homepage = 'https://www.fftw.org'
 description = """FFTW is a C subroutine library for computing the discrete Fourier transform (DFT)
 in one or more dimensions, of arbitrary input size, and of both real and complex data."""
 
-toolchain = {'name': 'gmpich', 'version': '2024.06'}
+toolchain = {'name': 'nvompi', 'version': '2023b'}
 toolchainopts = {'pic': True}
 
 source_urls = [homepage]

Updated software FFTW-3.3.10-NVHPC-24.1-CUDA-12.4.0.eb

Diff against FFTW-3.3.10-GCC-13.3.0.eb

easybuild/easyconfigs/f/FFTW/FFTW-3.3.10-GCC-13.3.0.eb

diff --git a/easybuild/easyconfigs/f/FFTW/FFTW-3.3.10-GCC-13.3.0.eb b/easybuild/easyconfigs/f/FFTW/FFTW-3.3.10-NVHPC-24.1-CUDA-12.4.0.eb
index bf18544e70..d62cf4ca1b 100644
--- a/easybuild/easyconfigs/f/FFTW/FFTW-3.3.10-GCC-13.3.0.eb
+++ b/easybuild/easyconfigs/f/FFTW/FFTW-3.3.10-NVHPC-24.1-CUDA-12.4.0.eb
@@ -5,13 +5,16 @@ homepage = 'https://www.fftw.org'
 description = """FFTW is a C subroutine library for computing the discrete Fourier transform (DFT)
 in one or more dimensions, of arbitrary input size, and of both real and complex data."""
 
-toolchain = {'name': 'GCC', 'version': '13.3.0'}
+toolchain = {'name': 'NVHPC', 'version': '24.1-CUDA-12.4.0'}
 toolchainopts = {'pic': True}
 
 source_urls = [homepage]
 sources = [SOURCELOWER_TAR_GZ]
 checksums = ['56c932549852cddcfafdab3820b0200c7742675be92179e59e6215b340e26467']
 
+# Does not work with nvc
+with_quad_prec = False
+
 runtest = 'check'
 
 moduleclass = 'numlib'
Diff against FFTW-3.3.10-GCC-13.2.0.eb

easybuild/easyconfigs/f/FFTW/FFTW-3.3.10-GCC-13.2.0.eb

diff --git a/easybuild/easyconfigs/f/FFTW/FFTW-3.3.10-GCC-13.2.0.eb b/easybuild/easyconfigs/f/FFTW/FFTW-3.3.10-NVHPC-24.1-CUDA-12.4.0.eb
index 32652387f8..d62cf4ca1b 100644
--- a/easybuild/easyconfigs/f/FFTW/FFTW-3.3.10-GCC-13.2.0.eb
+++ b/easybuild/easyconfigs/f/FFTW/FFTW-3.3.10-NVHPC-24.1-CUDA-12.4.0.eb
@@ -5,13 +5,16 @@ homepage = 'https://www.fftw.org'
 description = """FFTW is a C subroutine library for computing the discrete Fourier transform (DFT)
 in one or more dimensions, of arbitrary input size, and of both real and complex data."""
 
-toolchain = {'name': 'GCC', 'version': '13.2.0'}
+toolchain = {'name': 'NVHPC', 'version': '24.1-CUDA-12.4.0'}
 toolchainopts = {'pic': True}
 
 source_urls = [homepage]
 sources = [SOURCELOWER_TAR_GZ]
 checksums = ['56c932549852cddcfafdab3820b0200c7742675be92179e59e6215b340e26467']
 
+# Does not work with nvc
+with_quad_prec = False
+
 runtest = 'check'
 
 moduleclass = 'numlib'

Updated software FlexiBLAS-3.3.1-NVHPC-24.1-CUDA-12.4.0.eb

Diff against FlexiBLAS-3.4.4-GCC-13.3.0.eb

easybuild/easyconfigs/f/FlexiBLAS/FlexiBLAS-3.4.4-GCC-13.3.0.eb

diff --git a/easybuild/easyconfigs/f/FlexiBLAS/FlexiBLAS-3.4.4-GCC-13.3.0.eb b/easybuild/easyconfigs/f/FlexiBLAS/FlexiBLAS-3.3.1-NVHPC-24.1-CUDA-12.4.0.eb
index 9b9f5e8edb..e6f5779078 100644
--- a/easybuild/easyconfigs/f/FlexiBLAS/FlexiBLAS-3.4.4-GCC-13.3.0.eb
+++ b/easybuild/easyconfigs/f/FlexiBLAS/FlexiBLAS-3.3.1-NVHPC-24.1-CUDA-12.4.0.eb
@@ -1,29 +1,28 @@
 easyblock = 'Bundle'
 
 name = 'FlexiBLAS'
-version = '3.4.4'
+version = '3.3.1'
 
 homepage = 'https://gitlab.mpi-magdeburg.mpg.de/software/flexiblas-release'
 description = """FlexiBLAS is a wrapper library that enables the exchange of the BLAS and LAPACK implementation
 used by a program without recompiling or relinking it."""
 
-toolchain = {'name': 'GCC', 'version': '13.3.0'}
-local_extra_flags = "-fstack-protector-strong -fstack-clash-protection"
+toolchain = {'name': 'NVHPC', 'version': '24.1-CUDA-12.4.0'}
+local_extra_flags = "-D__ELF__"
 toolchainopts = {'pic': True, 'extra_cflags': local_extra_flags, 'extra_fflags': local_extra_flags}
 
 builddependencies = [
-    ('CMake', '3.29.3'),
-    ('Python', '3.12.3'),  # required for running the tests
-    ('BLIS', '1.0'),
+    ('CMake', '3.27.6'),
+    ('Python', '3.11.5'),  # required for running the tests
 ]
 
 dependencies = [
-    ('OpenBLAS', '0.3.27'),
+    ('OpenBLAS', '0.3.24'),
 ]
 
 # note: first listed backend will be used as default by FlexiBLAS,
 # unless otherwise specified via easyconfig parameter flexiblas_default
-local_backends = ['OpenBLAS', 'BLIS']
+local_backends = ['OpenBLAS']
 
 # imkl supplies its backend via the imkl module, not as a dependency
 if ARCH == 'x86_64':
@@ -36,16 +35,16 @@ sanity_check_all_components = True
 components = [
     (name, version, {
         'source_urls':
-        ['https://gitlab.mpi-magdeburg.mpg.de/api/v4/projects/386/packages/generic/flexiblas-source/v3.4.4/'],
+        ['https://gitlab.mpi-magdeburg.mpg.de/api/v4/projects/386/packages/generic/flexiblas-source/v3.3.1/'],
         'sources': [SOURCELOWER_TAR_GZ],
-        'checksums': ['05040ae092142dd0bf38d1bb9ce33f6b475d9f9bb455e33be997932ae855c22b'],
+        'checksums': ['bbeebf5e5a006924558fec43f49affbe1aaa4cbacfc472a9ff6066ffda142e18'],
         'backends': local_backends,
     }),
-    ('LAPACK', '3.12.0', {
+    ('LAPACK', '3.11.0', {
         'easyblock': 'CMakeMake',
         'source_urls': ['https://github.com/Reference-LAPACK/lapack/archive/'],
         'sources': ['v%(version)s.tar.gz'],
-        'checksums': ['eac9570f8e0ad6f30ce4b963f4f033f0f643e7c3912fc9ee6cd99120675ad48b'],
+        'checksums': ['4b9ba79bfd4921ca820e83979db76ab3363155709444a787979e81c22285ffa9'],
         'configopts': ('-DBUILD_SHARED_LIBS=ON -DUSE_OPTIMIZED_BLAS=ON -DLAPACKE=ON '
                        '-DUSE_OPTIMIZED_LAPACK=ON -DBUILD_DEPRECATED=ON '
                        '-DCMAKE_INSTALL_INCLUDEDIR=%(installdir)s/include/flexiblas'),
Diff against FlexiBLAS-3.3.1-GCC-13.2.0.eb

easybuild/easyconfigs/f/FlexiBLAS/FlexiBLAS-3.3.1-GCC-13.2.0.eb

diff --git a/easybuild/easyconfigs/f/FlexiBLAS/FlexiBLAS-3.3.1-GCC-13.2.0.eb b/easybuild/easyconfigs/f/FlexiBLAS/FlexiBLAS-3.3.1-NVHPC-24.1-CUDA-12.4.0.eb
index 4c6e509040..e6f5779078 100644
--- a/easybuild/easyconfigs/f/FlexiBLAS/FlexiBLAS-3.3.1-GCC-13.2.0.eb
+++ b/easybuild/easyconfigs/f/FlexiBLAS/FlexiBLAS-3.3.1-NVHPC-24.1-CUDA-12.4.0.eb
@@ -7,14 +7,13 @@ homepage = 'https://gitlab.mpi-magdeburg.mpg.de/software/flexiblas-release'
 description = """FlexiBLAS is a wrapper library that enables the exchange of the BLAS and LAPACK implementation
 used by a program without recompiling or relinking it."""
 
-toolchain = {'name': 'GCC', 'version': '13.2.0'}
-local_extra_flags = "-fstack-protector-strong -fstack-clash-protection"
+toolchain = {'name': 'NVHPC', 'version': '24.1-CUDA-12.4.0'}
+local_extra_flags = "-D__ELF__"
 toolchainopts = {'pic': True, 'extra_cflags': local_extra_flags, 'extra_fflags': local_extra_flags}
 
 builddependencies = [
     ('CMake', '3.27.6'),
     ('Python', '3.11.5'),  # required for running the tests
-    ('BLIS', '0.9.0'),
 ]
 
 dependencies = [
@@ -23,7 +22,7 @@ dependencies = [
 
 # note: first listed backend will be used as default by FlexiBLAS,
 # unless otherwise specified via easyconfig parameter flexiblas_default
-local_backends = ['OpenBLAS', 'BLIS']
+local_backends = ['OpenBLAS']
 
 # imkl supplies its backend via the imkl module, not as a dependency
 if ARCH == 'x86_64':

Updated software NVHPC-24.1-CUDA-12.4.0.eb

Diff against NVHPC-24.9-CUDA-12.6.0.eb

easybuild/easyconfigs/n/NVHPC/NVHPC-24.9-CUDA-12.6.0.eb

diff --git a/easybuild/easyconfigs/n/NVHPC/NVHPC-24.9-CUDA-12.6.0.eb b/easybuild/easyconfigs/n/NVHPC/NVHPC-24.1-CUDA-12.4.0.eb
index d164eabc04..2ecdd1778b 100644
--- a/easybuild/easyconfigs/n/NVHPC/NVHPC-24.9-CUDA-12.6.0.eb
+++ b/easybuild/easyconfigs/n/NVHPC/NVHPC-24.1-CUDA-12.4.0.eb
@@ -1,5 +1,5 @@
 name = 'NVHPC'
-version = '24.9'
+version = '24.1'
 versionsuffix = '-CUDA-%(cudaver)s'
 
 homepage = 'https://developer.nvidia.com/hpc-sdk/'
@@ -10,25 +10,27 @@ toolchain = SYSTEM
 local_tarball_tmpl = 'nvhpc_2024_%%(version_major)s%%(version_minor)s_Linux_%s_cuda_multi.tar.gz'
 # By downloading, you accept the HPC SDK Software License Agreement
 # https://docs.nvidia.com/hpc-sdk/eula/index.html
-# accept_eula = True
+accept_eula = True
 source_urls = ['https://developer.download.nvidia.com/hpc-sdk/%(version)s/']
 sources = [local_tarball_tmpl % '%(arch)s']
 checksums = [
     {
         local_tarball_tmpl % 'aarch64':
-            '8d900f798ef806c64993fd4fedf2c2c812dd1ccdbac2a0d33fabcd0cd36f19cf',
+            '8c2ce561d5901a03eadce7f07dce5fbc55e8e88c87b74cf60e01e2eca231c41c',
+        local_tarball_tmpl % 'ppc64le':
+            'e7330eb35e23dcd9b0b3bedc67c0d5443c4fd76b59caa894a76ecb0d17f71f43',
         local_tarball_tmpl % 'x86_64':
-            '30c493350cf67481e84cea60a3a869e01fa0bcb71df8e898266273fbdf0a7f26',
+            '27992e5fd56af8738501830daddc5e9510ebd553326fea8730236fee4f0f1dd8',
     }
 ]
 
-local_gccver = '13.3.0'
+local_gccver = '13.2.0'
 dependencies = [
     ('GCCcore', local_gccver),
-    ('binutils', '2.42', '', ('GCCcore', local_gccver)),
+    ('binutils', '2.40', '', ('GCCcore', local_gccver)),
     # This is necessary to avoid cases where just libnuma.so.1 is present in the system and -lnuma fails
-    ('numactl', '2.0.18', '', ('GCCcore', local_gccver)),
-    ('CUDA', '12.6.0', '', SYSTEM),
+    ('numactl', '2.0.16', '', ('GCCcore', local_gccver)),
+    ('CUDA', '12.4.0', '', SYSTEM),
 ]
 
 module_add_cuda = False
@@ -69,5 +71,10 @@ default_cuda_version = '%(cudaver)s'
 #   module_add_nvshmem = False    # Add NVHPC's NVSHMEM library
 #   module_add_cuda = False       # Add NVHPC's bundled CUDA
 
+modextrapaths = {
+    'LD_LIBRARY_PATH': ['Linux_x86_64/%(version)s/compilers/extras/qd/lib'],
+    'LIBRARY_PATH': ['Linux_x86_64/%(version)s/compilers/extras/qd/lib'],
+}
+
 # this bundle serves as a compiler-only toolchain, so it should be marked as compiler (important for HMNS)
 moduleclass = 'compiler'
Diff against NVHPC-24.1-CUDA-12.3.0.eb

easybuild/easyconfigs/n/NVHPC/NVHPC-24.1-CUDA-12.3.0.eb

diff --git a/easybuild/easyconfigs/n/NVHPC/NVHPC-24.1-CUDA-12.3.0.eb b/easybuild/easyconfigs/n/NVHPC/NVHPC-24.1-CUDA-12.4.0.eb
index 2bab93e8ab..2ecdd1778b 100644
--- a/easybuild/easyconfigs/n/NVHPC/NVHPC-24.1-CUDA-12.3.0.eb
+++ b/easybuild/easyconfigs/n/NVHPC/NVHPC-24.1-CUDA-12.4.0.eb
@@ -10,7 +10,7 @@ toolchain = SYSTEM
 local_tarball_tmpl = 'nvhpc_2024_%%(version_major)s%%(version_minor)s_Linux_%s_cuda_multi.tar.gz'
 # By downloading, you accept the HPC SDK Software License Agreement
 # https://docs.nvidia.com/hpc-sdk/eula/index.html
-# accept_eula = True
+accept_eula = True
 source_urls = ['https://developer.download.nvidia.com/hpc-sdk/%(version)s/']
 sources = [local_tarball_tmpl % '%(arch)s']
 checksums = [
@@ -30,7 +30,7 @@ dependencies = [
     ('binutils', '2.40', '', ('GCCcore', local_gccver)),
     # This is necessary to avoid cases where just libnuma.so.1 is present in the system and -lnuma fails
     ('numactl', '2.0.16', '', ('GCCcore', local_gccver)),
-    ('CUDA', '12.3.0', '', SYSTEM),
+    ('CUDA', '12.4.0', '', SYSTEM),
 ]
 
 module_add_cuda = False
@@ -71,5 +71,10 @@ default_cuda_version = '%(cudaver)s'
 #   module_add_nvshmem = False    # Add NVHPC's NVSHMEM library
 #   module_add_cuda = False       # Add NVHPC's bundled CUDA
 
+modextrapaths = {
+    'LD_LIBRARY_PATH': ['Linux_x86_64/%(version)s/compilers/extras/qd/lib'],
+    'LIBRARY_PATH': ['Linux_x86_64/%(version)s/compilers/extras/qd/lib'],
+}
+
 # this bundle serves as a compiler-only toolchain, so it should be marked as compiler (important for HMNS)
 moduleclass = 'compiler'

Updated software nvofbf-2023b.eb

Diff against nvofbf-2022.07.eb

easybuild/easyconfigs/n/nvofbf/nvofbf-2022.07.eb

diff --git a/easybuild/easyconfigs/n/nvofbf/nvofbf-2022.07.eb b/easybuild/easyconfigs/n/nvofbf/nvofbf-2023b.eb
index 172f0f07ea..5452870505 100644
--- a/easybuild/easyconfigs/n/nvofbf/nvofbf-2022.07.eb
+++ b/easybuild/easyconfigs/n/nvofbf/nvofbf-2023b.eb
@@ -1,7 +1,11 @@
+# This file is an EasyBuild reciPY as per https://easybuilders.github.io/easybuild/
+# Author: Tanmoy Chakraborty (University of Warwick)
+# Email: [email protected]
+
 easyblock = 'Toolchain'
 
 name = 'nvofbf'
-version = '2022.07'
+version = '2023b'
 
 homepage = '(none)'
 description = """NVHPC based toolchain, including OpenMPI for MPI support,
@@ -9,14 +13,14 @@ OpenBLAS (via FlexiBLAS for BLAS and LAPACK support), FFTW and ScaLAPACK."""
 
 toolchain = SYSTEM
 
-local_compiler = ('NVHPC', '22.7-CUDA-11.7.0')
+local_compiler = ('NVHPC', '24.1-CUDA-12.4.0')
 
 local_comp_mpi_tc = ('nvompi', version)
 
 dependencies = [
     local_compiler,
-    ('OpenMPI', '4.1.4', '', local_compiler),
-    ('FlexiBLAS', '3.2.0', '', local_compiler),
+    ('OpenMPI', '4.1.6', '-CUDA-12.4.0', local_compiler),
+    ('FlexiBLAS', '3.3.1', '', local_compiler),
     ('FFTW', '3.3.10', '', local_compiler),
     ('FFTW.MPI', '3.3.10', '', local_comp_mpi_tc),
     ('ScaLAPACK', '2.2.0', '-fb', local_comp_mpi_tc),

Updated software nvompi-2023b.eb

Diff against nvompi-2022.07.eb

easybuild/easyconfigs/n/nvompi/nvompi-2022.07.eb

diff --git a/easybuild/easyconfigs/n/nvompi/nvompi-2022.07.eb b/easybuild/easyconfigs/n/nvompi/nvompi-2023b.eb
index 1a1647cbfa..c7a918f6eb 100644
--- a/easybuild/easyconfigs/n/nvompi/nvompi-2022.07.eb
+++ b/easybuild/easyconfigs/n/nvompi/nvompi-2023b.eb
@@ -1,19 +1,23 @@
+# This file is an EasyBuild reciPY as per https://easybuilders.github.io/easybuild/
+# Author: Tanmoy Chakraborty (University of Warwick)
+# Email: [email protected]
+
 easyblock = 'Toolchain'
 
 name = 'nvompi'
-version = '2022.07'
+version = '2023b'
 
 homepage = '(none)'
 description = 'NVHPC based compiler toolchain, including OpenMPI for MPI support.'
 
 toolchain = SYSTEM
 
-local_compiler = ('NVHPC', '22.7-CUDA-11.7.0')
+local_compiler = ('NVHPC', '24.1-CUDA-12.4.0')
 
 dependencies = [
     local_compiler,
-    ('OpenMPI', '4.1.4', '', local_compiler),
-    ('CUDA', '11.7.0', '', SYSTEM),
+    ('OpenMPI', '4.1.6', '-CUDA-12.4.0', local_compiler),
+    ('CUDA', '12.4.0', '', SYSTEM),
 ]
 
 moduleclass = 'toolchain'

Updated software OpenBLAS-0.3.24-NVHPC-24.1-CUDA-12.4.0.eb

Diff against OpenBLAS-0.3.27-GCC-13.3.0-seq-iface64.eb

easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.27-GCC-13.3.0-seq-iface64.eb

diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.27-GCC-13.3.0-seq-iface64.eb b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-NVHPC-24.1-CUDA-12.4.0.eb
index 5527c667f6..4c4e2945fb 100644
--- a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.27-GCC-13.3.0-seq-iface64.eb
+++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-NVHPC-24.1-CUDA-12.4.0.eb
@@ -1,11 +1,10 @@
 name = 'OpenBLAS'
-version = '0.3.27'
-versionsuffix = '-seq-iface64'
+version = '0.3.24'
 
-homepage = 'https://www.openblas.net/'
+homepage = 'http://www.openblas.net/'
 description = "OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version."
 
-toolchain = {'name': 'GCC', 'version': '13.3.0'}
+toolchain = {'name': 'NVHPC', 'version': '24.1-CUDA-12.4.0'}
 
 source_urls = [
     # order matters, trying to download the large.tgz/timing.tgz LAPACK tarballs from GitHub causes trouble
@@ -17,41 +16,35 @@ patches = [
     ('large.tgz', '.'),
     ('timing.tgz', '.'),
     'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch',
+    'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch',
     'OpenBLAS-0.3.21_fix-order-vectorization.patch',
-    'OpenBLAS-0.3.26_lapack_qr_noninittest.patch',
-    'OpenBLAS-0.3.27_fix_zscal.patch',
-    'OpenBLAS-0.3.27_riscv-drop-static-fortran-flag.patch',
+    'OpenBLAS-0.3.23_disable-xDRGES-LAPACK-test.patch',
+    'OpenBLAS-0.3.24_fix-czasum.patch',
+    'OpenBLAS-0.3.24_fix-A64FX.patch',
 ]
 checksums = [
-    {'v0.3.27.tar.gz': 'aa2d68b1564fe2b13bc292672608e9cdeeeb6dc34995512e65c3b10f4599e897'},
+    {'v0.3.24.tar.gz': 'ceadc5065da97bd92404cac7254da66cc6eb192679cf1002098688978d4d5132'},
     {'large.tgz': 'f328d88b7fa97722f271d7d0cfea1c220e0f8e5ed5ff01d8ef1eb51d6f4243a1'},
     {'timing.tgz': '999c65f8ea8bd4eac7f1c7f3463d4946917afd20a997807300fe35d70122f3af'},
     {'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch':
      'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'},
+    {'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch':
+     '1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'},
     {'OpenBLAS-0.3.21_fix-order-vectorization.patch':
      '08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'},
-    {'OpenBLAS-0.3.26_lapack_qr_noninittest.patch': '4781bf1d7b239374fd8069e15b4e2c0ef0e8efaa1a7d4c33557bd5b27e5de77c'},
-    {'OpenBLAS-0.3.27_fix_zscal.patch': '9210d7b66538dabaddbe1bfceb16f8225708856f60876ca5561b19d3599f9fd1'},
-    {'OpenBLAS-0.3.27_riscv-drop-static-fortran-flag.patch':
-     'f374e41efffd592ab1c9034df9e7abf1045ed151f4fc0fd0da618ce9826f2d4b'},
+    {'OpenBLAS-0.3.23_disable-xDRGES-LAPACK-test.patch':
+     'ab7e0af05f9b2a2ced32f3875e1e3767d9c3531a455421a38f7324350178a0ff'},
+    {'OpenBLAS-0.3.24_fix-czasum.patch': '8132b87c519fb08caa3bd7291fe8a1d0e1afe6fcb667d16f3020b46122afe20c'},
+    {'OpenBLAS-0.3.24_fix-A64FX.patch': '3712e8c3f0024c7bb327958779c388ad0234ad6d58b7b118e605256ec089964c'},
 ]
 
 builddependencies = [
     ('make', '4.4.1'),
     # required by LAPACK test suite
-    ('Python', '3.12.3'),
+    ('Python', '3.11.5'),
 ]
 
-# INTERFACE64=1 needs if you link OpenBLAS for fortran code compied with 64 bit integers (-i8)
-# This would be in intel library naming convention ilp64
-# The USE_OPENMP=0 and USE_THREAD=0 needs for the single threaded version
-# The USE_LOCKING=1 needs for thread safe version (if threaded software calls OpenBLAS, without it
-# OpenBLAS is not thread safe (so only single threaded software would be able to use it)
-buildopts = "INTERFACE64=1 USE_OPENMP=0 USE_THREAD=0 USE_LOCKING=1 "
-testopts = buildopts
-installopts = buildopts
-
-run_lapack_tests = True
+run_lapack_tests = False
 max_failing_lapack_tests_num_errors = 150
 
 # extensive testing can be enabled by uncommenting the line below
Diff against OpenBLAS-0.3.27-GCC-13.2.0-seq-iface64.eb

easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.27-GCC-13.2.0-seq-iface64.eb

diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.27-GCC-13.2.0-seq-iface64.eb b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-NVHPC-24.1-CUDA-12.4.0.eb
index f205e063da..4c4e2945fb 100644
--- a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.27-GCC-13.2.0-seq-iface64.eb
+++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-NVHPC-24.1-CUDA-12.4.0.eb
@@ -1,11 +1,10 @@
 name = 'OpenBLAS'
-version = '0.3.27'
-versionsuffix = '-seq-iface64'
+version = '0.3.24'
 
-homepage = 'https://www.openblas.net/'
+homepage = 'http://www.openblas.net/'
 description = "OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version."
 
-toolchain = {'name': 'GCC', 'version': '13.2.0'}
+toolchain = {'name': 'NVHPC', 'version': '24.1-CUDA-12.4.0'}
 
 source_urls = [
     # order matters, trying to download the large.tgz/timing.tgz LAPACK tarballs from GitHub causes trouble
@@ -17,23 +16,26 @@ patches = [
     ('large.tgz', '.'),
     ('timing.tgz', '.'),
     'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch',
+    'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch',
     'OpenBLAS-0.3.21_fix-order-vectorization.patch',
-    'OpenBLAS-0.3.26_lapack_qr_noninittest.patch',
-    'OpenBLAS-0.3.27_fix_zscal.patch',
-    'OpenBLAS-0.3.27_riscv-drop-static-fortran-flag.patch',
+    'OpenBLAS-0.3.23_disable-xDRGES-LAPACK-test.patch',
+    'OpenBLAS-0.3.24_fix-czasum.patch',
+    'OpenBLAS-0.3.24_fix-A64FX.patch',
 ]
 checksums = [
-    {'v0.3.27.tar.gz': 'aa2d68b1564fe2b13bc292672608e9cdeeeb6dc34995512e65c3b10f4599e897'},
+    {'v0.3.24.tar.gz': 'ceadc5065da97bd92404cac7254da66cc6eb192679cf1002098688978d4d5132'},
     {'large.tgz': 'f328d88b7fa97722f271d7d0cfea1c220e0f8e5ed5ff01d8ef1eb51d6f4243a1'},
     {'timing.tgz': '999c65f8ea8bd4eac7f1c7f3463d4946917afd20a997807300fe35d70122f3af'},
     {'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch':
      'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'},
+    {'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch':
+     '1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'},
     {'OpenBLAS-0.3.21_fix-order-vectorization.patch':
      '08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'},
-    {'OpenBLAS-0.3.26_lapack_qr_noninittest.patch': '4781bf1d7b239374fd8069e15b4e2c0ef0e8efaa1a7d4c33557bd5b27e5de77c'},
-    {'OpenBLAS-0.3.27_fix_zscal.patch': '9210d7b66538dabaddbe1bfceb16f8225708856f60876ca5561b19d3599f9fd1'},
-    {'OpenBLAS-0.3.27_riscv-drop-static-fortran-flag.patch':
-     'f374e41efffd592ab1c9034df9e7abf1045ed151f4fc0fd0da618ce9826f2d4b'},
+    {'OpenBLAS-0.3.23_disable-xDRGES-LAPACK-test.patch':
+     'ab7e0af05f9b2a2ced32f3875e1e3767d9c3531a455421a38f7324350178a0ff'},
+    {'OpenBLAS-0.3.24_fix-czasum.patch': '8132b87c519fb08caa3bd7291fe8a1d0e1afe6fcb667d16f3020b46122afe20c'},
+    {'OpenBLAS-0.3.24_fix-A64FX.patch': '3712e8c3f0024c7bb327958779c388ad0234ad6d58b7b118e605256ec089964c'},
 ]
 
 builddependencies = [
@@ -42,16 +44,7 @@ builddependencies = [
     ('Python', '3.11.5'),
 ]
 
-# INTERFACE64=1 needs if you link OpenBLAS for fortran code compied with 64 bit integers (-i8)
-# This would be in intel library naming convention ilp64
-# The USE_OPENMP=0 and USE_THREAD=0 needs for the single threaded version
-# The USE_LOCKING=1 needs for thread safe version (if threaded software calls OpenBLAS, without it
-# OpenBLAS is not thread safe (so only single threaded software would be able to use it)
-buildopts = "INTERFACE64=1 USE_OPENMP=0 USE_THREAD=0 USE_LOCKING=1 "
-testopts = buildopts
-installopts = buildopts
-
-run_lapack_tests = True
+run_lapack_tests = False
 max_failing_lapack_tests_num_errors = 150
 
 # extensive testing can be enabled by uncommenting the line below

Updated software OpenMPI-4.1.6-NVHPC-24.1-CUDA-12.4.0-CUDA-12.4.0.eb

Diff against OpenMPI-5.0.3-GCC-13.3.0.eb

easybuild/easyconfigs/o/OpenMPI/OpenMPI-5.0.3-GCC-13.3.0.eb

diff --git a/easybuild/easyconfigs/o/OpenMPI/OpenMPI-5.0.3-GCC-13.3.0.eb b/easybuild/easyconfigs/o/OpenMPI/OpenMPI-4.1.6-NVHPC-24.1-CUDA-12.4.0-CUDA-12.4.0.eb
index 6864e213a9..54a2f9d9e2 100644
--- a/easybuild/easyconfigs/o/OpenMPI/OpenMPI-5.0.3-GCC-13.3.0.eb
+++ b/easybuild/easyconfigs/o/OpenMPI/OpenMPI-4.1.6-NVHPC-24.1-CUDA-12.4.0-CUDA-12.4.0.eb
@@ -1,38 +1,77 @@
 name = 'OpenMPI'
-version = '5.0.3'
+version = '4.1.6'
+versionsuffix = '-CUDA-12.4.0'
 
 homepage = 'https://www.open-mpi.org/'
 description = """The Open MPI Project is an open source MPI-3 implementation."""
 
-toolchain = {'name': 'GCC', 'version': '13.3.0'}
+toolchain = {'name': 'NVHPC', 'version': '24.1-CUDA-12.4.0'}
+toolchainopts = {'pic': True}
 
 source_urls = ['https://www.open-mpi.org/software/ompi/v%(version_major_minor)s/downloads']
 sources = [SOURCELOWER_TAR_BZ2]
-patches = [('OpenMPI-5.0.2_build-with-internal-cuda-header.patch', 1)]
+patches = [
+    'OpenMPI-4.1.1_build-with-internal-cuda-header.patch',
+    'OpenMPI-4.1.1_opal-datatype-cuda-performance.patch',
+    'OpenMPI-4.1.x_add_atomic_wmb.patch',
+]
 checksums = [
-    {'openmpi-5.0.3.tar.bz2': '990582f206b3ab32e938aa31bbf07c639368e4405dca196fabe7f0f76eeda90b'},
-    {'OpenMPI-5.0.2_build-with-internal-cuda-header.patch':
-     'f52dc470543f35efef10d651dd159c771ae25f8f76a420d20d87abf4dc769ed7'},
+    {'openmpi-4.1.6.tar.bz2': 'f740994485516deb63b5311af122c265179f5328a0d857a567b85db00b11e415'},
+    {'OpenMPI-4.1.1_build-with-internal-cuda-header.patch':
+     '63eac52736bdf7644c480362440a7f1f0ae7c7cae47b7565f5635c41793f8c83'},
+    {'OpenMPI-4.1.1_opal-datatype-cuda-performance.patch':
+     'b767c7166cf0b32906132d58de5439c735193c9fd09ec3c5c11db8d5fa68750e'},
+    {'OpenMPI-4.1.x_add_atomic_wmb.patch': '9494bbc546d661ba5189e44b4c84a7f8df30a87cdb9d96ce2e73a7c8fecba172'},
 ]
 
 builddependencies = [
-    ('pkgconf', '2.2.0'),
-    ('Autotools', '20231222'),
+    ('pkgconf', '2.0.3'),
+    ('Perl', '5.38.0'),
+    ('Autotools', '20220317'),
 ]
 
 dependencies = [
-    ('zlib', '1.3.1'),
-    ('hwloc', '2.10.0'),
+    ('zlib', '1.2.13'),
+    ('hwloc', '2.9.2'),
     ('libevent', '2.1.12'),
-    ('UCX', '1.16.0'),
-    ('libfabric', '1.21.0'),
-    ('PMIx', '5.0.2'),
-    ('PRRTE', '3.0.5'),
-    ('UCC', '1.3.0'),
+    ('UCX', '1.15.0'),
+    ('libfabric', '1.19.0'),
+    ('PMIx', '4.2.6'),
+    ('UCC', '1.2.0'),
+    ('UCC-CUDA', '1.2.0', '-CUDA-%(cudaver)s'),
 ]
 
+# Update configure to include changes from the "internal-cuda" patch
+# by running a subset of autogen.pl sufficient to achieve this
+# without doing the full, long-running regeneration.
+preconfigopts = ' && '.join([
+    'cd config',
+    'autom4te --language=m4sh opal_get_version.m4sh -o opal_get_version.sh',
+    'cd ..',
+    'autoconf',
+    'autoheader',
+    'aclocal',
+    'automake',
+    ''
+])
+
+cuda_compute_capabilities = ['8.0', '8.9']
+
 # CUDA related patches and custom configure option can be removed if CUDA support isn't wanted.
-preconfigopts = 'gcc -Iopal/mca/cuda/include -shared opal/mca/cuda/lib/cuda.c -o opal/mca/cuda/lib/libcuda.so && '
-configopts = '--with-cuda=%(start_dir)s/opal/mca/cuda --with-show-load-errors=no '
+configopts = '--with-cuda=$EBROOTCUDACORE '
+
+# disable MPI1 compatibility for now, see what breaks...
+# configopts += '--enable-mpi1-compatibility '
+
+# to enable SLURM integration (site-specific)
+# configopts += '--with-slurm --with-pmi=/usr/include/slurm --with-pmi-libdir=/usr'
+
+configopts += '--with-slurm '
+
+modextravars = {
+    'OMPI_MCA_btl': '^ofi',
+    'OMPI_MCA_mtl': '^ofi',
+    'SLURM_MPI_TYPE': 'pmix',
+}
 
 moduleclass = 'mpi'
Diff against OpenMPI-4.1.6-GCC-13.2.0.eb

easybuild/easyconfigs/o/OpenMPI/OpenMPI-4.1.6-GCC-13.2.0.eb

diff --git a/easybuild/easyconfigs/o/OpenMPI/OpenMPI-4.1.6-GCC-13.2.0.eb b/easybuild/easyconfigs/o/OpenMPI/OpenMPI-4.1.6-NVHPC-24.1-CUDA-12.4.0-CUDA-12.4.0.eb
index 831148339a..54a2f9d9e2 100644
--- a/easybuild/easyconfigs/o/OpenMPI/OpenMPI-4.1.6-GCC-13.2.0.eb
+++ b/easybuild/easyconfigs/o/OpenMPI/OpenMPI-4.1.6-NVHPC-24.1-CUDA-12.4.0-CUDA-12.4.0.eb
@@ -1,10 +1,12 @@
 name = 'OpenMPI'
 version = '4.1.6'
+versionsuffix = '-CUDA-12.4.0'
 
 homepage = 'https://www.open-mpi.org/'
 description = """The Open MPI Project is an open source MPI-3 implementation."""
 
-toolchain = {'name': 'GCC', 'version': '13.2.0'}
+toolchain = {'name': 'NVHPC', 'version': '24.1-CUDA-12.4.0'}
+toolchainopts = {'pic': True}
 
 source_urls = ['https://www.open-mpi.org/software/ompi/v%(version_major_minor)s/downloads']
 sources = [SOURCELOWER_TAR_BZ2]
@@ -36,6 +38,7 @@ dependencies = [
     ('libfabric', '1.19.0'),
     ('PMIx', '4.2.6'),
     ('UCC', '1.2.0'),
+    ('UCC-CUDA', '1.2.0', '-CUDA-%(cudaver)s'),
 ]
 
 # Update configure to include changes from the "internal-cuda" patch
@@ -52,8 +55,10 @@ preconfigopts = ' && '.join([
     ''
 ])
 
+cuda_compute_capabilities = ['8.0', '8.9']
+
 # CUDA related patches and custom configure option can be removed if CUDA support isn't wanted.
-configopts = '--with-cuda=internal '
+configopts = '--with-cuda=$EBROOTCUDACORE '
 
 # disable MPI1 compatibility for now, see what breaks...
 # configopts += '--enable-mpi1-compatibility '
@@ -61,4 +66,12 @@ configopts = '--with-cuda=internal '
 # to enable SLURM integration (site-specific)
 # configopts += '--with-slurm --with-pmi=/usr/include/slurm --with-pmi-libdir=/usr'
 
+configopts += '--with-slurm '
+
+modextravars = {
+    'OMPI_MCA_btl': '^ofi',
+    'OMPI_MCA_mtl': '^ofi',
+    'SLURM_MPI_TYPE': 'pmix',
+}
+
 moduleclass = 'mpi'

Updated software ScaLAPACK-2.2.0-nvompi-2023b-fb.eb

Diff against ScaLAPACK-2.2.0-gompi-2024a-fb.eb

easybuild/easyconfigs/s/ScaLAPACK/ScaLAPACK-2.2.0-gompi-2024a-fb.eb

diff --git a/easybuild/easyconfigs/s/ScaLAPACK/ScaLAPACK-2.2.0-gompi-2024a-fb.eb b/easybuild/easyconfigs/s/ScaLAPACK/ScaLAPACK-2.2.0-nvompi-2023b-fb.eb
index 1bccc16f38..2c34c5f352 100644
--- a/easybuild/easyconfigs/s/ScaLAPACK/ScaLAPACK-2.2.0-gompi-2024a-fb.eb
+++ b/easybuild/easyconfigs/s/ScaLAPACK/ScaLAPACK-2.2.0-nvompi-2023b-fb.eb
@@ -6,7 +6,7 @@ homepage = 'https://www.netlib.org/scalapack/'
 description = """The ScaLAPACK (or Scalable LAPACK) library includes a subset of LAPACK routines
  redesigned for distributed memory MIMD parallel computers."""
 
-toolchain = {'name': 'gompi', 'version': '2024a'}
+toolchain = {'name': 'nvompi', 'version': '2023b'}
 toolchainopts = {'extra_fflags': '-lpthread', 'openmp': True, 'pic': True, 'usempi': True}
 
 source_urls = [homepage]
@@ -18,11 +18,11 @@ checksums = [
 ]
 
 builddependencies = [
-    ('CMake', '3.29.3'),
+    ('CMake', '3.27.6'),
 ]
 
 dependencies = [
-    ('FlexiBLAS', '3.4.4'),
+    ('FlexiBLAS', '3.3.1'),
 ]
 
 # Config Opts based on AOCL User Guide:
Diff against ScaLAPACK-2.2.0-gmpich-2024.06-fb.eb

easybuild/easyconfigs/s/ScaLAPACK/ScaLAPACK-2.2.0-gmpich-2024.06-fb.eb

diff --git a/easybuild/easyconfigs/s/ScaLAPACK/ScaLAPACK-2.2.0-gmpich-2024.06-fb.eb b/easybuild/easyconfigs/s/ScaLAPACK/ScaLAPACK-2.2.0-nvompi-2023b-fb.eb
index 28d4f766d5..2c34c5f352 100644
--- a/easybuild/easyconfigs/s/ScaLAPACK/ScaLAPACK-2.2.0-gmpich-2024.06-fb.eb
+++ b/easybuild/easyconfigs/s/ScaLAPACK/ScaLAPACK-2.2.0-nvompi-2023b-fb.eb
@@ -6,7 +6,7 @@ homepage = 'https://www.netlib.org/scalapack/'
 description = """The ScaLAPACK (or Scalable LAPACK) library includes a subset of LAPACK routines
  redesigned for distributed memory MIMD parallel computers."""
 
-toolchain = {'name': 'gmpich', 'version': '2024.06'}
+toolchain = {'name': 'nvompi', 'version': '2023b'}
 toolchainopts = {'extra_fflags': '-lpthread', 'openmp': True, 'pic': True, 'usempi': True}
 
 source_urls = [homepage]
@@ -18,7 +18,7 @@ checksums = [
 ]
 
 builddependencies = [
-    ('CMake', '3.26.3'),
+    ('CMake', '3.27.6'),
 ]
 
 dependencies = [

Updated software UCC-CUDA-1.2.0-GCCcore-13.2.0-CUDA-12.4.0.eb

Diff against UCC-CUDA-1.2.0-GCCcore-12.3.0-CUDA-12.1.1.eb

easybuild/easyconfigs/u/UCC-CUDA/UCC-CUDA-1.2.0-GCCcore-12.3.0-CUDA-12.1.1.eb

diff --git a/easybuild/easyconfigs/u/UCC-CUDA/UCC-CUDA-1.2.0-GCCcore-12.3.0-CUDA-12.1.1.eb b/easybuild/easyconfigs/u/UCC-CUDA/UCC-CUDA-1.2.0-GCCcore-13.2.0-CUDA-12.4.0.eb
index 8594d50984..dc0b38a7c5 100644
--- a/easybuild/easyconfigs/u/UCC-CUDA/UCC-CUDA-1.2.0-GCCcore-12.3.0-CUDA-12.1.1.eb
+++ b/easybuild/easyconfigs/u/UCC-CUDA/UCC-CUDA-1.2.0-GCCcore-13.2.0-CUDA-12.4.0.eb
@@ -8,11 +8,9 @@ homepage = 'https://www.openucx.org/'
 description = """UCC (Unified Collective Communication) is a collective
 communication operations API and library that is flexible, complete, and 
 feature-rich for current and emerging programming models and runtimes.
-
-This module adds the UCC CUDA support.
 """
 
-toolchain = {'name': 'GCCcore', 'version': '12.3.0'}
+toolchain = {'name': 'GCCcore', 'version': '13.2.0'}
 toolchainopts = {'pic': True}
 
 source_urls = ['https://github.com/openucx/ucc/archive/refs/tags']
@@ -33,9 +31,9 @@ builddependencies = [
 
 dependencies = [
     ('UCC', version),
-    ('CUDA',  '12.1.1', '', SYSTEM),
-    ('UCX-CUDA', '1.14.1', '-CUDA-%(cudaver)s'),
-    ('NCCL', '2.18.3', '-CUDA-%(cudaver)s'),
+    ('CUDA',  '12.4.0', '', SYSTEM),
+    ('UCX-CUDA', '1.15.0', '-CUDA-%(cudaver)s'),
+    ('NCCL', '2.20.5', '-CUDA-%(cudaver)s'),
 ]
 
 preconfigopts = "./autogen.sh && "
@@ -52,4 +50,6 @@ sanity_check_commands = ["ucc_info -c"]
 
 modextrapaths = {'EB_UCC_EXTRA_COMPONENT_PATH': 'lib/ucc'}
 
+cuda_compute_capabilities = ['8.0', '8.9']
+
 moduleclass = 'lib'
Diff against UCC-CUDA-1.1.0-GCCcore-12.2.0-CUDA-12.0.0.eb

easybuild/easyconfigs/u/UCC-CUDA/UCC-CUDA-1.1.0-GCCcore-12.2.0-CUDA-12.0.0.eb

diff --git a/easybuild/easyconfigs/u/UCC-CUDA/UCC-CUDA-1.1.0-GCCcore-12.2.0-CUDA-12.0.0.eb b/easybuild/easyconfigs/u/UCC-CUDA/UCC-CUDA-1.2.0-GCCcore-13.2.0-CUDA-12.4.0.eb
index bfe211063d..dc0b38a7c5 100644
--- a/easybuild/easyconfigs/u/UCC-CUDA/UCC-CUDA-1.1.0-GCCcore-12.2.0-CUDA-12.0.0.eb
+++ b/easybuild/easyconfigs/u/UCC-CUDA/UCC-CUDA-1.2.0-GCCcore-13.2.0-CUDA-12.4.0.eb
@@ -1,43 +1,39 @@
 easyblock = 'ConfigureMake'
 
 name = 'UCC-CUDA'
-version = '1.1.0'
+version = '1.2.0'
 versionsuffix = '-CUDA-%(cudaver)s'
 
 homepage = 'https://www.openucx.org/'
 description = """UCC (Unified Collective Communication) is a collective
 communication operations API and library that is flexible, complete, and 
 feature-rich for current and emerging programming models and runtimes.
-
-This module adds the UCC CUDA support.
 """
 
-toolchain = {'name': 'GCCcore', 'version': '12.2.0'}
+toolchain = {'name': 'GCCcore', 'version': '13.2.0'}
 toolchainopts = {'pic': True}
 
 source_urls = ['https://github.com/openucx/ucc/archive/refs/tags']
 sources = ['v%(version)s.tar.gz']
 patches = [
-    '%(name)s-1.0.0_link_against_existing_UCC_libs.patch',
-    '%(name)s-%(version)s_cuda_12_mem_ops.patch',
+    '%(name)s-%(version)s_link_against_existing_UCC_libs.patch',
 ]
 checksums = [
-    {'v1.1.0.tar.gz': '74c8ba75037b5bd88cb703e8c8ae55639af3fecfd4428912a433c010c97b4df7'},
-    {'UCC-CUDA-1.0.0_link_against_existing_UCC_libs.patch':
-     '9fa11cf6779174f4e9048df5812096e4261e1769d465cc7f34a6354398876856'},
-    {'UCC-CUDA-1.1.0_cuda_12_mem_ops.patch': 'fc3ea1487d29dc626db2363ef5a79e7f0906f6a7507a363fa6167a812b143eb6'},
+    {'v1.2.0.tar.gz': 'c1552797600835c0cf401b82dc89c4d27d5717f4fb805d41daca8e19f65e509d'},
+    {'UCC-CUDA-1.2.0_link_against_existing_UCC_libs.patch':
+     '84157be5eae96d2501df076bcf0598b104adf80abeca028a144c4fb098638207'},
 ]
 
 builddependencies = [
-    ('binutils', '2.39'),
+    ('binutils', '2.40'),
     ('Autotools', '20220317'),
 ]
 
 dependencies = [
-    ('UCC', '1.1.0'),
-    ('CUDA',  '12.0.0', '', SYSTEM),
-    ('UCX-CUDA', '1.13.1', '-CUDA-%(cudaver)s'),
-    ('NCCL', '2.16.2', '-CUDA-%(cudaver)s'),
+    ('UCC', version),
+    ('CUDA',  '12.4.0', '', SYSTEM),
+    ('UCX-CUDA', '1.15.0', '-CUDA-%(cudaver)s'),
+    ('NCCL', '2.20.5', '-CUDA-%(cudaver)s'),
 ]
 
 preconfigopts = "./autogen.sh && "
@@ -54,4 +50,6 @@ sanity_check_commands = ["ucc_info -c"]
 
 modextrapaths = {'EB_UCC_EXTRA_COMPONENT_PATH': 'lib/ucc'}
 
+cuda_compute_capabilities = ['8.0', '8.9']
+
 moduleclass = 'lib'

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant