Skip to content

Commit

Permalink
Merge branch '2023.06-software.eessi.io' into ALL_0.9.2_2023a
Browse files Browse the repository at this point in the history
  • Loading branch information
bedroge authored Dec 23, 2023
2 parents 203cbd1 + 44b563c commit 61de6f2
Show file tree
Hide file tree
Showing 13 changed files with 326 additions and 68 deletions.
91 changes: 48 additions & 43 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -189,63 +189,68 @@ pr_diff=$(ls [0-9]*.diff | head -1)

# install any additional required scripts
# order is important: these are needed to install a full CUDA SDK in host_injections
install_scripts_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^install_scripts.sh$' > /dev/null; echo $?)
if [ ${install_scripts_changed} == '0' ]; then
# for now, this just reinstalls all scripts. Note the most elegant, but works
${TOPDIR}/install_scripts.sh --prefix ${EESSI_CVMFS_REPO}
fi
# for now, this just reinstalls all scripts. Note the most elegant, but works
${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}

# Install full CUDA SDK in host_injections
# Hardcode this for now, see if it works
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
${EESSI_CVMFS_REPO}/gpu_support/nvidia/install_cuda_host_injections.sh 12.1.1
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula

# Install drivers in host_injections
${EESSI_CVMFS_REPO}/gpu_support/nvidia/link_nvidia_host_libraries.sh
# TODO: this is commented out for now, because the script assumes that nvidia-smi is available and works;
# if not, an error is produced, and the bot flags the whole build as failed (even when not installing GPU software)
# ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh

# use PR patch file to determine in which easystack files stuff was added
for easystack_file in $(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing'); do

echo -e "Processing easystack file ${easystack_file}...\n\n"

# determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file
eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g')

# load EasyBuild module (will be installed if it's not available yet)
source ${TOPDIR}/load_easybuild_module.sh ${eb_version}

${EB} --show-config

echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..."

if [ -f ${easystack_file} ]; then
echo_green "Feeding easystack file ${easystack_file} to EasyBuild..."

${EB} --easystack ${TOPDIR}/${easystack_file} --robot
ec=$?

# copy EasyBuild log file if EasyBuild exited with an error
if [ ${ec} -ne 0 ]; then
eb_last_log=$(unset EB_VERBOSE; eb --last-log)
# copy to current working directory
cp -a ${eb_last_log} .
echo "Last EasyBuild log file copied from ${eb_last_log} to ${PWD}"
# copy to build logs dir (with context added)
copy_build_log "${eb_last_log}" "${build_logs_dir}"
changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing')
if [ -z ${changed_easystacks} ]; then
echo "No missing installations, party time!" # Ensure the bot report success, as there was nothing to be build here
else
for easystack_file in ${changed_easystacks}; do

echo -e "Processing easystack file ${easystack_file}...\n\n"

# determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file
eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g')

# load EasyBuild module (will be installed if it's not available yet)
source ${TOPDIR}/load_easybuild_module.sh ${eb_version}

${EB} --show-config

echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..."

if [ -f ${easystack_file} ]; then
echo_green "Feeding easystack file ${easystack_file} to EasyBuild..."

${EB} --easystack ${TOPDIR}/${easystack_file} --robot
ec=$?

# copy EasyBuild log file if EasyBuild exited with an error
if [ ${ec} -ne 0 ]; then
eb_last_log=$(unset EB_VERBOSE; eb --last-log)
# copy to current working directory
cp -a ${eb_last_log} .
echo "Last EasyBuild log file copied from ${eb_last_log} to ${PWD}"
# copy to build logs dir (with context added)
copy_build_log "${eb_last_log}" "${build_logs_dir}"
fi

$TOPDIR/check_missing_installations.sh ${TOPDIR}/${easystack_file}
else
fatal_error "Easystack file ${easystack_file} not found!"
fi

$TOPDIR/check_missing_installations.sh ${TOPDIR}/${easystack_file}
else
fatal_error "Easystack file ${easystack_file} not found!"
fi

done

done
fi

### add packages here

echo ">> Creating/updating Lmod cache..."
export LMOD_RC="${EASYBUILD_INSTALLPATH}/.lmod/lmodrc.lua"
if [ ! -f $LMOD_RC ]; then
lmodrc_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc.py$' > /dev/null; echo $?)
if [ ! -f $LMOD_RC ] || [ ${lmodrc_changed} == '0' ]; then
python3 $TOPDIR/create_lmodrc.py ${EASYBUILD_INSTALLPATH}
check_exit_code $? "$LMOD_RC created" "Failed to create $LMOD_RC"
fi
Expand Down
5 changes: 5 additions & 0 deletions bot/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,11 @@ mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR}
declare -a BUILD_STEP_ARGS=()
BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}")
BUILD_STEP_ARGS+=("--storage" "${STORAGE}")
# add options required to handle NVIDIA support
BUILD_STEP_ARGS+=("--nvidia" "all")
if [[ ! -z ${SHARED_FS_PATH} ]]; then
BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections")
fi

# prepare arguments to install_software_layer.sh (specific to build step)
declare -a INSTALL_SCRIPT_ARGS=()
Expand Down
80 changes: 80 additions & 0 deletions create_lmodrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,85 @@
}
"""

GPU_LMOD_RC ="""require("strict")
local hook = require("Hook")
local open = io.open
local function read_file(path)
local file = open(path, "rb") -- r read mode and b binary mode
if not file then return nil end
local content = file:read "*a" -- *a or *all reads the whole file
file:close()
return content
end
local function cuda_enabled_load_hook(t)
local frameStk = require("FrameStk"):singleton()
local mt = frameStk:mt()
local simpleName = string.match(t.modFullName, "(.-)/")
-- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections.
-- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse
-- to load the CUDA module and print an informative message on how to set up GPU support for EESSI
local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n"
if simpleName == 'CUDA' then
-- get the full host_injections path
local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
-- build final path where the CUDA software should be installed
local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
local cudaDirExists = isDir(cudaEasyBuildDir)
if not cudaDirExists then
local advice = "but while the module file exists, the actual software is not entirely shipped with EESSI "
advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where EESSI "
advice = advice .. "can find it.\\n"
advice = advice .. refer_to_docs
LmodError("\\nYou requested to load ", simpleName, " ", advice)
end
end
-- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the EESSI linker,
-- otherwise, refuse to load the requested module and print error message
local haveGpu = mt:haveProperty(simpleName,"arch","gpu")
if haveGpu then
local arch = os.getenv("EESSI_CPU_FAMILY") or ""
local cudaVersionFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt"
local cudaDriverFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/libcuda.so"
local cudaDriverExists = isFile(cudaDriverFile)
local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
if not (cudaDriverExists or singularityCudaExists) then
local advice = "which relies on the CUDA runtime environment and driver libraries. "
advice = advice .. "In order to be able to use the module, you will need "
advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system.\\n"
advice = advice .. refer_to_docs
LmodError("\\nYou requested to load ", simpleName, " ", advice)
else
-- CUDA driver exists, now we check its version to see if an update is needed
if cudaDriverExists then
local cudaVersion = read_file(cudaVersionFile)
local cudaVersion_req = os.getenv("EESSICUDAVERSION")
-- driver CUDA versions don't give a patch version for CUDA
local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)")
local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)")
local driver_libs_need_update = false
if major < major_req then
driver_libs_need_update = true
elseif major == major_req then
if minor < minor_req then
driver_libs_need_update = true
end
end
if driver_libs_need_update == true then
local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". "
advice = advice .. "Please update your CUDA driver libraries and then "
advice = advice .. "let EESSI know about the update.\\n"
advice = advice .. refer_to_docs
LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice)
end
end
end
end
end
hook.register("load", cuda_enabled_load_hook)
"""

def error(msg):
sys.stderr.write("ERROR: %s\n" % msg)
Expand All @@ -36,6 +115,7 @@ def error(msg):
'dot_lmod': DOT_LMOD,
'prefix': prefix,
}
lmodrc_txt += '\n' + GPU_LMOD_RC
try:
os.makedirs(os.path.dirname(lmodrc_path), exist_ok=True)
with open(lmodrc_path, 'w') as fp:
Expand Down
15 changes: 14 additions & 1 deletion create_tarball.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,23 @@ echo ">> Collecting list of files/directories to include in tarball via ${PWD}..
files_list=${tmpdir}/files.list.txt
module_files_list=${tmpdir}/module_files.list.txt

# include Lmod cache and configuration file (lmodrc.lua),
if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod ]; then
# skip whiteout files and backup copies of Lmod cache (spiderT.old.*)
find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' >> ${files_list}
fi

if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod ]; then
# include Lmod cache and configuration file (lmodrc.lua),
# skip whiteout files and backup copies of Lmod cache (spiderT.old.*)
find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' > ${files_list}
find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' >> ${files_list}
fi

# include scripts that were copied by install_scripts.sh, which we want to ship in EESSI repository
if [ -d ${eessi_version}/scripts ]; then
find ${eessi_version}/scripts -type f | grep -v '/\.wh\.' >> ${files_list}
fi

if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules ]; then
# module files
find ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules -type f | grep -v '/\.wh\.' >> ${files_list}
Expand All @@ -55,6 +67,7 @@ if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules ]; then
| grep -v '/\.wh\.' | grep -v '/\.modulerc\.lua' | sed -e 's/.lua$//' | sed -e 's@.*/modules/all/@@g' | sort -u \
>> ${module_files_list}
fi

if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/software -a -r ${module_files_list} ]; then
# installation directories but only those for which module files were created
# Note, we assume that module names (as defined by 'PACKAGE_NAME/VERSION.lua'
Expand Down
7 changes: 7 additions & 0 deletions easystacks/software.eessi.io/2023.06/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
File naming matters, since it determines the order in which easystack files are processed.

Software installed with system toolchain should be installed first,
this includes EasyBuild itself, see `eessi-2023.06-eb-4.8.2-001-system.yml` .

CUDA installations must be done before CUDA is required as dependency for something
built with a non-system toolchain, see `eessi-2023.06-eb-4.8.2-010-CUDA.yml` .
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
easyconfigs:
- CUDA-12.1.1.eb:
options:
include-easyblocks-from-pr: 3045
accept-eula-for: CUDA
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ easyconfigs:
- Boost-1.82.0-GCC-12.3.0.eb
- netCDF-4.9.2-gompi-2023a.eb
- FFmpeg-6.0-GCCcore-12.3.0.eb
- CUDA-Samples-12.1-GCC-12.3.0-CUDA-12.1.1.eb:
# use easyconfig that only install subset of CUDA samples,
# to circumvent problem with nvcc linking to glibc of host OS,
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/19189;
# and where additional samples are excluded because they fail to build on aarch64,
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451;
options:
from-pr: 19451
- ALL-0.9.2-foss-2023a.eb:
options:
from-pr: 19455
Loading

0 comments on commit 61de6f2

Please sign in to comment.