forked from EESSI/software-layer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
EESSI-install-software.sh
executable file
·331 lines (280 loc) · 12.8 KB
/
EESSI-install-software.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
#!/bin/bash
#
# Script to install EESSI software stack (version set through init/eessi_defaults)
# see example parsing of command line arguments at
# https://wiki.bash-hackers.org/scripting/posparams#using_a_while_loop
# https://stackoverflow.com/questions/192249/how-do-i-parse-command-line-arguments-in-bash
# Print the usage summary of this script to stdout: one header line followed
# by one line per supported command line option.
display_help() {
    printf '%s\n' \
        "usage: $0 [OPTIONS]" \
        " --build-logs-dir - location to copy EasyBuild logs to for failed builds" \
        " -g | --generic - instructs script to build for generic architecture target" \
        " -h | --help - display this usage information" \
        " -x | --http-proxy URL - provides URL for the environment variable http_proxy" \
        " -y | --https-proxy URL - provides URL for the environment variable https_proxy" \
        " --shared-fs-path - path to directory on shared filesystem that can be used" \
        " --skip-cuda-install - disable installing a full CUDA SDK in the host_injections prefix (e.g. in CI)"
}
# Tell whether a command can be resolved by the shell.
#   $1 - name of the command to look up
# Returns 0 when 'command -v' finds it, 1 otherwise; prints nothing.
command_exists() {
    local candidate="$1"
    if command -v "${candidate}" > /dev/null 2>&1; then
        return 0
    fi
    return 1
}
# Copy an EasyBuild log file into the build logs directory and append some
# context about where it came from.
#
# Arguments:
#   $1 - path of the build log to copy
#   $2 - build logs directory; when empty, nothing is done
# Globals read:
#   SLURM_JOB_ID   - when non-empty the copy goes to <dir>/jobs/<id>/,
#                    otherwise to <dir>/non-jobs/
#   PWD, eb_version, easystack_file - recorded in the appended context
# Returns:
#   0 on success (or when no build logs directory was given), 1 when the
#   target directory could not be created or the log could not be copied
function copy_build_log() {
    local build_log="${1}"
    local build_logs_dir="${2}"
    local log_filename build_log_path

    # copying is optional: only done if a build logs directory was specified
    if [[ -z "${build_logs_dir}" ]]; then
        return 0
    fi

    log_filename="$(basename "${build_log}")"
    if [[ -n "${SLURM_JOB_ID}" ]]; then
        # use subdirectory for build log in context of a Slurm job
        build_log_path="${build_logs_dir}/jobs/${SLURM_JOB_ID}/${log_filename}"
    else
        build_log_path="${build_logs_dir}/non-jobs/${log_filename}"
    fi

    # do not append context to a file that was never copied
    if ! mkdir -p "$(dirname "${build_log_path}")" || ! cp -a "${build_log}" "${build_log_path}"; then
        echo "Failed to copy EasyBuild log file ${build_log} to ${build_log_path}" >&2
        return 1
    fi
    chmod 0644 "${build_log_path}"

    # add context to end of copied log file
    {
        echo
        echo "Context from which build log was copied:"
        echo "- original path of build log: ${build_log}"
        echo "- working directory: ${PWD}"
        echo "- Slurm job ID: ${SLURM_JOB_ID}"
        echo "- EasyBuild version: ${eb_version}"
        echo "- easystack file: ${easystack_file}"
    } >> "${build_log_path}"
    echo "EasyBuild log file ${build_log} copied to ${build_log_path} (with context appended)"
}
# Abort with a usage error when an option that takes a value is the last word
# on the command line. Without this check 'shift 2' fails and leaves the
# positional parameters untouched, so the parsing loop below would never end.
#   $1 - option being parsed
#   $2 - number of command line words still to be parsed (including $1)
_require_option_value() {
    if [[ "$2" -lt 2 ]]; then
        echo "Error: option $1 requires an argument" >&2
        exit 1
    fi
}

# Command line parsing: options set (and mostly export) variables used further
# down; anything that is not an option is collected in POSITIONAL_ARGS and
# restored as the positional parameters once parsing is done.
POSITIONAL_ARGS=()

while [[ $# -gt 0 ]]; do
    case "$1" in
        -g|--generic)
            EASYBUILD_OPTARCH="GENERIC"
            shift
            ;;
        -h|--help)
            display_help
            # no shifting needed here, we're done.
            exit 0
            ;;
        -x|--http-proxy)
            _require_option_value "$1" "$#"
            export http_proxy="$2"
            shift 2
            ;;
        -y|--https-proxy)
            _require_option_value "$1" "$#"
            export https_proxy="$2"
            shift 2
            ;;
        --build-logs-dir)
            _require_option_value "$1" "$#"
            export build_logs_dir="${2}"
            shift 2
            ;;
        --shared-fs-path)
            _require_option_value "$1" "$#"
            export shared_fs_path="${2}"
            shift 2
            ;;
        --skip-cuda-install)
            export skip_cuda_install=True
            shift 1
            ;;
        -*)
            # also covers long options: '--foo' matches '-*'
            echo "Error: Unknown option: $1" >&2
            exit 1
            ;;
        *)  # No more options
            POSITIONAL_ARGS+=("$1") # save positional arg
            shift
            ;;
    esac
done

set -- "${POSITIONAL_ARGS[@]}"
# directory that holds this script; helper scripts are located relative to it
# (quoted so that a checkout path containing whitespace keeps working)
TOPDIR=$(dirname "$(realpath "$0")")

# helper functions used below (fatal_error, echo_green, ...) are expected to
# come from this file
source "${TOPDIR}/scripts/utils.sh"

# honor $TMPDIR if it is already defined, use /tmp otherwise
if [[ -z "${TMPDIR}" ]]; then
    export WORKDIR="/tmp/${USER}"
else
    export WORKDIR="${TMPDIR}/${USER}"
fi

# from here on $TMPDIR refers to a freshly created scratch directory, which is
# removed again at the end of this script; make sure it was really created
TMPDIR=$(mktemp -d) || fatal_error "Failed to create temporary directory using 'mktemp -d'"
echo ">> Setting up environment..."

source "${TOPDIR}/init/minimal_eessi_env"

# the quotes matter: an unquoted empty $EESSI_CVMFS_REPO turns the test into
# '[ -d ]', which is true, so a missing repository would be reported as OK
if [[ -d "${EESSI_CVMFS_REPO}" ]]; then
    echo_green "$EESSI_CVMFS_REPO available, OK!"
else
    fatal_error "$EESSI_CVMFS_REPO is not available!"
fi

# make sure we're in Prefix environment by checking $SHELL
# (right-hand side is quoted to get a literal comparison, not a glob match)
if [[ "${SHELL}" == "${EPREFIX}/bin/bash" ]]; then
    echo_green ">> It looks like we're in a Gentoo Prefix environment, good!"
else
    fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!"
fi
# keep Python bytecode of EasyBuild out of the EasyBuild installation
# directory by redirecting the pycache to the scratch directory
export PYTHONPYCACHEPREFIX="${TMPDIR}/pycache"

# Settings that depend on whether a generic build was requested:
#   DETECTION_PARAMETERS - extra options for the subdirectory detection script
#   GENERIC              - 1 for a generic build, 0 otherwise
#   EB                   - EasyBuild command (plus options) to run
case "${EASYBUILD_OPTARCH}" in
    GENERIC)
        echo_yellow ">> GENERIC build requested, taking appropriate measures!"
        DETECTION_PARAMETERS=' --generic'
        GENERIC=1
        EB='eb --optarch=GENERIC'
        ;;
    *)
        DETECTION_PARAMETERS=''
        GENERIC=0
        EB='eb'
        ;;
esac
echo ">> Determining software subdirectory to use for current build host..."
if [[ -z "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then
    # assignment and export are split so the exit code of the detection script
    # is not masked by 'export';
    # $DETECTION_PARAMETERS is left unquoted on purpose: it holds zero or more options
    EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 "${TOPDIR}/eessi_software_subdir.py" $DETECTION_PARAMETERS)
    detection_exit_code=$?
    export EESSI_SOFTWARE_SUBDIR_OVERRIDE
    # stop here if detection failed: the paths derived from this value further
    # down would otherwise point at the wrong directory
    if [[ ${detection_exit_code} -ne 0 || -z "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then
        fatal_error "Failed to determine \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script"
    fi
    echo ">> Determined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script"
else
    echo ">> Picking up pre-defined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE: ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}"
    # make sure directory exists (since it's expected by init/eessi_environment_variables when using archdetect)
    mkdir -p "${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}"
fi
# When this script runs for the first time, e.g., to start building for a new
# stack, the following files must be present in
# ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
# - .lmod/lmodrc.lua
# - .lmod/SitePackage.lua
# Each one that is missing is generated with the corresponding create_*.py script.
_eessi_software_path="${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}"
_lmod_cfg_dir="${_eessi_software_path}/.lmod"
_lmod_rc_file="${_lmod_cfg_dir}/lmodrc.lua"
_lmod_sitepackage_file="${_lmod_cfg_dir}/SitePackage.lua"

[[ -f "${_lmod_rc_file}" ]] || {
    # show which python3 is going to be used
    command -V python3
    python3 "${TOPDIR}/create_lmodrc.py" "${_eessi_software_path}"
}

[[ -f "${_lmod_sitepackage_file}" ]] || {
    # show which python3 is going to be used
    command -V python3
    python3 "${TOPDIR}/create_lmodsitepackage.py" "${_eessi_software_path}"
}
# Set all the EESSI environment variables (respecting $EESSI_SOFTWARE_SUBDIR_OVERRIDE);
# two variables are given as prefix assignments to the 'source' command:
# $EESSI_SILENT - don't print any messages
# $EESSI_BASIC_ENV - give a basic set of environment variables
EESSI_SILENT=1 EESSI_BASIC_ENV=1 source "${TOPDIR}/init/eessi_environment_variables"

# the software subdirectory must be defined and must agree with the override
if [[ -n "${EESSI_SOFTWARE_SUBDIR}" && "${EESSI_SOFTWARE_SUBDIR}" == "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then
    echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!"
elif [[ -z "${EESSI_SOFTWARE_SUBDIR}" ]]; then
    fatal_error "Failed to determine software subdirectory?!"
else
    fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!"
fi
echo ">> Initializing Lmod..."
source "${EPREFIX}/usr/share/Lmod/init/bash"

# run 'ml --version' as a smoke test; its output is kept in the scratch
# directory so it can be inspected when the test fails
ml_version_out="${TMPDIR}/ml.out"
if ml --version &> "${ml_version_out}"; then
    echo_green ">> Found Lmod ${LMOD_VERSION}"
else
    fatal_error "Failed to initialize Lmod?! (see output in ${ml_version_out})"
fi
echo ">> Configuring EasyBuild..."
source "${TOPDIR}/configure_easybuild"

# with --shared-fs-path, put the sources directory on the shared filesystem
# in front of the EasyBuild source path
if [[ -n "${shared_fs_path}" ]]; then
    shared_eb_sourcepath="${shared_fs_path}/easybuild/sources"
    echo ">> Using ${shared_eb_sourcepath} as shared EasyBuild source path"
    export EASYBUILD_SOURCEPATH="${shared_eb_sourcepath}:${EASYBUILD_SOURCEPATH}"
fi
echo ">> Setting up \$MODULEPATH..."
# start from a clean slate: no loaded modules ...
module --force purge
# ... and none of the entries currently in $MODULEPATH
module unuse $MODULEPATH

# when building for an accelerator target, the CPU-only modules must remain
# available too: their location is the installation path with the
# '/accel/<target>' part removed
if [[ -n "${EESSI_ACCELERATOR_TARGET}" ]]; then
    CPU_ONLY_MODULES_PATH="$(echo $EASYBUILD_INSTALLPATH | sed "s@/accel/${EESSI_ACCELERATOR_TARGET}@@g")/modules/all"
    if [[ ! -d "${CPU_ONLY_MODULES_PATH}" ]]; then
        fatal_error "Derived path to CPU-only modules does not exist: ${CPU_ONLY_MODULES_PATH}"
    else
        module use "${CPU_ONLY_MODULES_PATH}"
    fi
fi

module use "${EASYBUILD_INSTALLPATH}/modules/all"

if [[ -n "${MODULEPATH}" ]]; then
    echo_green ">> MODULEPATH set up: ${MODULEPATH}"
else
    fatal_error "Failed to set up \$MODULEPATH?!"
fi
# assume there's only one diff file that corresponds to the PR patch file;
# if there are several, the first one in glob (sorted) order is used.
# A glob loop is used instead of parsing the output of 'ls'; $pr_diff is left
# empty when no such file exists in the current working directory.
pr_diff=''
for _pr_diff_candidate in [0-9]*.diff; do
    # an unmatched glob is left as the literal pattern, so check the file exists
    if [[ -f "${_pr_diff_candidate}" ]]; then
        pr_diff="${_pr_diff_candidate}"
        break
    fi
done
unset _pr_diff_candidate
if [[ -z "${pr_diff}" ]]; then
    echo "WARNING: no PR diff file matching '[0-9]*.diff' found in ${PWD}" >&2
fi
# install any additional required scripts into ${EESSI_PREFIX}
# order is important: these are needed to install a full CUDA SDK in host_injections
# for now, this just reinstalls all scripts. Not the most elegant, but works
${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}
# Install full CUDA SDK in host_injections
# Hardcode this for now, see if it works
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
# Allow skipping CUDA SDK install in e.g. CI environments
# The install_cuda... script uses EasyBuild. So, we need to check if we have EB
# or skip this step.

# keep the complete 'module avail' output in its own file, so that the file
# mentioned in the message below really shows which modules were visible
module_avail_out="${TMPDIR}/module_avail.out"
module avail &> "${module_avail_out}"
if grep -q EasyBuild "${module_avail_out}"; then
    echo_green ">> Found an EasyBuild module"
else
    echo_yellow ">> No EasyBuild module found: skipping step to install CUDA (see output in ${module_avail_out})"
    export skip_cuda_install=True
fi

# any non-empty value of $skip_cuda_install disables the installation
if [[ -z "${skip_cuda_install}" ]]; then
    "${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh" -c 12.1.1 --accept-cuda-eula
else
    echo "Skipping installation of CUDA SDK in host_injections, since the --skip-cuda-install flag was passed OR no EasyBuild module was found"
fi
# Make the NVIDIA drivers of the host usable in host_injections; this is only
# attempted when the 'nvidia-smi' command is available on the host
_link_nvidia_libs_script="${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh"
if command_exists "nvidia-smi"; then
    echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..."
    "${_link_nvidia_libs_script}"
fi
# use PR patch file to determine in which easystack files stuff was added:
# take the new-side file names ('+++ <prefix>/<path>' lines), strip the
# one-letter prefix, keep only easystacks/*.yml files and leave out the ones
# with 'known-issues' or 'missing' in their path
changed_easystacks=''
# only read the diff if it is an existing file: with an empty ${pr_diff} a
# bare 'cat' would read from stdin instead
if [[ -n "${pr_diff}" && -f "${pr_diff}" ]]; then
    changed_easystacks=$(grep '^+++' "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' \
        | grep '^easystacks/.*yml$' | grep -E -v 'known-issues|missing')
fi

if [[ -z "${changed_easystacks}" ]]; then
    # ensure the bot reports success, as there was nothing to be built here
    echo "No missing installations, party time!"
else
    # first process rebuilds, if any, then easystack files for new installations
    # "|| true" is used to make sure that the grep command always returns success
    rebuild_easystacks=$(echo "${changed_easystacks}" | (grep "/rebuilds/" || true))
    new_easystacks=$(echo "${changed_easystacks}" | (grep -v "/rebuilds/" || true))

    # both lists are newline-separated and deliberately left unquoted, so that
    # word splitting yields one easystack file per iteration
    for easystack_file in ${rebuild_easystacks} ${new_easystacks}; do

        echo -e "Processing easystack file ${easystack_file}...\n\n"

        # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file
        eb_version=$(echo "${easystack_file}" | sed 's/.*eb-\([0-9.]*\).*/\1/g')

        # load EasyBuild module (will be installed if it's not available yet)
        source "${TOPDIR}/load_easybuild_module.sh" "${eb_version}"

        # ${EB} is left unquoted on purpose: it may hold the command plus options
        ${EB} --show-config

        echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..."

        # check for the same path that is handed to EasyBuild below
        if [[ -f "${TOPDIR}/${easystack_file}" ]]; then
            echo_green "Feeding easystack file ${easystack_file} to EasyBuild..."

            ${EB} --easystack "${TOPDIR}/${easystack_file}" --robot
            ec=$?

            # copy EasyBuild log file if EasyBuild exited with an error
            if [[ ${ec} -ne 0 ]]; then
                eb_last_log=$(unset EB_VERBOSE; eb --last-log)
                # copy to current working directory
                cp -a "${eb_last_log}" .
                echo "Last EasyBuild log file copied from ${eb_last_log} to ${PWD}"
                # copy to build logs dir (with context added)
                copy_build_log "${eb_last_log}" "${build_logs_dir}"
            fi

            "${TOPDIR}/check_missing_installations.sh" "${TOPDIR}/${easystack_file}" "${TOPDIR}/${pr_diff}"
        else
            fatal_error "Easystack file ${easystack_file} not found!"
        fi

    done
fi
echo ">> Creating/updating Lmod RC file..."
export LMOD_CONFIG_DIR="${EASYBUILD_INSTALLPATH}/.lmod"
lmod_rc_file="$LMOD_CONFIG_DIR/lmodrc.lua"

# lmodrc_changed is 0 if the PR diff touches create_lmodrc.py, 1 otherwise.
# The diff is only read when it is an existing file: with an empty ${pr_diff}
# a bare 'cat' would read from stdin instead.
lmodrc_changed=1
if [[ -n "${pr_diff}" && -f "${pr_diff}" ]] \
    && grep '^+++' "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep -q '^create_lmodrc\.py$'; then
    lmodrc_changed=0
fi

# (re)generate the RC file when it does not exist yet or its generator changed
if [[ ! -f "${lmod_rc_file}" || "${lmodrc_changed}" == '0' ]]; then
    python3 "${TOPDIR}/create_lmodrc.py" "${EASYBUILD_INSTALLPATH}"
    check_exit_code $? "$lmod_rc_file created" "Failed to create $lmod_rc_file"
fi
echo ">> Creating/updating Lmod SitePackage.lua ..."
export LMOD_PACKAGE_PATH="${EASYBUILD_INSTALLPATH}/.lmod"
lmod_sitepackage_file="$LMOD_PACKAGE_PATH/SitePackage.lua"

# sitepackage_changed is 0 if the PR diff touches create_lmodsitepackage.py,
# 1 otherwise. The diff is only read when it is an existing file: with an
# empty ${pr_diff} a bare 'cat' would read from stdin instead.
sitepackage_changed=1
if [[ -n "${pr_diff}" && -f "${pr_diff}" ]] \
    && grep '^+++' "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep -q '^create_lmodsitepackage\.py$'; then
    sitepackage_changed=0
fi

# (re)generate SitePackage.lua when it does not exist yet or its generator changed
if [[ ! -f "${lmod_sitepackage_file}" || "${sitepackage_changed}" == '0' ]]; then
    python3 "${TOPDIR}/create_lmodsitepackage.py" "${EASYBUILD_INSTALLPATH}"
    check_exit_code $? "$lmod_sitepackage_file created" "Failed to create $lmod_sitepackage_file"
fi
echo ">> Cleaning up ${TMPDIR}..."
# quoted so a path with whitespace is removed as one directory; ':?' aborts
# instead of calling 'rm -r' without a valid operand when $TMPDIR is empty
rm -r -- "${TMPDIR:?TMPDIR is empty, refusing to clean up}"