Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{2023.06,2023a,CUDA} dependencies for PyTorch-bundle-CUDA WITH compute capabilities #397

Open
wants to merge 16 commits into
base: nessi.no-2023.06
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@ display_help() {
echo " --skip-cuda-install - disable installing a full CUDA SDK in the host_injections prefix (e.g. in CI)"
}

# Function to check if a command exists
command_exists() {
  # Exit status 0 when "$1" names something the shell can execute, non-zero
  # otherwise; everything the lookup prints is thrown away.
  command -v "$1" &> /dev/null
}

function copy_build_log() {
# copy specified build log to specified directory, with some context added
build_log=${1}
Expand Down Expand Up @@ -159,8 +154,13 @@ fi
# are:
# - .lmod/lmodrc.lua
# - .lmod/SitePackage.lua
#
# We run scripts to create them if they don't exist or if the scripts have been
# changed in the PR.
#
# (TODO do we need to change the path if we have sub-directories for
# accelerators? And would we need different scripts for creating lua files under
# different directories?)

# Set base directory for software and for Lmod config files
_eessi_software_path=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
Expand Down Expand Up @@ -256,6 +256,12 @@ if command_exists "nvidia-smi"; then
${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
fi

# Install extra software that is needed (e.g., for providing a custom ctypes
# library when needed)
# The installer is started through a relative path, so abort if the directory
# change fails rather than trying to run it from some other directory.
cd "${TOPDIR}/scripts/extra" || {
  echo "ERROR: cannot change to directory '${TOPDIR}/scripts/extra'" >&2
  exit 1
}
# NOTE(review): /tmp/temp is a fixed, predictable location; a directory created
# with 'mktemp -d' would be safer. Left unchanged to keep the invocation as is.
./install_extra_packages.sh --temp-dir /tmp/temp --easystack eessi-2023.06-extra-packages.yml
# Return to the top-level directory for the remainder of the script.
cd "${TOPDIR}" || {
  echo "ERROR: cannot change back to directory '${TOPDIR}'" >&2
  exit 1
}

# use PR patch file to determine in which easystack files stuff was added
changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing')
if [ -z "${changed_easystacks}" ]; then
Expand Down
1 change: 1 addition & 0 deletions _replace_files.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__EESSI_SOFTWARE_PATH__/Python/3.11.3-GCCcore-12.3.0/lib/python3.11/ctypes/util.py replacement_files/ctypes/util.py
64 changes: 59 additions & 5 deletions bot/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -266,15 +266,69 @@ mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR}
BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}")
BUILD_STEP_ARGS+=("--storage" "${STORAGE}")
# add options required to handle NVIDIA support
# NOTE(review): the unconditional "--nvidia all" right below is followed by a
# conditional that appends "--nvidia" a second time. It looks like the
# pre-change line of this hunk; confirm that only the conditional is meant to
# remain, otherwise the option is passed twice.
BUILD_STEP_ARGS+=("--nvidia" "all")
# Choose the value for --nvidia depending on whether 'nvidia-smi' can be found.
if command_exists "nvidia-smi"; then
echo "Command 'nvidia-smi' found, using available GPU"
BUILD_STEP_ARGS+=("--nvidia" "all")
else
echo "No 'nvidia-smi' found, no available GPU but allowing overriding this check"
# presumably "install" is the mode that makes the container script skip the
# GPU driver check — verify against the consumer of --nvidia
BUILD_STEP_ARGS+=("--nvidia" "install")
fi
# Retain location for host injections so we don't reinstall CUDA
# (Always need to run the driver installation as available driver may change)
# Only added when SHARED_FS_PATH is non-empty.
if [[ ! -z ${SHARED_FS_PATH} ]]; then
BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections")
fi

# Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway)
# The value is printed before and after the export so both states show up in the log.
# NOTE(review): these four lines may be the ones this change removes (the
# override looks to be set by the container script instead) — confirm.
echo "EESSI_OVERRIDE_GPU_CHECK='${EESSI_OVERRIDE_GPU_CHECK}'"
export EESSI_OVERRIDE_GPU_CHECK=1
echo "EESSI_OVERRIDE_GPU_CHECK='${EESSI_OVERRIDE_GPU_CHECK}'"
# replace some files using lower_dirs mechanism
# - read replacements from replace_files.txt
#   each non-empty line has the format
#     __EESSI_SOFTWARE_PATH__/some_path relative_path
#   where __EESSI_SOFTWARE_PATH__ is replaced by
#     /cvmfs/repo_name/versions/repo_version/software/os_type/software_dir/software
# - for each replacement do
#   - check if the replacement file (relative_path) exists
#   - create the directory for the replacement below LOWER_DIRS
#   - copy the replacement file into that directory

#######################################
# Copy the replacement files listed in a spec file into a lower_dirs tree.
# Arguments:
#   $1 - spec file, one "<target> <replacement>" pair per line
#   $2 - root directory of the lower_dirs tree to populate
#   $3 - path substituted for the __EESSI_SOFTWARE_PATH__ placeholder
# Outputs:
#   progress messages on stdout
# Side effects:
#   creates the marker file ADD_LOWER_DIRS in the current directory as soon as
#   at least one replacement file has been copied
#######################################
stage_lower_dir_replacements() {
  local spec_file=$1
  local lower_root=$2
  local sw_path=$3
  local replace_spec target replace
  local target_full_path target_lower_path target_lower_dir

  # '|| [[ -n ... ]]' makes sure a last line without trailing newline is
  # processed too; '-r' keeps backslashes in paths untouched
  while IFS= read -r replace_spec || [[ -n "${replace_spec}" ]]; do
    # nothing to do for blank lines
    if [[ -z "${replace_spec//[[:space:]]/}" ]]; then
      continue
    fi
    echo "replace_spec: '${replace_spec}'"
    # first field is the target, second field the replacement file
    IFS=$' \t' read -r target replace _ <<< "${replace_spec}"
    target_full_path=${target/__EESSI_SOFTWARE_PATH__/"${sw_path}"}
    echo "target: '${target}'"
    echo "target_full_path: '${target_full_path}'"
    echo "replace: '${replace}'"
    if [[ -f "${replace}" ]]; then
      echo "replacement file exists"
      # drop the first three '/'-separated fields, i.e. '/cvmfs/<repo_name>/'
      # for an absolute path below /cvmfs
      target_lower_path=$(cut -f4- -d/ <<< "${target_full_path}")
      echo "target_lower_path: '${target_lower_path}'"
      target_lower_dir=$(dirname "${target_lower_path}")
      echo "target_lower_dir: '${target_lower_dir}'"
      mkdir -p "${lower_root}/${target_lower_dir}"
      cp -a -- "${replace}" "${lower_root}/${target_lower_dir}/."
      ls -lisa "${lower_root}/${target_lower_dir}"
      # marker file: tells the code after the loop to pass --lower-dirs
      touch ADD_LOWER_DIRS
    else
      echo "replacement file does NOT exist; ignoring replacement"
    fi
  done < "${spec_file}"
  return 0
}

# start without a marker left over from an earlier run
rm -f ADD_LOWER_DIRS
if [[ -f "replace_files.txt" ]]; then
  LOWER_DIRS="${STORAGE}/lower_dirs"
  mkdir -p "${LOWER_DIRS}"
  echo "LOWER_DIRS: '${LOWER_DIRS}'"

  repo_name=${EESSI_CVMFS_REPO_OVERRIDE}
  repo_version=${EESSI_VERSION_OVERRIDE}
  os_type=${EESSI_OS_TYPE}
  software_subdir_override=${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
  software_path="/cvmfs/${repo_name}/versions/${repo_version}/software/${os_type}/${software_subdir_override}/software"

  stage_lower_dir_replacements "replace_files.txt" "${LOWER_DIRS}" "${software_path}"
fi
echo "LOWER_DIRS: '${LOWER_DIRS}'"
if [[ -f ADD_LOWER_DIRS ]]; then
  BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}")
  echo "Added '--lower-dirs ${LOWER_DIRS}' to build step arguments"
else
  echo "Nothing to be added for LOWER_DIRS"
fi

# create tmp file for output of build step
build_outerr=$(mktemp build.outerr.XXXX)
Expand Down
1 change: 1 addition & 0 deletions create_lmodsitepackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
-- simpleName is a module in packagesList
-- get the full host_injections path
local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')

-- build final path where the software should be installed
local packageEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
local packageDirExists = isDir(packageEasyBuildDir)
Expand Down
42 changes: 42 additions & 0 deletions easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,45 @@ easyconfigs:
- PyTorch-2.1.2-foss-2023a-CUDA-12.1.1.eb:
options:
cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
# dependencies for PyTorch-bundle-2.1.2-foss-2023a-CUDA-12.1.1.eb
- librosa-0.10.1-foss-2023a.eb:
options:
cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
- NLTK-3.8.1-foss-2023a.eb:
options:
cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
- parameterized-0.9.0-GCCcore-12.3.0.eb:
options:
cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
- Scalene-1.5.26-GCCcore-12.3.0.eb:
options:
cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
- scikit-image-0.22.0-foss-2023a.eb:
options:
cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
- SentencePiece-0.2.0-GCC-12.3.0.eb:
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/19987
options:
from-pr: 19987
cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
- libmad-0.15.1b-GCCcore-12.3.0.eb:
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/19987
options:
from-pr: 19987
cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
- SoX-14.4.2-GCCcore-12.3.0.eb:
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/19987
options:
from-pr: 19987
cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
- tensorboard-2.15.1-gfbf-2023a.eb:
options:
cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
- tqdm-4.66.1-GCCcore-12.3.0.eb:
options:
cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
#- PyTorch-bundle-2.1.2-foss-2023a-CUDA-12.1.1.eb:
# # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20484
# options:
# from-pr: 20484
# cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
84 changes: 84 additions & 0 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import easybuild.tools.environment as env
from easybuild.easyblocks.generic.configuremake import obtain_config_guess
from easybuild.easyblocks.python import EXTS_FILTER_PYTHON_PACKAGES
from easybuild.framework.easyconfig.constants import EASYCONFIG_CONSTANTS
from easybuild.tools.build_log import EasyBuildError, print_msg
from easybuild.tools.config import build_option, update_build_option
Expand Down Expand Up @@ -311,6 +312,30 @@ def parse_hook_qt5_check_qtwebengine_disable(ec, eprefix):
raise EasyBuildError("Qt5-specific hook triggered for non-Qt5 easyconfig?!")


def parse_hook_sentencepiece_disable_tcmalloc_aarch64(ec, eprefix):
    """
    Disable using TCMalloc

    Only acts on SentencePiece 0.2.0 and only when the CPU target equals
    CPU_TARGET_AARCH64_GENERIC: every component whose spec dict sets
    easyblock 'CMakeMake' gets 'configopts' set to '-DSPM_ENABLE_TCMALLOC=OFF'.
    All other components are kept as they are.

    :param ec: parsed easyconfig (name, version and 'components' are used)
    :param eprefix: not used by this hook
    :raises EasyBuildError: when triggered for an easyconfig other than SentencePiece 0.2.0
    """
    cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
    if ec.name == 'SentencePiece' and ec.version in ['0.2.0']:
        if cpu_target == CPU_TARGET_AARCH64_GENERIC:
            print_msg("parse_hook for SentencePiece: OLD '%s'", ec['components'])
            new_components = []
            for item in ec['components']:
                # a component may come without a spec dict, and a spec dict
                # may come without an explicit 'easyblock'; leave those alone
                specs = item[2] if len(item) > 2 else None
                if isinstance(specs, dict) and specs.get('easyblock') == 'CMakeMake':
                    # work on a copy so the parsed component is not modified in place
                    new_specs = dict(specs)
                    new_specs['configopts'] = '-DSPM_ENABLE_TCMALLOC=OFF'
                    new_components.append((item[0], item[1], new_specs))
                else:
                    new_components.append(item)
            ec['components'] = new_components
            print_msg("parse_hook for SentencePiece: NEW '%s'", ec['components'])
        else:
            print_msg("parse_hook for SentencePiece on %s -> leaving configopts unchanged", cpu_target)
    else:
        raise EasyBuildError("SentencePiece-specific hook triggered for non-SentencePiece easyconfig?!")


def parse_hook_ucx_eprefix(ec, eprefix):
"""Make UCX aware of compatibility layer via additional configuration options."""
if ec.name == 'UCX':
Expand Down Expand Up @@ -349,6 +374,30 @@ def parse_hook_lammps_remove_deps_for_CI_aarch64(ec, *args, **kwargs):
raise EasyBuildError("LAMMPS-specific hook triggered for non-LAMMPS easyconfig?!")


def parse_hook_librosa_custom_ctypes(ec, *args, **kwargs):
    """
    Give the 'soundfile' extension of librosa 0.10.1 its own exts_filter.

    The filter is the first element of EXTS_FILTER_PYTHON_PACKAGES prefixed
    with 'EBPYTHONPREFIXES=<path> ', where <path> is software/custom_ctypes/1.2
    below the value of EESSI_SOFTWARE_PATH; the second element is reused as is.
    Raises EasyBuildError when called for any other easyconfig.
    """
    if not (ec.name == 'librosa' and ec.version in ('0.10.1',)):
        raise EasyBuildError("librosa/0.10.1-specific hook triggered for non-librosa/0.10.1 easyconfig?!")

    ec_dict = ec.asdict()
    ctypes_prefix = os.path.join(get_eessi_envvar('EESSI_SOFTWARE_PATH'), "software", "custom_ctypes", "1.2")
    env_setting = "EBPYTHONPREFIXES=%s" % ctypes_prefix

    def _with_custom_filter(ext):
        """Return ext unchanged unless it is 'soundfile', whose options get the filter."""
        if ext[0] != 'soundfile':
            return ext
        options = ext[2]
        options['exts_filter'] = (' '.join([env_setting, EXTS_FILTER_PYTHON_PACKAGES[0]]),
                                  EXTS_FILTER_PYTHON_PACKAGES[1])
        return (ext[0], ext[1], options)

    ec['exts_list'] = [_with_custom_filter(ext) for ext in ec_dict['exts_list']]
    print_msg("New exts_list: '%s'", ec['exts_list'])


def pre_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwargs):
"""
Solve issues with compiling or running the tests on both
Expand Down Expand Up @@ -852,17 +901,48 @@ def inject_gpu_property(ec):
return ec


def pre_module_hook(self, *args, **kwargs):
    """Main pre-module hook: run the function registered for this software name in PRE_MODULE_HOOKS, if any."""
    try:
        specific_hook = PRE_MODULE_HOOKS[self.name]
    except KeyError:
        # nothing registered for this software
        return
    specific_hook(self, *args, **kwargs)


def pre_module_hook_librosa_augment_modluafooter(self, *args, **kwargs):
    """
    Add EBPYTHONPREFIXES to modluafooter

    For librosa 0.10.1 a 'prepend_path("EBPYTHONPREFIXES", ...)' statement that
    points to software/custom_ctypes/1.2 below the value of EESSI_SOFTWARE_PATH
    is appended to the 'modluafooter' easyconfig parameter, unless the footer
    already contains that exact statement.

    :raises EasyBuildError: when triggered for anything but librosa 0.10.1
    """
    if self.name == 'librosa' and self.version == '0.10.1':
        eessi_software_path = get_eessi_envvar('EESSI_SOFTWARE_PATH')
        custom_ctypes_path = os.path.join(eessi_software_path, "software", "custom_ctypes", "1.2")
        key = 'modluafooter'
        values = ['prepend_path("EBPYTHONPREFIXES","%s")' % (custom_ctypes_path)]
        print_msg("Adding '%s' to modluafooter", values[0])
        # treat a missing, None or empty footer alike so that no leading empty
        # line ends up in the footer and the substring test always sees a string
        footer = (self.cfg[key] if key in self.cfg else None) or ''
        for value in values:
            if value not in footer:
                footer = '\n'.join([footer, value]) if footer else value
        self.cfg[key] = footer
        print_msg("Full modluafooter is '%s'", self.cfg[key])
    else:
        raise EasyBuildError("librosa/0.10.1-specific hook triggered for non-librosa/0.10.1 easyconfig?!")


# Software name -> software-specific parse hook function.
# presumably looked up by easyconfig name from a generic parse hook that is
# not part of this excerpt — verify against the caller
PARSE_HOOKS = {
'casacore': parse_hook_casacore_disable_vectorize,
'CGAL': parse_hook_cgal_toolchainopts_precise,
'fontconfig': parse_hook_fontconfig_add_fonts,
'GPAW': parse_hook_gpaw_harcoded_path,
'ImageMagick': parse_hook_imagemagick_add_dependency,
'LAMMPS': parse_hook_lammps_remove_deps_for_CI_aarch64,
'librosa': parse_hook_librosa_custom_ctypes,
'OpenBLAS': parse_hook_openblas_relax_lapack_tests_num_errors,
'Pillow-SIMD' : parse_hook_Pillow_SIMD_harcoded_paths,
'pybind11': parse_hook_pybind11_replace_catch2,
'Qt5': parse_hook_qt5_check_qtwebengine_disable,
'SentencePiece': parse_hook_sentencepiece_disable_tcmalloc_aarch64,
'UCX': parse_hook_ucx_eprefix,
}

Expand Down Expand Up @@ -909,3 +989,7 @@ def inject_gpu_property(ec):
'cuDNN': post_sanitycheck_cudnn,
'cuTENSOR': post_sanitycheck_cutensor,
}

# Software name -> function that pre_module_hook() calls for that software.
PRE_MODULE_HOOKS = {
'librosa': pre_module_hook_librosa_augment_modluafooter,
}
5 changes: 5 additions & 0 deletions eessi_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,11 @@ if [[ ${SETUP_NVIDIA} -eq 1 ]]; then
mkdir -p ${EESSI_USR_LOCAL_CUDA}
BIND_PATHS="${BIND_PATHS},${EESSI_VAR_LOG}:/var/log,${EESSI_USR_LOCAL_CUDA}:/usr/local/cuda"
[[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}"
if [[ "${NVIDIA_MODE}" == "install" ]] ; then
# No GPU so we need to "trick" Lmod to allow us to load CUDA modules even without a CUDA driver
# (this variable means EESSI_OVERRIDE_GPU_CHECK=1 will be set inside the container)
export SINGULARITYENV_EESSI_OVERRIDE_GPU_CHECK=1
fi
fi
fi

Expand Down
Loading
Loading