From eeade026b281f7011ed5141be0efa05e7d309770 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Tue, 11 Jun 2024 13:00:53 +0000 Subject: [PATCH 1/5] {2023.06}[gompi/2023b] Valgrind V3.23.0 --- .../pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023b.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023b.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023b.yml index 03da218a78..b2f5a75627 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023b.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023b.yml @@ -26,3 +26,6 @@ easyconfigs: - IPython-8.17.2-GCCcore-13.2.0.eb - Qt5-5.15.13-GCCcore-13.2.0.eb - NLTK-3.8.1-foss-2023b.eb + - Valgrind-3.23.0-gompi-2023b.eb: + options: + from-pr: 20792 From ba68691f674633c165c45bd9885117fd9cc4583d Mon Sep 17 00:00:00 2001 From: Richard Top Date: Fri, 14 Jun 2024 06:50:16 +0000 Subject: [PATCH 2/5] {2023.06}[system] EasyBuild V4.9.2 --- .../2023.06/eessi-2023.06-eb-4.9.1-001-system.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml index 8e850e19b9..d827086bc4 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml @@ -6,3 +6,6 @@ easyconfigs: - cuDNN-8.9.2.26-CUDA-12.1.1.eb - cuTENSOR-2.0.1.2-CUDA-12.1.1.eb - Nextflow-23.10.0.eb + - EasyBuild-4.9.2.eb: + options: + from-pr: 20818 From 1ce68199ec5fa9b8268feaf14cdd97e7b8e8411e Mon Sep 17 00:00:00 2001 From: Richard Top Date: Mon, 17 Jun 2024 09:10:14 +0000 Subject: [PATCH 3/5] Use default memory option during reframe tests --- test_suite.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_suite.sh b/test_suite.sh index 7c24aa5a56..59407f49cb 100755 --- a/test_suite.sh +++ b/test_suite.sh @@ 
-177,7 +177,8 @@ sed -i "s/__NUM_CPUS__/${cpu_count}/g" $RFM_CONFIG_FILES sed -i "s/__NUM_SOCKETS__/${socket_count}/g" $RFM_CONFIG_FILES sed -i "s/__NUM_CPUS_PER_CORE__/${threads_per_core}/g" $RFM_CONFIG_FILES sed -i "s/__NUM_CPUS_PER_SOCKET__/${cores_per_socket}/g" $RFM_CONFIG_FILES -sed -i "s/__MEM_PER_NODE__/${cgroup_mem_mib}/g" $RFM_CONFIG_FILES +# on local systems the change below is not the case, it works on AWS +# sed -i "s/__MEM_PER_NODE__/${cgroup_mem_mib}/g" $RFM_CONFIG_FILES # Workaround for https://github.com/EESSI/software-layer/pull/467#issuecomment-1973341966 export PSM3_DEVICES='self,shm' # this is enough, since we only run single node for now From 2a9d46b79743b4334858c330c5496ce5980dcc79 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Mon, 17 Jun 2024 09:39:13 +0000 Subject: [PATCH 4/5] Use default memory option during reframe tests --- test_suite.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_suite.sh b/test_suite.sh index 59407f49cb..04f230e742 100755 --- a/test_suite.sh +++ b/test_suite.sh @@ -165,7 +165,7 @@ if [[ "${cpuinfo}" =~ (Core\(s\) per socket:[^0-9]*([0-9]+)) ]]; then else fatal_error "Failed to get the number of cores per socket for the current test hardware with lscpu." fi -cgroup_mem_bytes=$(cat /hostsys/fs/cgroup/memory/slurm/uid_${UID}/job_${SLURM_JOB_ID}/memory.limit_in_bytes) +# cgroup_mem_bytes=$(cat /hostsys/fs/cgroup/memory/slurm/uid_${UID}/job_${SLURM_JOB_ID}/memory.limit_in_bytes) if [[ $? 
-eq 0 ]]; then # Convert to MiB cgroup_mem_mib=$((cgroup_mem_bytes/(1024*1024))) From 36c11e3dfd79f36ce582701c7b9bfff6c1e983ec Mon Sep 17 00:00:00 2001 From: Richard Top Date: Mon, 17 Jun 2024 11:44:33 +0000 Subject: [PATCH 5/5] Revert back to default memory usage in ReFrame tests --- bot/test.sh | 2 +- reframe_config_bot.py.tmpl | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bot/test.sh b/bot/test.sh index 04bff346cd..d48fd74734 100755 --- a/bot/test.sh +++ b/bot/test.sh @@ -206,7 +206,7 @@ else fi # Bind mount /sys/fs/cgroup so that we can determine the amount of memory available in our cgroup for # Reframe configuration -TEST_STEP_ARGS+=("--extra-bind-paths" "/sys/fs/cgroup:/hostsys/fs/cgroup:ro") +# TEST_STEP_ARGS+=("--extra-bind-paths" "/sys/fs/cgroup:/hostsys/fs/cgroup:ro") # prepare arguments to test_suite.sh (specific to test step) declare -a TEST_SUITE_ARGS=() diff --git a/reframe_config_bot.py.tmpl b/reframe_config_bot.py.tmpl index 607373767a..63d13c7ec2 100644 --- a/reframe_config_bot.py.tmpl +++ b/reframe_config_bot.py.tmpl @@ -34,11 +34,11 @@ site_configuration = { 'options': ['--mem={size}'], } ], - 'extras': { - # Make sure to round down, otherwise a job might ask for more mem than is available - # per node - 'mem_per_node': __MEM_PER_NODE__, - }, +# 'extras': { +# # Make sure to round down, otherwise a job might ask for more mem than is available +# # per node +# 'mem_per_node': __MEM_PER_NODE__, +# }, 'max_jobs': 1 } ]