diff --git a/reframe_config_bot.py.tmpl b/reframe_config_bot.py.tmpl
index 0cc3e9f530..607373767a 100644
--- a/reframe_config_bot.py.tmpl
+++ b/reframe_config_bot.py.tmpl
@@ -34,6 +34,11 @@ site_configuration = {
                     'options': ['--mem={size}'],
                 }
             ],
+            'extras': {
+                # Make sure to round down, otherwise a job might ask for more memory
+                # than is available per node
+                'mem_per_node': __MEM_PER_NODE__,
+            },
             'max_jobs': 1
         }
     ]
diff --git a/test_suite.sh b/test_suite.sh
index 2f304dd9bc..104f09fc2c 100755
--- a/test_suite.sh
+++ b/test_suite.sh
@@ -135,7 +135,7 @@ export RFM_PREFIX=$PWD/reframe_runs
 echo "Configured reframe with the following environment variables:"
 env | grep "RFM_"
 
-# Inject correct CPU properties into the ReFrame config file
+# Inject correct CPU/memory properties into the ReFrame config file
 cpuinfo=$(lscpu)
 if [[ "${cpuinfo}" =~ CPU\(s\):[^0-9]*([0-9]+) ]]; then
     cpu_count=${BASH_REMATCH[1]}
@@ -165,6 +165,13 @@ if [[ "${cpuinfo}" =~ (Core\(s\) per socket:[^0-9]*([0-9]+)) ]]; then
 else
     fatal_error "Failed to get the number of cores per socket for the current test hardware with lscpu."
 fi
+cgroup_mem_bytes=$(cat /sys/fs/cgroup/memory/slurm/uid_${UID}/job_${SLURM_JOB_ID}/memory.limit_in_bytes)
+if [[ $? -eq 0 ]]; then
+    # Convert to MiB
+    cgroup_mem_mib=$((cgroup_mem_bytes/(1024*1024)))
+else
+    fatal_error "Failed to get the memory limit in bytes from the current cgroup"
+fi
 cp ${RFM_CONFIG_FILE_TEMPLATE} ${RFM_CONFIG_FILES}
 sed -i "s/__NUM_CPUS__/${cpu_count}/g" $RFM_CONFIG_FILES
 sed -i "s/__NUM_SOCKETS__/${socket_count}/g" $RFM_CONFIG_FILES
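
Note (not part of the patch): the new `__MEM_PER_NODE__` placeholder presumably gets a matching substitution alongside the CPU ones, something like `sed -i "s/__MEM_PER_NODE__/${cgroup_mem_mib}/g" $RFM_CONFIG_FILES`; that hunk is not shown above. One thing worth flagging is that the `cat` path hard-codes the cgroup v1 hierarchy, so the lookup will fail on hosts that run Slurm under cgroup v2. Below is a minimal sketch of a version that handles both; the v2 path is an assumption based on a typical Slurm-under-systemd layout and varies between setups.

    # Sketch only: read the job's memory limit under cgroup v1 or v2.
    # The v2 path (system.slice/slurmstepd.scope/...) is an assumed layout.
    v1_limit="/sys/fs/cgroup/memory/slurm/uid_${UID}/job_${SLURM_JOB_ID}/memory.limit_in_bytes"
    v2_limit="/sys/fs/cgroup/system.slice/slurmstepd.scope/job_${SLURM_JOB_ID}/memory.max"
    if [[ -r "${v1_limit}" ]]; then
        cgroup_mem_bytes=$(cat "${v1_limit}")
    elif [[ -r "${v2_limit}" ]]; then
        # Under cgroup v2 this file reads 'max' when no limit is set
        cgroup_mem_bytes=$(cat "${v2_limit}")
    else
        fatal_error "Failed to get the memory limit in bytes from the current cgroup"
    fi
    if [[ "${cgroup_mem_bytes}" =~ ^[0-9]+$ ]]; then
        # Round down to whole MiB so a job never asks for more than is available
        cgroup_mem_mib=$((cgroup_mem_bytes/(1024*1024)))
    else
        fatal_error "Unexpected cgroup memory limit value: ${cgroup_mem_bytes}"
    fi

`fatal_error` is the helper test_suite.sh already uses in the surrounding hunks. Checking the value against an integer regex, rather than relying on `$?`, also catches the literal 'max' (no limit) that cgroup v2 can report, which the arithmetic conversion would otherwise choke on.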