diff --git a/config/aws_mc.py b/config/aws_mc.py index 4419ea27..2cf5e3f4 100644 --- a/config/aws_mc.py +++ b/config/aws_mc.py @@ -105,6 +105,11 @@ # steps inherit environment. It doesn't hurt to define this even if srun is not used 'export SLURM_EXPORT_ENV=ALL' ], + 'extras': { + # Node types have somewhat varying amounts of memory, but we'll make it easy on ourselves + # All should _at least_ have this amount (30GB * 1E9 / (1024*1024) = 28610 MiB) + 'mem_per_node': 28610 + }, } for system in site_configuration['systems']: for partition in system['partitions']: diff --git a/config/github_actions.py b/config/github_actions.py index b2196b6b..7ed97422 100644 --- a/config/github_actions.py +++ b/config/github_actions.py @@ -33,7 +33,7 @@ # Make sure to round down, otherwise a job might ask for more mem than is available # per node # This is a fictional amount, GH actions probably has less, but only does --dry-run - 'mem_per_node': 30 # in GiB + 'mem_per_node': 30 * 1024 # in MiB }, } ] diff --git a/config/it4i_karolina.py b/config/it4i_karolina.py index 2bdfa035..b9dae87c 100644 --- a/config/it4i_karolina.py +++ b/config/it4i_karolina.py @@ -62,7 +62,7 @@ 'extras': { # Make sure to round down, otherwise a job might ask for more mem than is available # per node - 'mem_per_node': 219.345 # in GiB + 'mem_per_node': 235520 # in MiB }, 'descr': 'CPU Universal Compute Nodes, see https://docs.it4i.cz/karolina/hardware-overview/' }, diff --git a/config/izum_vega.py b/config/izum_vega.py index f7193aed..939c089c 100644 --- a/config/izum_vega.py +++ b/config/izum_vega.py @@ -62,7 +62,9 @@ 'extras': { # Make sure to round down, otherwise a job might ask for more mem than is available # per node - 'mem_per_node': 238.418 # in GiB + # NB: Vega's MaxMemPerNode is set to 256000, but this MUST be a MB/MiB units mistake + # Most likely, it is 256 GB, so 256*1E9/(1024*1024) MiB + 'mem_per_node': 244140 # in MiB }, 'descr': 'CPU partition Standard, see 
https://en-doc.vega.izum.si/architecture/' }, @@ -106,7 +108,7 @@ # 'extras': { # # Make sure to round down, otherwise a job might ask for more mem than is available # # per node - # 'mem_per_node': 476.837 # in GiB (should be checked, its unclear from slurm.conf) + # 'mem_per_node': 476.837 * 1024 # in MiB (should be checked, it's unclear from slurm.conf) # }, # 'descr': 'GPU partition, see https://en-doc.vega.izum.si/architecture/' # }, diff --git a/config/surf_snellius.py b/config/surf_snellius.py index d8bcc36c..827ca32c 100644 --- a/config/surf_snellius.py +++ b/config/surf_snellius.py @@ -56,7 +56,7 @@ 'extras': { # Make sure to round down, otherwise a job might ask for more mem than is available # per node - 'mem_per_node': 213.623 # in GiB + 'mem_per_node': 229376 # in MiB }, 'descr': 'AMD Rome CPU partition with native EESSI stack' }, @@ -80,7 +80,7 @@ 'extras': { # Make sure to round down, otherwise a job might ask for more mem than is available # per node - 'mem_per_node': 320.434 # in GiB + 'mem_per_node': 344064 # in MiB }, 'descr': 'AMD Genoa CPU partition with native EESSI stack' }, @@ -117,7 +117,7 @@ GPU_VENDOR: GPU_VENDORS[NVIDIA], # Make sure to round down, otherwise a job might ask for more mem than is available # per node - 'mem_per_node': 457.763 # in GiB + 'mem_per_node': 491520 # in MiB }, 'descr': 'Nvidia A100 GPU partition with native EESSI stack' }, diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 1615330b..312c72f4 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -59,7 +59,7 @@ def command(self, job): 'extras': { # Make sure to round down, otherwise a job might ask for more mem than is available # per node - 'mem_per_node': 234 # in GiB + 'mem_per_node': 252160, # in MiB }, }, { @@ -91,7 +91,7 @@ def command(self, job): 'extras': { # Make sure to round down, otherwise a job might ask for more mem than is available # per node - 'mem_per_node': 473 # in GiB + 'mem_per_node': 508160, # in MiB }, }, { @@ 
-123,7 +123,7 @@ def command(self, job): 'extras': { # Make sure to round down, otherwise a job might ask for more mem than is available # per node - 'mem_per_node': 234 # in GiB + 'mem_per_node': 252160, # in MiB }, }, { @@ -150,7 +150,7 @@ def command(self, job): GPU_VENDOR: GPU_VENDORS[NVIDIA], # Make sure to round down, otherwise a job might ask for more mem than is available # per node - 'mem_per_node': 236 # in GiB + 'mem_per_node': 254400, # in MiB }, 'resources': [ { @@ -194,7 +194,7 @@ def command(self, job): GPU_VENDOR: GPU_VENDORS[NVIDIA], # Make sure to round down, otherwise a job might ask for more mem than is available # per node - 'mem_per_node': 475 # in GiB + 'mem_per_node': 510720, # in MiB }, 'resources': [ { diff --git a/eessi/testsuite/hooks.py b/eessi/testsuite/hooks.py index 5dd98a7f..ab711955 100644 --- a/eessi/testsuite/hooks.py +++ b/eessi/testsuite/hooks.py @@ -383,7 +383,7 @@ def filter_valid_systems_by_device_type(test: rfm.RegressionTest, required_devic log(f'valid_systems set to {test.valid_systems}') -def req_memory_per_node(test: rfm.RegressionTest, app_mem_req): +def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float): """ This hook will request a specific amount of memory per node to the batch scheduler. First, it computes which fraction of CPUs is requested from a node, and how much the corresponding (proportional) @@ -396,59 +396,57 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req): Arguments: - test: the ReFrame test to which this hook should apply - - app_mem_req: the amount of memory this application needs (per node) in GiB + - app_mem_req: the amount of memory this application needs (per node) in MiB Example 1: - - A system with 128 cores and 64 GiB per node. + - A system with 128 cores and 64,000 MiB per node. - The test is launched on 64 cores - - The app_mem_req is 40 (GiB) - In this case, the test requests 50% of the CPUs. Thus, the proportional amount of memory is 32 GiB. 
- The app_mem_req is higher. Thus, 40GiB (per node) is requested from the batch scheduler. + - The app_mem_req is 40,000 (MiB) + In this case, the test requests 50% of the CPUs. Thus, the proportional amount of memory is 32,000 MiB. + The app_mem_req is higher. Thus, 40,000 MiB (per node) is requested from the batch scheduler. Example 2: - - A system with 128 cores per node, 128 GiB mem per node is used. + - A system with 128 cores per node, 128,000 MiB mem per node. - The test is launched on 64 cores - - the app_mem_req is 40 (GiB) - In this case, the test requests 50% of the CPUs. Thus, the proportional amount of memory is 64 GiB. - This is higher than the app_mem_req. Thus, 64 GiB (per node) is requested from the batch scheduler. + - the app_mem_req is 40,000 (MiB) + In this case, the test requests 50% of the CPUs. Thus, the proportional amount of memory is 64,000 MiB. + This is higher than the app_mem_req. Thus, 64,000 MiB (per node) is requested from the batch scheduler. """ # Check that the systems.partitions.extra dict in the ReFrame config contains mem_per_node check_extras_key_defined(test, 'mem_per_node') # Skip if the current partition doesn't have sufficient memory to run the application - msg = f"Skipping test: nodes in this partition only have {test.current_partition.extras['mem_per_node']} GiB" + msg = f"Skipping test: nodes in this partition only have {test.current_partition.extras['mem_per_node']} MiB" msg += " memory available (per node) accodring to the current ReFrame configuration," - msg += f" but {app_mem_req} GiB is needed" + msg += f" but {app_mem_req} MiB is needed" test.skip_if(test.current_partition.extras['mem_per_node'] < app_mem_req, msg) # Compute what is higher: the requested memory, or the memory available proportional to requested CPUs # Fraction of CPU cores requested check_proc_attribute_defined(test, 'num_cpus') cpu_fraction = test.num_tasks_per_node * test.num_cpus_per_task / test.current_partition.processor.num_cpus - 
proportional_mem = cpu_fraction * test.current_partition.extras['mem_per_node'] + proportional_mem = math.floor(cpu_fraction * test.current_partition.extras['mem_per_node']) + app_mem_req = math.ceil(app_mem_req) scheduler_name = test.current_partition.scheduler.registered_name if scheduler_name == 'slurm' or scheduler_name == 'squeue': - # SLURMs --mem defines memory per node, see https://slurm.schedmd.com/sbatch.html - # SLURM uses megabytes and gigabytes, i.e. base-10, so conversion is 1000, not 1024 - # Thus, we convert from GiB (gibibytes) to MB (megabytes) (1024 * 1024 * 1024 / (1000 * 1000) = 1073.741824) - app_mem_req = math.ceil(1073.741824 * app_mem_req) - log(f"Memory requested by application: {app_mem_req} MB") - proportional_mem = math.floor(1073.741824 * proportional_mem) - log(f"Memory proportional to the core count: {proportional_mem} MB") + # SLURM defines --mem as memory per node, see https://slurm.schedmd.com/sbatch.html + # SLURM uses MiB units by default + log(f"Memory requested by application: {app_mem_req} MiB") + log(f"Memory proportional to the core count: {proportional_mem} MiB") # Request the maximum of the proportional_mem, and app_mem_req to the scheduler req_mem_per_node = max(proportional_mem, app_mem_req) test.extra_resources = {'memory': {'size': f'{req_mem_per_node}M'}} - log(f"Requested {req_mem_per_node} MB per node from the SLURM batch scheduler") + log(f"Requested {req_mem_per_node} MiB per node from the SLURM batch scheduler") elif scheduler_name == 'torque': # Torque/moab requires asking for --pmem (--mem only works single node and thus doesnt generalize) # See https://docs.adaptivecomputing.com/10-0-1/Torque/torque.htm#topics/torque/3-jobs/3.1.3-requestingRes.htm - # Units are MiB according to the documentation, thus, we simply multiply with 1024 + # Units are MiB according to the documentation # We immediately divide by num_tasks_per_node (before rounding), since -pmem specifies memroy _per process_ - app_mem_req_task = 
math.ceil(1024 * app_mem_req / test.num_tasks_per_node) - proportional_mem_task = math.floor(1024 * proportional_mem / test.num_tasks_per_node) + app_mem_req_task = math.ceil(app_mem_req / test.num_tasks_per_node) + proportional_mem_task = math.floor(proportional_mem / test.num_tasks_per_node) # Request the maximum of the proportional_mem, and app_mem_req to the scheduler req_mem_per_task = max(proportional_mem_task, app_mem_req_task) diff --git a/eessi/testsuite/tests/apps/QuantumESPRESSO.py b/eessi/testsuite/tests/apps/QuantumESPRESSO.py index 050e43d3..288354b2 100644 --- a/eessi/testsuite/tests/apps/QuantumESPRESSO.py +++ b/eessi/testsuite/tests/apps/QuantumESPRESSO.py @@ -100,7 +100,7 @@ def run_after_setup(self): @run_after('setup') def request_mem(self): memory_required = self.num_tasks_per_node * 0.9 + 4 - hooks.req_memory_per_node(test=self, app_mem_req=memory_required) + hooks.req_memory_per_node(test=self, app_mem_req=memory_required * 1024) @run_after('setup') def set_omp_num_threads(self): diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index 7bb2da0c..7cd59051 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -63,7 +63,7 @@ def set_mem(self): """ Setting an extra job option of memory. Here the assumption made is that HPC systems will contain at least 1 GB per core of memory.""" mem_required_per_node = self.num_tasks_per_node * 0.9 - hooks.req_memory_per_node(test=self, app_mem_req=mem_required_per_node) + hooks.req_memory_per_node(test=self, app_mem_req=mem_required_per_node * 1024) @deferrable def assert_completion(self):