
Commit

Merge branch 'main' into espresso_fix_p3m_mesh
Satish Kamath committed Jun 27, 2024
2 parents d0ca622 + 5972a90 commit b53196e
Showing 9 changed files with 43 additions and 38 deletions.
5 changes: 5 additions & 0 deletions config/aws_mc.py
@@ -105,6 +105,11 @@
# steps inherit environment. It doesn't hurt to define this even if srun is not used
'export SLURM_EXPORT_ENV=ALL'
],
'extras': {
# Node types have somewhat varying amounts of memory, but we'll make it easy on ourselves
# All should _at least_ have this amount (30GB * 1E9 / (1024*1024) = 28610 MiB)
'mem_per_node': 28610
},
}
for system in site_configuration['systems']:
for partition in system['partitions']:
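For reference — a minimal sketch, not part of the commit, of the GB-to-MiB conversion that the comment above describes (the same arithmetic yields the izum_vega.py value further down); gb_to_mib is a hypothetical helper name:

import math

def gb_to_mib(gb: float) -> int:
    # Convert base-10 gigabytes to binary mebibytes, rounding down so the
    # configured value never exceeds what the nodes actually have.
    return math.floor(gb * 1e9 / (1024 * 1024))

print(gb_to_mib(30))   # 28610, the aws_mc.py value above
print(gb_to_mib(256))  # 244140, the izum_vega.py value below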
2 changes: 1 addition & 1 deletion config/github_actions.py
@@ -33,7 +33,7 @@
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
# This is a fictional amount, GH actions probably has less, but only does --dry-run
'mem_per_node': 30 # in GiB
'mem_per_node': 30 * 1024 # in MiB
},
}
]
2 changes: 1 addition & 1 deletion config/it4i_karolina.py
@@ -62,7 +62,7 @@
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 219.345 # in GiB
'mem_per_node': 235520 # in MiB
},
'descr': 'CPU Universal Compute Nodes, see https://docs.it4i.cz/karolina/hardware-overview/'
},
6 changes: 4 additions & 2 deletions config/izum_vega.py
@@ -62,7 +62,9 @@
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 238.418 # in GiB
# NB: Vega's MaxMemPerNode is set to 256000, but this MUST be a MB/MiB units mistake
# Most likely, it is 256 GB, so 256*1E9/(1024*1024) MiB
'mem_per_node': 244140 # in MiB
},
'descr': 'CPU partition Standard, see https://en-doc.vega.izum.si/architecture/'
},
@@ -106,7 +108,7 @@
# 'extras': {
# # Make sure to round down, otherwise a job might ask for more mem than is available
# # per node
# 'mem_per_node': 476.837 # in GiB (should be checked, its unclear from slurm.conf)
# 'mem_per_node': 476.837 * 1024 # in MiB (should be checked, its unclear from slurm.conf)
# },
# 'descr': 'GPU partition, see https://en-doc.vega.izum.si/architecture/'
# },
6 changes: 3 additions & 3 deletions config/surf_snellius.py
@@ -56,7 +56,7 @@
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 213.623 # in GiB
'mem_per_node': 229376 # in MiB
},
'descr': 'AMD Rome CPU partition with native EESSI stack'
},
@@ -80,7 +80,7 @@
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 320.434 # in GiB
'mem_per_node': 344064 # in MiB
},
'descr': 'AMD Genoa CPU partition with native EESSI stack'
},
@@ -117,7 +117,7 @@
GPU_VENDOR: GPU_VENDORS[NVIDIA],
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 457.763 # in GiB
'mem_per_node': 491520 # in MiB
},
'descr': 'Nvidia A100 GPU partition with native EESSI stack'
},
10 changes: 5 additions & 5 deletions config/vsc_hortense.py
@@ -59,7 +59,7 @@ def command(self, job):
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 234 # in GiB
'mem_per_node': 252160, # in MiB
},
},
{
@@ -91,7 +91,7 @@ def command(self, job):
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 473 # in GiB
'mem_per_node': 508160, # in MiB
},
},
{
@@ -123,7 +123,7 @@ def command(self, job):
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 234 # in GiB
'mem_per_node': 252160, # in MiB
},
},
{
@@ -150,7 +150,7 @@ def command(self, job):
GPU_VENDOR: GPU_VENDORS[NVIDIA],
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 236 # in GiB
'mem_per_node': 254400, # in MiB
},
'resources': [
{
@@ -194,7 +194,7 @@ def command(self, job):
GPU_VENDOR: GPU_VENDORS[NVIDIA],
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 475 # in GiB
'mem_per_node': 510720, # in MiB
},
'resources': [
{
46 changes: 22 additions & 24 deletions eessi/testsuite/hooks.py
@@ -383,7 +383,7 @@ def filter_valid_systems_by_device_type(test: rfm.RegressionTest, required_devic
log(f'valid_systems set to {test.valid_systems}')


def req_memory_per_node(test: rfm.RegressionTest, app_mem_req):
def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float):
"""
This hook will request a specific amount of memory per node to the batch scheduler.
First, it computes which fraction of CPUs is requested from a node, and how much the corresponding (proportional)
@@ -396,59 +396,57 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req):
Arguments:
- test: the ReFrame test to which this hook should apply
- app_mem_req: the amount of memory this application needs (per node) in GiB
- app_mem_req: the amount of memory this application needs (per node) in MiB
Example 1:
- A system with 128 cores and 64 GiB per node.
- A system with 128 cores and 64,000 MiB per node.
- The test is launched on 64 cores
- The app_mem_req is 40 (GiB)
In this case, the test requests 50% of the CPUs. Thus, the proportional amount of memory is 32 GiB.
The app_mem_req is higher. Thus, 40GiB (per node) is requested from the batch scheduler.
- The app_mem_req is 40,000 (MiB)
In this case, the test requests 50% of the CPUs. Thus, the proportional amount of memory is 32,000 MiB.
The app_mem_req is higher. Thus, 40,000 MiB (per node) is requested from the batch scheduler.
Example 2:
- A system with 128 cores per node, 128 GiB mem per node is used.
- A system with 128 cores per node, 128,000 MiB mem per node.
- The test is launched on 64 cores
- the app_mem_req is 40 (GiB)
In this case, the test requests 50% of the CPUs. Thus, the proportional amount of memory is 64 GiB.
This is higher than the app_mem_req. Thus, 64 GiB (per node) is requested from the batch scheduler.
- the app_mem_req is 40,000 (MiB)
In this case, the test requests 50% of the CPUs. Thus, the proportional amount of memory is 64,000 MiB.
This is higher than the app_mem_req. Thus, 64,000 MiB (per node) is requested from the batch scheduler.
"""
# Check that the systems.partitions.extra dict in the ReFrame config contains mem_per_node
check_extras_key_defined(test, 'mem_per_node')
# Skip if the current partition doesn't have sufficient memory to run the application
msg = f"Skipping test: nodes in this partition only have {test.current_partition.extras['mem_per_node']} GiB"
msg = f"Skipping test: nodes in this partition only have {test.current_partition.extras['mem_per_node']} MiB"
msg += " memory available (per node) accodring to the current ReFrame configuration,"
msg += f" but {app_mem_req} GiB is needed"
msg += f" but {app_mem_req} MiB is needed"
test.skip_if(test.current_partition.extras['mem_per_node'] < app_mem_req, msg)

# Compute what is higher: the requested memory, or the memory available proportional to requested CPUs
# Fraction of CPU cores requested
check_proc_attribute_defined(test, 'num_cpus')
cpu_fraction = test.num_tasks_per_node * test.num_cpus_per_task / test.current_partition.processor.num_cpus
proportional_mem = cpu_fraction * test.current_partition.extras['mem_per_node']
proportional_mem = math.floor(cpu_fraction * test.current_partition.extras['mem_per_node'])
app_mem_req = math.ceil(app_mem_req)

scheduler_name = test.current_partition.scheduler.registered_name
if scheduler_name == 'slurm' or scheduler_name == 'squeue':
# SLURMs --mem defines memory per node, see https://slurm.schedmd.com/sbatch.html
# SLURM uses megabytes and gigabytes, i.e. base-10, so conversion is 1000, not 1024
# Thus, we convert from GiB (gibibytes) to MB (megabytes) (1024 * 1024 * 1024 / (1000 * 1000) = 1073.741824)
app_mem_req = math.ceil(1073.741824 * app_mem_req)
log(f"Memory requested by application: {app_mem_req} MB")
proportional_mem = math.floor(1073.741824 * proportional_mem)
log(f"Memory proportional to the core count: {proportional_mem} MB")
# SLURM defines --mem as memory per node, see https://slurm.schedmd.com/sbatch.html
# SLURM uses MiB units by default
log(f"Memory requested by application: {app_mem_req} MiB")
log(f"Memory proportional to the core count: {proportional_mem} MiB")

# Request the maximum of the proportional_mem, and app_mem_req to the scheduler
req_mem_per_node = max(proportional_mem, app_mem_req)

test.extra_resources = {'memory': {'size': f'{req_mem_per_node}M'}}
log(f"Requested {req_mem_per_node} MB per node from the SLURM batch scheduler")
log(f"Requested {req_mem_per_node} MiB per node from the SLURM batch scheduler")

elif scheduler_name == 'torque':
    # Torque/moab requires asking for --pmem (--mem only works single node and thus doesn't generalize)
# See https://docs.adaptivecomputing.com/10-0-1/Torque/torque.htm#topics/torque/3-jobs/3.1.3-requestingRes.htm
# Units are MiB according to the documentation, thus, we simply multiply with 1024
# Units are MiB according to the documentation
    # We immediately divide by num_tasks_per_node (before rounding), since -pmem specifies memory _per process_
app_mem_req_task = math.ceil(1024 * app_mem_req / test.num_tasks_per_node)
proportional_mem_task = math.floor(1024 * proportional_mem / test.num_tasks_per_node)
app_mem_req_task = math.ceil(app_mem_req / test.num_tasks_per_node)
proportional_mem_task = math.floor(proportional_mem / test.num_tasks_per_node)

# Request the maximum of the proportional_mem, and app_mem_req to the scheduler
req_mem_per_task = max(proportional_mem_task, app_mem_req_task)
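As a reading aid — a self-contained sketch, not the actual hook, of the updated logic with every quantity in MiB; the function and parameter names here are illustrative:

import math

def required_mem_per_node(app_mem_req, mem_per_node, num_tasks_per_node,
                          num_cpus_per_task, num_cpus):
    # Fraction of the node's cores the test will occupy
    cpu_fraction = num_tasks_per_node * num_cpus_per_task / num_cpus
    # Memory proportional to that core fraction, rounded down (MiB)
    proportional_mem = math.floor(cpu_fraction * mem_per_node)
    # Request whichever is larger: the application's requirement or the proportional share
    return max(proportional_mem, math.ceil(app_mem_req))

# Example 1 from the docstring: 64 of 128 cores on a 64,000 MiB node, app needs 40,000 MiB
print(required_mem_per_node(40000, 64000, 64, 1, 128))   # 40000
# Example 2: same request on a 128,000 MiB node, so the proportional share (64,000 MiB) wins
print(required_mem_per_node(40000, 128000, 64, 1, 128))  # 64000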
2 changes: 1 addition & 1 deletion eessi/testsuite/tests/apps/QuantumESPRESSO.py
@@ -100,7 +100,7 @@ def run_after_setup(self):
@run_after('setup')
def request_mem(self):
memory_required = self.num_tasks_per_node * 0.9 + 4
hooks.req_memory_per_node(test=self, app_mem_req=memory_required)
hooks.req_memory_per_node(test=self, app_mem_req=memory_required * 1024)

@run_after('setup')
def set_omp_num_threads(self):
2 changes: 1 addition & 1 deletion eessi/testsuite/tests/apps/espresso/espresso.py
@@ -63,7 +63,7 @@ def set_mem(self):
""" Setting an extra job option of memory. Here the assumption made is that HPC systems will contain at
least 1 GB per core of memory."""
mem_required_per_node = self.num_tasks_per_node * 0.9
hooks.req_memory_per_node(test=self, app_mem_req=mem_required_per_node)
hooks.req_memory_per_node(test=self, app_mem_req=mem_required_per_node * 1024)

@deferrable
def assert_completion(self):
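Both tests now convert their per-node GiB estimate to MiB before calling the hook; a hypothetical worked example for a node running 128 tasks:

num_tasks_per_node = 128  # hypothetical value

# QuantumESPRESSO: ~0.9 GiB per task plus 4 GiB, converted to MiB
qe_mem_req = (num_tasks_per_node * 0.9 + 4) * 1024       # 122060.8 MiB
# ESPResSo: ~0.9 GiB per task (assumes at least 1 GB of memory per core), converted to MiB
espresso_mem_req = num_tasks_per_node * 0.9 * 1024       # 117964.8 MiB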
