Skip to content

Commit

Permalink
add support for setting the exact required memory
Browse files Browse the repository at this point in the history
  • Loading branch information
Samuel Moors committed Dec 1, 2024
1 parent 8054e92 commit 5ce05c3
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 13 deletions.
1 change: 1 addition & 0 deletions eessi/testsuite/eessi_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class EESSI_Mixin(RegressionMixin):

# Set defaults for these class variables; they can be overridden by a child class if desired
measure_memory_usage = variable(bool, value=False)
exact_memory = variable(bool, value=False)
scale = parameter(SCALES.keys())
bench_name = None
bench_name_ci = None
Expand Down
26 changes: 13 additions & 13 deletions eessi/testsuite/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""
import math
import shlex
import warnings

import reframe as rfm
import reframe.core.logging as rflog
Expand Down Expand Up @@ -432,10 +431,10 @@ def _set_or_append_valid_systems(test: rfm.RegressionTest, valid_systems: str):
elif len(test.valid_systems) == 1:
test.valid_systems[0] = f'{test.valid_systems[0]} {valid_systems}'
else:
warn_msg = f"valid_systems has multiple ({len(test.valid_systems)}) items,"
warn_msg += " which is not supported by this hook."
warn_msg += " Make sure to handle filtering yourself."
warnings.warn(warn_msg)
msg = f"valid_systems has multiple ({len(test.valid_systems)}) items,"
msg += " which is not supported by this hook."
msg += " Make sure to handle filtering yourself."
rflog.getlogger().warning(msg)
return


Expand Down Expand Up @@ -529,7 +528,6 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float):
# and return from this hook (as setting test.extra_resources will be ignored in that case according to
# https://reframe-hpc.readthedocs.io/en/stable/regression_test_api.html#reframe.core.pipeline.RegressionTest.extra_resources
if 'memory' not in test.current_partition.resources:
logger = rflog.getlogger()
msg = "Your ReFrame configuration file does not specify any resource called 'memory' for this partition "
msg += f" ({test.current_partition.name})."
msg += " Without this, an explicit memory request cannot be made from the scheduler. This test will run,"
Expand All @@ -538,7 +536,7 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float):
msg += " 'memory' in your ReFrame configuration file for this partition."
msg += " For a SLURM system, one would e.g. define:"
msg += " 'resources': [{'name': 'memory', 'options': ['--mem={size}']}]"
logger.warning(msg)
rflog.getlogger().warning(msg)
# We return, as setting test.extra_resources is pointless - it would be ignored anyway
# This way, we also don't add any lines to the log stating that a specific amount of memory was requested
return
Expand All @@ -557,8 +555,12 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float):
log(f"Memory requested by application: {app_mem_req} MiB")
log(f"Memory proportional to the core count: {proportional_mem} MiB")

# Request the maximum of proportional_mem and app_mem_req from the scheduler
req_mem_per_node = max(proportional_mem, app_mem_req)
if test.exact_memory:
# Request the exact amount of required memory
req_mem_per_node = app_mem_req
else:
# Request the maximum of proportional_mem and app_mem_req from the scheduler
req_mem_per_node = max(proportional_mem, app_mem_req)

test.extra_resources = {'memory': {'size': f'{req_mem_per_node}M'}}
log(f"Requested {req_mem_per_node} MiB per node from the SLURM batch scheduler")
Expand All @@ -580,14 +582,13 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float):
log(f"Requested {req_mem_per_task} MiB per task from the torque batch scheduler")

else:
logger = rflog.getlogger()
msg = "hooks.req_memory_per_node does not support the scheduler you configured"
msg += f" ({test.current_partition.scheduler.registered_name})."
msg += " The test will run, but since it doesn't request the required amount of memory explicitely,"
msg += " it may result in an out-of-memory error."
msg += " Please expand the functionality of hooks.req_memory_per_node for your scheduler."
# Warnings will, at default loglevel, be printed on stdout when executing the ReFrame command
logger.warning(msg)
rflog.getlogger().warning(msg)


def set_modules(test: rfm.RegressionTest):
Expand Down Expand Up @@ -671,14 +672,13 @@ def set_compact_process_binding(test: rfm.RegressionTest):
log(f'Set environment variable SLURM_DISTRIBUTION to {test.env_vars["SLURM_DISTRIBUTION"]}')
log(f'Set environment variable SLURM_CPU_BIND to {test.env_vars["SLURM_CPU_BIND"]}')
else:
logger = rflog.getlogger()
msg = "hooks.set_compact_process_binding does not support the current launcher"
msg += f" ({test.current_partition.launcher_type().registered_name})."
msg += " The test will run, but using the default binding strategy of your parallel launcher."
msg += " This may lead to suboptimal performance."
msg += " Please expand the functionality of hooks.set_compact_process_binding for your parallel launcher."
# Warnings will, at default loglevel, be printed on stdout when executing the ReFrame command
logger.warning(msg)
rflog.getlogger().warning(msg)


def set_compact_thread_binding(test: rfm.RegressionTest):
Expand Down

0 comments on commit 5ce05c3

Please sign in to comment.