Skip to content

Commit

Permalink
Merge branch 'main' into osu
Browse files Browse the repository at this point in the history
  • Loading branch information
Satish Kamath committed Nov 23, 2023
2 parents 3a32f0e + 02d4702 commit 693e4ef
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 39 deletions.
33 changes: 19 additions & 14 deletions eessi/testsuite/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,23 @@
}

SCALES = {
# required keys:
# - num_nodes
# - either node_part or (num_cpus_per_node and num_gpus_per_node)
'1_core': {'num_nodes': 1, 'num_cpus_per_node': 1, 'num_gpus_per_node': 1},
'2_cores': {'num_nodes': 1, 'num_cpus_per_node': 2, 'num_gpus_per_node': 1},
'4_cores': {'num_nodes': 1, 'num_cpus_per_node': 4, 'num_gpus_per_node': 1},
'1_8_node': {'num_nodes': 1, 'node_part': 8}, # 1/8 node
'1_4_node': {'num_nodes': 1, 'node_part': 4}, # 1/4 node
'1_2_node': {'num_nodes': 1, 'node_part': 2}, # 1/2 node
'1_node': {'num_nodes': 1, 'node_part': 1},
'2_nodes': {'num_nodes': 2, 'node_part': 1},
'4_nodes': {'num_nodes': 4, 'node_part': 1},
'8_nodes': {'num_nodes': 8, 'node_part': 1},
'16_nodes': {'num_nodes': 16, 'node_part': 1},
# required keys:
# - num_nodes
# - either node_part or (num_cpus_per_node and num_gpus_per_node)
# num_cpus_per_node and num_gpus_per_node are upper limits:
# the actual count depends on the specific configuration of cores, gpus, and sockets within the node,
# as well as the specific test being carried out.
'1_core': {'num_nodes': 1, 'num_cpus_per_node': 1, 'num_gpus_per_node': 1},
'2_cores': {'num_nodes': 1, 'num_cpus_per_node': 2, 'num_gpus_per_node': 1},
'4_cores': {'num_nodes': 1, 'num_cpus_per_node': 4, 'num_gpus_per_node': 1},
'1_cpn_2_nodes': {'num_nodes': 2, 'num_cpus_per_node': 1, 'num_gpus_per_node': 1},
'1_cpn_4_nodes': {'num_nodes': 4, 'num_cpus_per_node': 1, 'num_gpus_per_node': 1},
'1_8_node': {'num_nodes': 1, 'node_part': 8}, # 1/8 node
'1_4_node': {'num_nodes': 1, 'node_part': 4}, # 1/4 node
'1_2_node': {'num_nodes': 1, 'node_part': 2}, # 1/2 node
'1_node': {'num_nodes': 1, 'node_part': 1},
'2_nodes': {'num_nodes': 2, 'node_part': 1},
'4_nodes': {'num_nodes': 4, 'node_part': 1},
'8_nodes': {'num_nodes': 8, 'node_part': 1},
'16_nodes': {'num_nodes': 16, 'node_part': 1},
}
63 changes: 38 additions & 25 deletions eessi/testsuite/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,32 @@

import reframe as rfm

from eessi.testsuite.constants import *
from eessi.testsuite.utils import get_max_avail_gpus_per_node, is_cuda_required_module, log, check_proc_attribute_defined
from eessi.testsuite.constants import * # noqa
from eessi.testsuite.utils import (get_max_avail_gpus_per_node, is_cuda_required_module, log,
check_proc_attribute_defined)


def assign_default_num_cpus_per_node(test: rfm.RegressionTest):
"""
Check if the default number of cpus per node is already defined in the test
(e.g. by earlier hooks like set_tag_scale).
If so, check if it doesn't exceed the maximum available.
If not, set default_num_cpus_per_node based on the maximum available cpus and node_part
"""

if test.default_num_cpus_per_node:
# may skip if not enough CPUs
test.skip_if(
test.default_num_cpus_per_node > test.max_avail_cpus_per_node,
f'Requested CPUs per node ({test.default_num_cpus_per_node}) is higher than max available'
f' ({test.max_avail_cpus_per_node}) in current partition ({test.current_partition.name}).'
)
else:
# no default set yet, so setting one
test.default_num_cpus_per_node = int(test.max_avail_cpus_per_node / test.node_part)

log(f'default_num_cpus_per_node set to {test.default_num_cpus_per_node}')


def assign_one_task_per_compute_unit(test: rfm.RegressionTest, compute_unit: str):
"""
Expand All @@ -28,10 +52,10 @@ def assign_one_task_per_compute_unit(test: rfm.RegressionTest, compute_unit: str
- assign_one_task_per_compute_unit(test, COMPUTE_UNIT[CPU]) will launch 64 tasks with 1 thread
- assign_one_task_per_compute_unit(test, COMPUTE_UNIT[CPU_SOCKET]) will launch 2 tasks with 32 threads per task
Future work:
Future work:
Currently, on a single node with 2 sockets, 64 cores and 128 hyperthreads, this
- assign_one_task_per_compute_unit(test, COMPUTE_UNIT[CPU], true) will launch 128 tasks with 1 thread
- assign_one_task_per_compute_unit(test, COMPUTE_UNIT[CPU_SOCKET], true) will launch 2 tasks with 64 threads per task
- assign_one_task_per_compute_unit(test, COMPUTE_UNIT[CPU], true) launches 128 tasks with 1 thread
- assign_one_task_per_compute_unit(test, COMPUTE_UNIT[CPU_SOCKET], true) launches 2 tasks with 64 threads per task
In the future, we'd like to add an arugment that disables spawning tasks for hyperthreads.
"""
check_proc_attribute_defined(test, 'num_cpus')
Expand All @@ -48,22 +72,8 @@ def assign_one_task_per_compute_unit(test: rfm.RegressionTest, compute_unit: str
f' default num_gpus_per_node ({test.default_num_gpus_per_node}) must be defined and have integer values.'
)

# Check if the default number of cpus per node is already defined in the test
# (e.g. by earlier hooks like set_tag_scale).
# If so, check if it doesn't exceed the maximum available.
# If not, set default_num_cpus_per_node based on the maximum available cpus and node_part
if test.default_num_cpus_per_node:
# may skip if not enough CPUs
test.skip_if(
test.default_num_cpus_per_node > test.max_avail_cpus_per_node,
f'Requested CPUs per node ({test.default_num_cpus_per_node}) is higher than max available'
f' ({test.max_avail_cpus_per_node}) in current partition ({test.current_partition.name}).'
)
else:
# no default set yet, so setting one
test.default_num_cpus_per_node = int(test.max_avail_cpus_per_node / test.node_part)
assign_default_num_cpus_per_node(test)

log(f'default_num_cpus_per_node set to {test.default_num_cpus_per_node}')

if compute_unit == COMPUTE_UNIT[GPU]:
_assign_one_task_per_gpu(test)
Expand All @@ -74,21 +84,22 @@ def assign_one_task_per_compute_unit(test: rfm.RegressionTest, compute_unit: str
else:
raise ValueError(f'compute unit {compute_unit} is currently not supported')


def _assign_one_task_per_cpu_socket(test: rfm.RegressionTest):
"""
Determines the number of tasks per node by dividing the default_num_cpus_per_node by
the number of cpus available per socket, and rounding up. The result is that for full-node jobs the default
the number of cpus available per socket, and rounding up. The result is that for full-node jobs the default
will spawn one task per socket, with a number of cpus per task equal to the number of cpus per socket.
Other examples:
- half a node (i.e. node_part=2) on a 4-socket system would result in 2 tasks per node,
with number of cpus per task equal to the number of cpus per socket.
- 2 cores (i.e. default_num_cpus_per_node=2) on a 16 core system with 2 sockets would result in
- 2 cores (i.e. default_num_cpus_per_node=2) on a 16 core system with 2 sockets would result in
1 task per node, with 2 cpus per task
This default is set unless the test is run with:
--setvar num_tasks_per_node=<x> and/or
--setvar num_cpus_per_task=<y>.
In those cases, those take precedence, and the remaining variable (num_cpus_per task or
In those cases, those take precedence, and the remaining variable (num_cpus_per task or
num_tasks_per_node respectively) is calculated based on the equality
test.num_tasks_per_node * test.num_cpus_per_task == test.default_num_cpus_per_node.
Expand Down Expand Up @@ -128,6 +139,7 @@ def _assign_one_task_per_cpu_socket(test: rfm.RegressionTest):
log(f'Number of cpus per task set to {test.num_cpus_per_task}')
log(f'num_tasks set to {test.num_tasks}')


def _assign_one_task_per_cpu(test: rfm.RegressionTest):
"""
Sets num_tasks_per_node and num_cpus_per_task such that it will run one task per core,
Expand Down Expand Up @@ -335,7 +347,8 @@ def set_compact_process_binding(test: rfm.RegressionTest):
# and when number of tasks times cpus per task equals either socket, core or thread count
test.env_vars['SLURM_CPU_BIND'] = 'verbose'
log(f'Set environment variable I_MPI_PIN_DOMAIN to {test.env_vars["I_MPI_PIN_DOMAIN"]}')
log(f'Set environment variable OMPI_MCA_rmaps_base_mapping_policy to {test.env_vars["OMPI_MCA_rmaps_base_mapping_policy"]}')
log('Set environment variable OMPI_MCA_rmaps_base_mapping_policy to '
f'{test.env_vars["OMPI_MCA_rmaps_base_mapping_policy"]}')
log(f'Set environment variable SLURM_CPU_BIND to {test.env_vars["SLURM_CPU_BIND"]}')


Expand All @@ -352,7 +365,7 @@ def set_compact_thread_binding(test: rfm.RegressionTest):
# Set thread binding
test.env_vars['OMP_PLACES'] = 'cores'
test.env_vars['OMP_PROC_BIND'] = 'close'
# See https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/thread-affinity-interface.html
# See https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/thread-affinity-interface.html # noqa
test.env_vars['KMP_AFFINITY'] = 'granularity=fine,compact,1,0'
log(f'Set environment variable OMP_PLACES to {test.env_vars["OMP_PLACES"]}')
log(f'Set environment variable OMP_PROC_BIND to {test.env_vars["OMP_PROC_BIND"]}')
Expand Down

0 comments on commit 693e4ef

Please sign in to comment.