Skip to content

Commit

Permalink
Merge pull request EESSI#682 from casparvl/fix_missing_num_cores_per_numa_node_in_test_step
Browse files Browse the repository at this point in the history

Use ReFrame's CPU autodetect in test step
  • Loading branch information
bedroge authored Sep 6, 2024
2 parents 9ce35c2 + c6e0cc2 commit 66219a9
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 42 deletions.
11 changes: 2 additions & 9 deletions reframe_config_bot.py.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,13 @@ site_configuration = {
'modules_system': 'lmod',
'partitions': [
{
'name': 'default',
'name': '__RFM_PARTITION__',
'scheduler': 'local',
'launcher': 'mpirun',
'environs': ['default'],
'features': [
FEATURES[CPU]
] + list(SCALES.keys()),
'processor': {
'num_cpus': __NUM_CPUS__,
'num_sockets': __NUM_SOCKETS__,
'num_cpus_per_core': __NUM_CPUS_PER_CORE__,
'num_cpus_per_socket': __NUM_CPUS_PER_SOCKET__,
},
'resources': [
{
'name': 'memory',
Expand Down Expand Up @@ -56,8 +50,7 @@ site_configuration = {
{
'purge_environment': True,
'resolve_module_conflicts': False, # avoid loading the module before submitting the job
# disable automatic detection of CPU architecture (since we're using local scheduler)
'remote_detect': False,
'remote_detect': True,
}
],
'logging': common_logging_config(),
Expand Down
38 changes: 5 additions & 33 deletions test_suite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -141,34 +141,6 @@ export RFM_PREFIX=$PWD/reframe_runs
echo "Configured reframe with the following environment variables:"
env | grep "RFM_"

# Inject correct CPU/memory properties into the ReFrame config file
echo "Collecting system-specific input for the ReFrame configuration file"
cpuinfo=$(lscpu)
if [[ "${cpuinfo}" =~ CPU\(s\):[^0-9]*([0-9]+) ]]; then
cpu_count=${BASH_REMATCH[1]}
echo "Detected CPU count: ${cpu_count}"
else
fatal_error "Failed to get the number of CPUs for the current test hardware with lscpu."
fi
if [[ "${cpuinfo}" =~ Socket\(s\):[^0-9]*([0-9]+) ]]; then
socket_count=${BASH_REMATCH[1]}
echo "Detected socket count: ${socket_count}"
else
fatal_error "Failed to get the number of sockets for the current test hardware with lscpu."
fi
if [[ "${cpuinfo}" =~ (Thread\(s\) per core:[^0-9]*([0-9]+)) ]]; then
threads_per_core=${BASH_REMATCH[2]}
echo "Detected threads per core: ${threads_per_core}"
else
fatal_error "Failed to get the number of threads per core for the current test hardware with lscpu."
fi
if [[ "${cpuinfo}" =~ (Core\(s\) per socket:[^0-9]*([0-9]+)) ]]; then
cores_per_socket=${BASH_REMATCH[2]}
echo "Detected cores per socket: ${cores_per_socket}"
else
fatal_error "Failed to get the number of cores per socket for the current test hardware with lscpu."
fi

# The /sys inside the container is not the same as the /sys of the host
# We want to extract the memory limit from the cgroup on the host (which is typically set by SLURM).
# Thus, bot/test.sh bind-mounts the host's /sys/fs/cgroup into /hostsys/fs/cgroup
Expand Down Expand Up @@ -201,13 +173,13 @@ else
fi
echo "Detected available memory: ${cgroup_mem_mib} MiB"

echo "Replacing detected system information in template ReFrame config file..."
cp ${RFM_CONFIG_FILE_TEMPLATE} ${RFM_CONFIG_FILES}
sed -i "s/__NUM_CPUS__/${cpu_count}/g" $RFM_CONFIG_FILES
sed -i "s/__NUM_SOCKETS__/${socket_count}/g" $RFM_CONFIG_FILES
sed -i "s/__NUM_CPUS_PER_CORE__/${threads_per_core}/g" $RFM_CONFIG_FILES
sed -i "s/__NUM_CPUS_PER_SOCKET__/${cores_per_socket}/g" $RFM_CONFIG_FILES
echo "Replacing memory limit in the ReFrame config file with the detected CGROUP memory limit: ${cgroup_mem_mib} MiB"
sed -i "s/__MEM_PER_NODE__/${cgroup_mem_mib}/g" $RFM_CONFIG_FILES
RFM_PARTITION="${SLURM_JOB_PARTITION}"
echo "Replacing partition name in the template ReFrame config file: ${RFM_PARTITION}"
sed -i "s/__RFM_PARTITION__/${RFM_PARTITION}/g" $RFM_CONFIG_FILES

# Make debugging easier by printing the final config file:
echo "Final config file (after replacements):"
cat "${RFM_CONFIG_FILES}"
Expand Down

0 comments on commit 66219a9

Please sign in to comment.