Skip to content

Commit

Permalink
Merge branch 'nessi.no-2023.06' of gh-nessi:NorESSI/software-layer in…
Browse files Browse the repository at this point in the history
…to nessi.no-2023.06-EasyBuild/4.9.0/system
  • Loading branch information
Richard Top committed Jan 22, 2024
2 parents 780baac + 870a7b7 commit aa40a3f
Show file tree
Hide file tree
Showing 15 changed files with 173 additions and 200 deletions.
Original file line number Diff line number Diff line change
@@ -1,80 +1,66 @@
# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions
name: Tests relying on having EESSI pilot repo mounted
name: Check for missing software installations in pilot.nessi.no
on: [push, pull_request, workflow_dispatch]
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
pilot:
runs-on: ubuntu-20.04
check_missing:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
EESSI_VERSION:
- 2023.06
EESSI_SOFTWARE_SUBDIR:
EESSI_SOFTWARE_SUBDIR_OVERRIDE:
# - aarch64/generic
- x86_64/amd/zen2
- x86_64/intel/broadwell
# - x86_64/intel/cascadelake
- x86_64/intel/skylake_avx512
- x86_64/generic
EASYSTACK_FILE:
- eessi-2023.06-eb-4.7.2-2021a.yml
- eessi-2023.06-eb-4.7.2-2021b.yml
- eessi-2023.06-eb-4.7.2-2022a.yml
- eessi-2023.06-eb-4.7.2-2022b.yml
- eessi-2023.06-eb-4.7.2-system.yml
- eessi-2023.06-eb-4.8.0-system.yml
- eessi-2023.06-eb-4.8.1-2022a.yml
- eessi-2023.06-eb-4.8.1-system.yml
- eessi-2023.06-eb-4.8.2-2022a.yml
steps:
- name: Check out software-layer repository
uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0

- name: Mount EESSI CernVM-FS pilot repository
- name: Mount NESSI CernVM-FS repository
uses: cvmfs-contrib/github-action-cvmfs@d4641d0d591c9a5c3be23835ced2fb648b44c04b # v3.1
with:
cvmfs_config_package: https://github.com/NorESSI/filesystem-layer/releases/download/latest/cvmfs-config-nessi_latest_all.deb
cvmfs_http_proxy: DIRECT
cvmfs_repositories: pilot.nessi.no

- name: Test check_missing_installations.sh script with EESSI_SOFTWARE_SUBDIR_OVERRIDE
if: '!cancelled()'
run: |
export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR}}
source /cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}/init/bash
module load EasyBuild
eb --version
export EESSI_PREFIX=/cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}
export EESSI_OS_TYPE=linux
env | grep ^EESSI | sort
echo "just run check_missing_installations.sh (should use ${{matrix.EASYSTACK_FILE}})"
./check_missing_installations.sh ${{matrix.EASYSTACK_FILE}}
- name: Test check_missing_installations.sh script without EESSI_SOFTWARE_SUBDIR_OVERRIDE
if: '!cancelled()'
- name: Test check_missing_installations.sh script
run: |
export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}}
source /cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}/init/bash
# set $EESSI_CPU_FAMILY to the CPU architecture that corresponds to $EESSI_SOFTWARE_SUBDIR_OVERRIDE (part before the first slash),
# to prevent issues with checks in the Easybuild configuration that use this variable
export EESSI_CPU_FAMILY=${EESSI_SOFTWARE_SUBDIR_OVERRIDE%%/*}
module load EasyBuild
which eb
eb --version
export EESSI_PREFIX=/cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}
export EESSI_OS_TYPE=linux
export EESSI_SOFTWARE_SUBDIR=${{matrix.EESSI_SOFTWARE_SUBDIR}}
env | grep ^EESSI | sort
echo "just run check_missing_installations.sh (should use ${{matrix.EASYSTACK_FILE}})"
./check_missing_installations.sh ${{matrix.EASYSTACK_FILE}}
echo "just run check_missing_installations.sh (should use easystacks/pilot.nessi.no/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml)"
for easystack_file in $(ls easystacks/pilot.nessi.no/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do
echo "check missing installations for ${easystack_file}..."
./check_missing_installations.sh ${easystack_file}
ec=$?
if [[ ${ec} -ne 0 ]]; then echo "missing installations found for ${easystack_file}!" >&2; exit ${ec}; fi
done
- name: Test check_missing_installations.sh with missing package (GCC/8.3.0)
if: '!cancelled()'
run: |
export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}}
source /cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}/init/bash
# set $EESSI_CPU_FAMILY to the CPU architecture that corresponds to $EESSI_SOFTWARE_SUBDIR_OVERRIDE (part before the first slash),
# to prevent issues with checks in the Easybuild configuration that use this variable
export EESSI_CPU_FAMILY=${EESSI_SOFTWARE_SUBDIR_OVERRIDE%%/*}
module load EasyBuild
which eb
eb --version
export EESSI_PREFIX=/cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}
export EESSI_OS_TYPE=linux
export EESSI_SOFTWARE_SUBDIR=${{matrix.EESSI_SOFTWARE_SUBDIR}}
env | grep ^EESSI | sort
# create dummy easystack file with a single entry (something that is not installed in EESSI)
easystack_file="test.yml"
Expand Down
16 changes: 8 additions & 8 deletions .github/workflows/test_eessi_container_script.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
elif [[ ${{matrix.SCRIPT_TEST}} == 'listrepos_default' ]]; then
outfile=out_listrepos.txt
./eessi_container.sh --verbose --list-repos | tee ${outfile}
grep "EESSI-pilot" ${outfile}
grep "EESSI" ${outfile}
# test use of --list-repos with custom repos.cfg
elif [[ ${{matrix.SCRIPT_TEST}} == 'listrepos_custom' ]]; then
Expand All @@ -57,7 +57,7 @@ jobs:
echo "[EESSI/20HT.TP]" >> cfg/repos.cfg
echo "repo_version = 20HT.TP" >> cfg/repos.cfg
./eessi_container.sh --verbose --list-repos | tee ${outfile}
grep "EESSI-pilot" ${outfile}
grep "EESSI" ${outfile}
export EESSI_REPOS_CFG_DIR_OVERRIDE=${PWD}/cfg
./eessi_container.sh --verbose --list-repos | tee ${outfile2}
Expand Down Expand Up @@ -90,15 +90,15 @@ jobs:
elif [[ ${{matrix.SCRIPT_TEST}} == 'readwrite' ]]; then
outfile=out_readwrite.txt
fn="test_${RANDOM}.txt"
echo "touch /cvmfs/pilot.eessi-hpc.org/${fn}" > test_script.sh
echo "touch /cvmfs/pilot.nessi.no/${fn}" > test_script.sh
chmod u+x test_script.sh
export SINGULARITY_BIND="$PWD:/test"
./eessi_container.sh --verbose --access rw --mode run /test/test_script.sh > ${outfile}
tmpdir=$(grep "\-\-resume" ${outfile} | sed "s/.*--resume \([^']*\).*/\1/g")
# note: must use '--access rw' again here, since touched file is in overlay upper dir
./eessi_container.sh --verbose --resume ${tmpdir} --access rw --mode shell <<< "ls -l /cvmfs/pilot.eessi-hpc.org/${fn}" > ${outfile}
grep "/cvmfs/pilot.eessi-hpc.org/${fn}$" $outfile
./eessi_container.sh --verbose --resume ${tmpdir} --access rw --mode shell <<< "ls -l /cvmfs/pilot.nessi.no/${fn}" > ${outfile}
grep "/cvmfs/pilot.nessi.no/${fn}$" $outfile
# test use of --resume
elif [[ ${{matrix.SCRIPT_TEST}} == 'resume' ]]; then
Expand All @@ -120,12 +120,12 @@ jobs:
elif [[ ${{matrix.SCRIPT_TEST}} == 'save' ]]; then
outfile=out_save.txt
fn="test_${RANDOM}.txt"
test_cmd="touch /cvmfs/pilot.eessi-hpc.org/${fn}"
test_cmd="touch /cvmfs/pilot.nessi.no/${fn}"
./eessi_container.sh --verbose --mode shell --access rw --save test-save.tar <<< "${test_cmd}" 2>&1 | tee ${outfile}
rm -f ${outfile}
./eessi_container.sh --verbose --mode shell --access rw --resume test-save.tar <<< "ls -l /cvmfs/pilot.eessi-hpc.org/${fn}" > ${outfile}
grep "/cvmfs/pilot.eessi-hpc.org/${fn}$" $outfile
./eessi_container.sh --verbose --mode shell --access rw --resume test-save.tar <<< "ls -l /cvmfs/pilot.nessi.no/${fn}" > ${outfile}
grep "/cvmfs/pilot.nessi.no/${fn}$" $outfile
tar tfv test-save.tar | grep "overlay-upper/${fn}"
Expand Down
20 changes: 20 additions & 0 deletions .github/workflows/test_licenses.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions
name: Test software licenses
on: [push, pull_request]
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
build:
runs-on: ubuntu-20.04
steps:
- name: Check out software-layer repository
uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0

- name: set up Python
uses: actions/setup-python@13ae5bb136fac2878aff31522b9efb785519f984 # v4.3.0
with:
python-version: '3.9'

- name: Check software licenses
run: |
python licenses/spdx.py licenses/licenses.json
45 changes: 40 additions & 5 deletions .github/workflows/tests_archdetect.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,59 @@ jobs:
- x86_64/intel/skylake_avx512/archspec-linux-6132
- x86_64/amd/zen2/Azure-CentOS7-7V12
- x86_64/amd/zen3/Azure-CentOS7-7V73X
- ppc64le/power9le/unknown-power9le
- aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra
- aarch64/arm/neoverse-n1/AWS-awslinux-graviton2
- aarch64/arm/neoverse-v1/AWS-awslinux-graviton3
# commented out since these targets are currently not supported in pilot.nessi.no repo
# (and some tests assume that the corresponding subdirectory in software layer is there)
# - ppc64le/power9le/unknown-power9le
# - aarch64/neoverse-n1/Azure-Ubuntu20-Altra
# - aarch64/neoverse-n1/AWS-awslinux-graviton2
# - aarch64/neoverse-v1/AWS-awslinux-graviton3
fail-fast: false
steps:
- name: checkout
uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0

- name: Mount NESSI CernVM-FS repository
uses: cvmfs-contrib/github-action-cvmfs@d4641d0d591c9a5c3be23835ced2fb648b44c04b # v3.1
with:
cvmfs_config_package: https://github.com/NorESSI/filesystem-layer/releases/download/latest/cvmfs-config-nessi_latest_all.deb
cvmfs_http_proxy: DIRECT
cvmfs_repositories: pilot.nessi.no

- name: test eessi_archdetect.sh
run: |
export EESSI_MACHINE_TYPE=${{matrix.proc_cpuinfo}}
export EESSI_MACHINE_TYPE=${EESSI_MACHINE_TYPE%%/*}
export EESSI_PROC_CPUINFO=./tests/archdetect/${{matrix.proc_cpuinfo}}.cpuinfo
# check that printing of best match works correctly
CPU_ARCH=$(./init/eessi_archdetect.sh cpupath)
if [[ $CPU_ARCH == "$( cat ./tests/archdetect/${{matrix.proc_cpuinfo}}.output )" ]]; then
echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCH" >&2
echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCH"
else
echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCH" >&2
exit 1
fi
# check that $EESSI_SOFTWARE_SUBDIR_OVERRIDE is honored
export EESSI_SOFTWARE_SUBDIR_OVERRIDE='dummy/cpu'
CPU_ARCH=$(./init/eessi_archdetect.sh cpupath)
if [[ $CPU_ARCH == "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then
echo "Test for picking up on \$EESSI_SOFTWARE_SUBDIR_OVERRIDE PASSED"
else
echo "Test for picking up on \$EESSI_SOFTWARE_SUBDIR_OVERRIDE FAILED" >&2
exit 1
fi
unset EESSI_SOFTWARE_SUBDIR_OVERRIDE
# check that printing of all matches works correctly (-a option for cpupath action)
CPU_ARCHES=$(./init/eessi_archdetect.sh -a cpupath)
if [[ $CPU_ARCHES == "$( cat ./tests/archdetect/${{matrix.proc_cpuinfo}}.all.output )" ]]; then
echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCHES"
else
echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCHES" >&2
exit 1
fi
# Check all those architectures actually exist (if this EESSI version has been populated already)
if [ -d ${EESSI_PREFIX}/software/linux ]; then
for dir in $(echo "$CPU_ARCHES" | tr ':' '\n'); do
# Search all EESSI versions as we may drop support at some point
ls -d ${EESSI_PREFIX}/software/linux/${dir}
done
fi
6 changes: 3 additions & 3 deletions .github/workflows/tests_scripts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:
paths:
- build_container.sh
- create_directory_tarballs.sh
- EESSI-pilot-install-software.sh
- EESSI-install-software.sh
- install_software_layer.sh
- load_easybuild_module.sh
- run_in_compat_layer_env.sh
Expand All @@ -16,7 +16,7 @@ on:
paths:
- build_container.sh
- create_directory_tarballs.sh
- EESSI-pilot-install-software.sh
- EESSI-install-software.sh
- install_software_layer.sh
- load_easybuild_module.sh
- run_in_compat_layer_env.sh
Expand All @@ -40,7 +40,7 @@ jobs:
# bind current directory into container as /software-layer
export SINGULARITY_BIND="${PWD}:/software-layer"
# can't test with EasyBuild versions older than v4.5.2 when using EESSI pilot 2023.06,
# can't test with EasyBuild versions older than v4.5.2 when using EESSI 2023.06,
# since Python in compat layer is Python 3.11.x;
# testing with a single EasyBuild version takes a while in GitHub Actions, so stick to a single sensible version
for EB_VERSION in '4.6.0'; do
Expand Down
2 changes: 1 addition & 1 deletion create_tarball.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ module_files_list=${tmpdir}/module_files.list.txt
if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod ]; then
# include Lmod cache and configuration file (lmodrc.lua),
# skip whiteout files and backup copies of Lmod cache (spiderT.old.*)
find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' > ${files_list}
find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' >> ${files_list}
fi

# include scripts that were copied by install_scripts.sh, which we want to ship in EESSI repository
Expand Down
7 changes: 7 additions & 0 deletions easystacks/pilot.nessi.no/2023.06/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
File naming matters, since it determines the order in which easystack files are processed.

Software installed with system toolchain should be installed first,
this includes EasyBuild itself, see `eessi-2023.06-eb-4.8.2-001-system.yml` .

CUDA installations must be done before CUDA is required as dependency for something
built with a non-system toolchain, see `eessi-2023.06-eb-4.8.2-010-CUDA.yml` .
43 changes: 31 additions & 12 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,20 +185,26 @@ def parse_hook_fontconfig_add_fonts(ec, eprefix):


def parse_hook_openblas_relax_lapack_tests_num_errors(ec, eprefix):
"""Relax number of failing numerical LAPACK tests for aarch64/neoverse_v1 CPU target."""
"""Relax number of failing numerical LAPACK tests for aarch64/* CPU targets for OpenBLAS < 0.3.23"""
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if ec.name == 'OpenBLAS':
# relax maximum number of failed numerical LAPACK tests for aarch64/neoverse_v1 CPU target
# since the default setting of 150 that works well on other aarch64 targets and x86_64 is a bit too strict
# See https://github.com/EESSI/software-layer/issues/314
cfg_option = 'max_failing_lapack_tests_num_errors'
if cpu_target == CPU_TARGET_NEOVERSE_V1:
orig_value = ec[cfg_option]
ec[cfg_option] = 400
print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)",
ec.name, ec[cfg_option], orig_value)
else:
print_msg("Not changing option %s for %s on non-AARCH64", cfg_option, ec.name)
if LooseVersion(ec.version) < LooseVersion('0.3.23'):
# relax maximum number of failed numerical LAPACK tests for aarch64/neoverse_v1 CPU target
# since the default setting of 150 that works well on other aarch64 targets and x86_64 is a bit too strict
# See https://github.com/EESSI/software-layer/issues/314
cfg_option = 'max_failing_lapack_tests_num_errors'
if cpu_target == CPU_TARGET_NEOVERSE_V1:
orig_value = ec[cfg_option]
ec[cfg_option] = 400
print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)",
ec.name, ec[cfg_option], orig_value)
elif cpu_target == CPU_TARGET_AARCH64_GENERIC:
orig_value = ec[cfg_option]
ec[cfg_option] = 302
print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)",
ec.name, ec[cfg_option], orig_value) ec.name, ec[cfg_option], orig_value)
else:
print_msg("Not changing option %s for %s on non-AARCH64", cfg_option, ec.name)
else:
raise EasyBuildError("OpenBLAS-specific hook triggered for non-OpenBLAS easyconfig?!")

Expand Down Expand Up @@ -393,6 +399,18 @@ def pre_test_hook_ignore_failing_tests_SciPybundle(self, *args, **kwargs):
if self.name == 'SciPy-bundle' and self.version in scipy_bundle_versions and cpu_target == CPU_TARGET_NEOVERSE_V1:
self.cfg['testopts'] = "|| echo ignoring failing tests"

def pre_test_hook_ignore_failing_tests_netCDF(self, *args, **kwargs):
"""
Pre-test hook for netCDF: skip failing tests for selected netCDF versions on neoverse_v1
cfr. https://github.com/EESSI/software-layer/issues/425
The following tests are problematic:
163 - nc_test4_run_par_test (Timeout)
190 - h5_test_run_par_tests (Timeout)
A few other tests are skipped in the easyconfig and patches for similar issues, see above issue for details.
"""
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if self.name == 'netCDF' and self.version == '4.9.2' and cpu_target == CPU_TARGET_NEOVERSE_V1:
self.cfg['testopts'] = "|| echo ignoring failing tests"

def pre_single_extension_hook(ext, *args, **kwargs):
"""Main pre-extension: trigger custom functions based on software name."""
Expand Down Expand Up @@ -573,6 +591,7 @@ def inject_gpu_property(ec):
'ESPResSo': pre_test_hook_ignore_failing_tests_ESPResSo,
'FFTW.MPI': pre_test_hook_ignore_failing_tests_FFTWMPI,
'SciPy-bundle': pre_test_hook_ignore_failing_tests_SciPybundle,
'netCDF': pre_test_hook_ignore_failing_tests_netCDF,
}

PRE_SINGLE_EXTENSION_HOOKS = {
Expand Down
Loading

0 comments on commit aa40a3f

Please sign in to comment.