Skip to content

Commit

Permalink
Merge pull request EESSI#242 from TopRichard/nessi.no-2023.06-ESSI-Sy…
Browse files Browse the repository at this point in the history
…nc-Review

Took care of all noted changes
  • Loading branch information
trz42 authored Jan 22, 2024
2 parents b6b3352 + 27f47ef commit 870a7b7
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 25 deletions.
2 changes: 1 addition & 1 deletion create_tarball.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ module_files_list=${tmpdir}/module_files.list.txt
if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod ]; then
# include Lmod cache and configuration file (lmodrc.lua),
# skip whiteout files and backup copies of Lmod cache (spiderT.old.*)
find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' > ${files_list}
find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' >> ${files_list}
fi

# include scripts that were copied by install_scripts.sh, which we want to ship in EESSI repository
Expand Down
43 changes: 31 additions & 12 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,20 +185,26 @@ def parse_hook_fontconfig_add_fonts(ec, eprefix):


def parse_hook_openblas_relax_lapack_tests_num_errors(ec, eprefix):
"""Relax number of failing numerical LAPACK tests for aarch64/neoverse_v1 CPU target."""
"""Relax number of failing numerical LAPACK tests for aarch64/* CPU targets for OpenBLAS < 0.3.23"""
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if ec.name == 'OpenBLAS':
# relax maximum number of failed numerical LAPACK tests for aarch64/neoverse_v1 CPU target
# since the default setting of 150 that works well on other aarch64 targets and x86_64 is a bit too strict
# See https://github.com/EESSI/software-layer/issues/314
cfg_option = 'max_failing_lapack_tests_num_errors'
if cpu_target == CPU_TARGET_NEOVERSE_V1:
orig_value = ec[cfg_option]
ec[cfg_option] = 400
print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)",
ec.name, ec[cfg_option], orig_value)
else:
print_msg("Not changing option %s for %s on non-AARCH64", cfg_option, ec.name)
if LooseVersion(ec.version) < LooseVersion('0.3.23'):
# relax maximum number of failed numerical LAPACK tests for aarch64/neoverse_v1 CPU target
# since the default setting of 150 that works well on other aarch64 targets and x86_64 is a bit too strict
# See https://github.com/EESSI/software-layer/issues/314
cfg_option = 'max_failing_lapack_tests_num_errors'
if cpu_target == CPU_TARGET_NEOVERSE_V1:
orig_value = ec[cfg_option]
ec[cfg_option] = 400
print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)",
ec.name, ec[cfg_option], orig_value)
elif cpu_target == CPU_TARGET_AARCH64_GENERIC:
orig_value = ec[cfg_option]
ec[cfg_option] = 302
print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)",
ec.name, ec[cfg_option], orig_value)
else:
print_msg("Not changing option %s for %s on non-AARCH64", cfg_option, ec.name)
else:
raise EasyBuildError("OpenBLAS-specific hook triggered for non-OpenBLAS easyconfig?!")

Expand Down Expand Up @@ -393,6 +399,18 @@ def pre_test_hook_ignore_failing_tests_SciPybundle(self, *args, **kwargs):
if self.name == 'SciPy-bundle' and self.version in scipy_bundle_versions and cpu_target == CPU_TARGET_NEOVERSE_V1:
self.cfg['testopts'] = "|| echo ignoring failing tests"

def pre_test_hook_ignore_failing_tests_netCDF(self, *args, **kwargs):
    """
    Pre-test hook for netCDF: ignore failing tests for netCDF 4.9.2 on the neoverse_v1 CPU target.

    See https://github.com/EESSI/software-layer/issues/425 for background.
    Tests reported as problematic there:
        163 - nc_test4_run_par_test (Timeout)
        190 - h5_test_run_par_tests (Timeout)
    A few other tests are skipped in the easyconfig and patches for similar issues, see the issue for details.

    When all conditions match, the 'testopts' easyconfig parameter is overwritten (not extended)
    with a "|| echo ..." suffix. Extra positional and keyword arguments are accepted and ignored.
    """
    # the CPU target is looked up first, regardless of which software is being handled
    target_subdir = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')

    is_affected_build = (
        self.name == 'netCDF'
        and self.version == '4.9.2'
        and target_subdir == CPU_TARGET_NEOVERSE_V1
    )
    if not is_affected_build:
        return

    # presumably appended to the test command so that a failing test run does not fail the build
    self.cfg['testopts'] = "|| echo ignoring failing tests"

def pre_single_extension_hook(ext, *args, **kwargs):
"""Main pre-extension: trigger custom functions based on software name."""
Expand Down Expand Up @@ -573,6 +591,7 @@ def inject_gpu_property(ec):
'ESPResSo': pre_test_hook_ignore_failing_tests_ESPResSo,
'FFTW.MPI': pre_test_hook_ignore_failing_tests_FFTWMPI,
'SciPy-bundle': pre_test_hook_ignore_failing_tests_SciPybundle,
'netCDF': pre_test_hook_ignore_failing_tests_netCDF,
}

PRE_SINGLE_EXTENSION_HOOKS = {
Expand Down
28 changes: 28 additions & 0 deletions eessi-2023.06-known-issues.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
- aarch64/neoverse_v1:
- ESPResSo-4.2.1-foss-2023a:
- issue: https://github.com/EESSI/software-layer/issues/363
- info: "ESPResSo tests failing due to timeouts"
- FFTW.MPI-3.3.10-gompi-2023a:
- issue: https://github.com/EESSI/software-layer/issues/325
- info: "Flaky FFTW tests, random failures"
- FFTW.MPI-3.3.10-gompi-2023b:
- issue: https://github.com/EESSI/software-layer/issues/325
- info: "Flaky FFTW tests, random failures"
- netCDF-4.9.2-gompi-2023a.eb:
- issue: https://github.com/EESSI/software-layer/issues/425
- info: "netCDF intermittent test failures"
- netCDF-4.9.2-gompi-2023b.eb:
- issue: https://github.com/EESSI/software-layer/issues/425
- info: "netCDF intermittent test failures"
- OpenBLAS-0.3.21-GCC-12.2.0:
- issue: https://github.com/EESSI/software-layer/issues/314
- info: "Increased number of numerical errors in OpenBLAS test suite (344 vs max. 150 on x86_64/*)"
- SciPy-bundle-2023.02-gfbf-2022b:
- issue: https://github.com/EESSI/software-layer/issues/318
- info: "numpy built with -march=armv8.4-a instead of -mcpu=native (no SVE) + 2 failing tests (vs 50005 passed) in scipy test suite"
- SciPy-bundle-2023.07-gfbf-2023a:
- issue: https://github.com/EESSI/software-layer/issues/318
- info: "2 failing tests (vs 54409 passed) in scipy test suite"
- SciPy-bundle-2023.11-gfbf-2023b:
- issue: https://github.com/EESSI/software-layer/issues/318
- info: "2 failing tests (vs 54876 passed) in scipy test suite"
10 changes: 5 additions & 5 deletions eessi_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
# -. initial settings & exit codes
TOPDIR=$(dirname $(realpath $0))

source ${TOPDIR}/scripts/utils.sh
source ${TOPDIR}/scripts/cfg_files.sh
source "${TOPDIR}"/scripts/utils.sh
source "${TOPDIR}"/scripts/cfg_files.sh

# exit codes: bitwise shift codes to allow for combination of exit codes
# ANY_ERROR_EXITCODE is sourced from ${TOPDIR}/scripts/utils.sh
Expand Down Expand Up @@ -83,7 +83,7 @@ display_help() {
echo " MODE==run (run a script or command) [default: shell]"
echo " -n | --nvidia MODE - configure the container to work with NVIDIA GPUs,"
echo " MODE==install for a CUDA installation, MODE==run to"
echo " attach a GPU, MODE==all for both [default: false]"
echo " attach a GPU, MODE==all for both [default: false]"
echo " -r | --repository CFG - configuration file or identifier defining the"
echo " repository to use [default: EESSI via"
echo " default container, see --container]"
Expand Down Expand Up @@ -164,7 +164,7 @@ while [[ $# -gt 0 ]]; do
SETUP_NVIDIA=1
NVIDIA_MODE="$2"
shift 2
;;
;;
-r|--repository)
REPOSITORY="$2"
shift 2
Expand Down Expand Up @@ -575,7 +575,7 @@ fi
declare -a EESSI_FUSE_MOUNTS=()

# always mount cvmfs-config repo (to get access to software.eessi.io)
# EESSI_FUSE_MOUNTS+=("--fusemount" "container:cvmfs2 cvmfs-config.cern.ch /cvmfs/cvmfs-config.cern.ch")
# Commented out intentionally EESSI_FUSE_MOUNTS+=("--fusemount" "container:cvmfs2 cvmfs-config.cern.ch /cvmfs/cvmfs-config.cern.ch")

if [[ "${ACCESS}" == "ro" ]]; then
export EESSI_READONLY="container:cvmfs2 ${repo_name} /cvmfs/${repo_name}"
Expand Down
2 changes: 1 addition & 1 deletion init/bash
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ if [ $? -eq 0 ]; then
source $EESSI_EPREFIX/usr/share/Lmod/init/bash

# prepend location of modules for EESSI software stack to $MODULEPATH
echo "Prepending $EESSI_MODULEPATH to \$MODULEPATH..." >> $output
show_msg "Prepending $EESSI_MODULEPATH to \$MODULEPATH..." >> $output
module use $EESSI_MODULEPATH

#show_msg ""
Expand Down
2 changes: 1 addition & 1 deletion init/eessi_archdetect.sh
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ cpupath(){
# each flag in this CPU specification must be found in the list of flags of the host
check_allinfirst "${cpu_flags[*]}" ${arch_spec[2]} && best_arch_match=${arch_spec[0]} && \
all_arch_matches="$best_arch_match:$all_arch_matches" && \
log "DEBUG" "cpupath: host CPU best match updated to $best_arch_match"
log "DEBUG" "cpupath: host CPU best match updated to $best_arch_match"
fi
done

Expand Down
10 changes: 5 additions & 5 deletions init/eessi_environment_variables
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
# $BASH_SOURCE points to correct path, see also http://mywiki.wooledge.org/BashFAQ/028
EESSI_INIT_DIR_PATH=$(dirname $(realpath $BASH_SOURCE))

# Print an error message to stderr, wrapped in ANSI escape codes for red text,
# and finish with a non-zero exit status.
#   $1 - message text (echo -e also interprets backslash escapes inside it)
function error() {
echo -e "\e[31mERROR: $1\e[0m" >&2
# 'false' is the last command, so the function's exit status is non-zero
false
}

function show_msg {
# only echo msg if EESSI_SILENT is unset
msg=$1
Expand All @@ -10,11 +15,6 @@ function show_msg {
fi
}

# Print an error message to stderr, wrapped in ANSI escape codes for red text,
# and finish with a non-zero exit status.
#   $1 - message text (echo -e also interprets backslash escapes inside it)
function error() {
echo -e "\e[31mERROR: $1\e[0m" >&2
# 'false' is the last command, so the function's exit status is non-zero
false
}

# set up minimal environment: $EESSI_PREFIX, $EESSI_VERSION, $EESSI_OS_TYPE, $EESSI_CPU_FAMILY, $EPREFIX
source $EESSI_INIT_DIR_PATH/minimal_eessi_env

Expand Down

0 comments on commit 870a7b7

Please sign in to comment.