Took care of all noted changes

TopRichard · Jan 22, 2024 · 50aff09 · 50aff09
1 parent b6b3352
commit 50aff09
Show file tree

Hide file tree

Showing 7 changed files with 71 additions and 24 deletions.
diff --git a/create_tarball.sh b/create_tarball.sh
@@ -43,7 +43,7 @@ module_files_list=${tmpdir}/module_files.list.txt
 if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod ]; then
     # include Lmod cache and configuration file (lmodrc.lua),
     # skip whiteout files and backup copies of Lmod cache (spiderT.old.*)
-    find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' > ${files_list}
+    find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' >> ${files_list}
 fi
 
 # include scripts that were copied by install_scripts.sh, which we want to ship in EESSI repository

diff --git a/eb_hooks.py b/eb_hooks.py
@@ -185,20 +185,26 @@ def parse_hook_fontconfig_add_fonts(ec, eprefix):
 
 
 def parse_hook_openblas_relax_lapack_tests_num_errors(ec, eprefix):
-    """Relax number of failing numerical LAPACK tests for aarch64/neoverse_v1 CPU target."""
+    """Relax number of failing numerical LAPACK tests for aarch64 CPU targets (neoverse_v1 and generic) for OpenBLAS < 0.3.23."""
     cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
     if ec.name == 'OpenBLAS':
-        # relax maximum number of failed numerical LAPACK tests for aarch64/neoverse_v1 CPU target
-        # since the default setting of 150 that works well on other aarch64 targets and x86_64 is a bit too strict
-        # See https://github.com/EESSI/software-layer/issues/314
-        cfg_option = 'max_failing_lapack_tests_num_errors'
-        if cpu_target == CPU_TARGET_NEOVERSE_V1:
-            orig_value = ec[cfg_option]
-            ec[cfg_option] = 400
-            print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)",
-                      ec.name, ec[cfg_option], orig_value)
-        else:
-            print_msg("Not changing option %s for %s on non-AARCH64", cfg_option, ec.name)
+        if LooseVersion(ec.version) < LooseVersion('0.3.23'):
+            # relax maximum number of failed numerical LAPACK tests for aarch64/neoverse_v1 CPU target
+            # since the default setting of 150 that works well on other aarch64 targets and x86_64 is a bit too strict
+            # See https://github.com/EESSI/software-layer/issues/314
+            cfg_option = 'max_failing_lapack_tests_num_errors'
+            if cpu_target == CPU_TARGET_NEOVERSE_V1:
+                orig_value = ec[cfg_option]
+                ec[cfg_option] = 400
+                print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)",
+                          ec.name, ec[cfg_option], orig_value)
+            elif cpu_target == CPU_TARGET_AARCH64_GENERIC:
+                orig_value = ec[cfg_option]
+                ec[cfg_option] = 302
+                print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)",
+                          ec.name, ec[cfg_option], orig_value)
+            else:
+                print_msg("Not changing option %s for %s on non-AARCH64", cfg_option, ec.name)
     else:
         raise EasyBuildError("OpenBLAS-specific hook triggered for non-OpenBLAS easyconfig?!")
 
@@ -393,6 +399,18 @@ def pre_test_hook_ignore_failing_tests_SciPybundle(self, *args, **kwargs):
     if self.name == 'SciPy-bundle' and self.version in scipy_bundle_versions and cpu_target == CPU_TARGET_NEOVERSE_V1:
         self.cfg['testopts'] = "|| echo ignoring failing tests"
 
+def pre_test_hook_ignore_failing_tests_netCDF(self, *args, **kwargs):
+    """
+    Pre-test hook for netCDF: ignore failing tests for netCDF 4.9.2 on neoverse_v1,
+    cf. https://github.com/EESSI/software-layer/issues/425
+    The following tests are problematic:
+        163 - nc_test4_run_par_test (Timeout)
+        190 - h5_test_run_par_tests (Timeout)
+    A few other tests are skipped in the easyconfig and patches for similar issues, see above issue for details.
+    """
+    cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
+    if self.name == 'netCDF' and self.version == '4.9.2' and cpu_target == CPU_TARGET_NEOVERSE_V1:
+        self.cfg['testopts'] = "|| echo ignoring failing tests" 
 
 def pre_single_extension_hook(ext, *args, **kwargs):
     """Main pre-extension: trigger custom functions based on software name."""
@@ -573,6 +591,7 @@ def inject_gpu_property(ec):
     'ESPResSo': pre_test_hook_ignore_failing_tests_ESPResSo,
     'FFTW.MPI': pre_test_hook_ignore_failing_tests_FFTWMPI,
     'SciPy-bundle': pre_test_hook_ignore_failing_tests_SciPybundle,
+    'netCDF': pre_test_hook_ignore_failing_tests_netCDF,
 }
 
 PRE_SINGLE_EXTENSION_HOOKS = {

diff --git a/eessi-2023.06-known-issues.yml b/eessi-2023.06-known-issues.yml
@@ -0,0 +1,28 @@
+- aarch64/neoverse_v1:
+  - ESPResSo-4.2.1-foss-2023a:
+      - issue: https://github.com/EESSI/software-layer/issues/363
+      - info: "ESPResSo tests failing due to timeouts"
+  - FFTW.MPI-3.3.10-gompi-2023a:
+      - issue: https://github.com/EESSI/software-layer/issues/325
+      - info: "Flaky FFTW tests, random failures"
+  - FFTW.MPI-3.3.10-gompi-2023b:
+      - issue: https://github.com/EESSI/software-layer/issues/325
+      - info: "Flaky FFTW tests, random failures"
+  - netCDF-4.9.2-gompi-2023a.eb:
+      - issue: https://github.com/EESSI/software-layer/issues/425
+      - info: "netCDF intermittent test failures"
+  - netCDF-4.9.2-gompi-2023b.eb:
+      - issue: https://github.com/EESSI/software-layer/issues/425
+      - info: "netCDF intermittent test failures"
+  - OpenBLAS-0.3.21-GCC-12.2.0:
+    - issue: https://github.com/EESSI/software-layer/issues/314
+    - info: "Increased number of numerical errors in OpenBLAS test suite (344 vs max. 150 on x86_64/*)"
+  - SciPy-bundle-2023.02-gfbf-2022b:
+    - issue: https://github.com/EESSI/software-layer/issues/318
+    - info: "numpy built with -march=armv8.4-a instead of -mcpu=native (no SVE) + 2 failing tests (vs 50005 passed) in scipy test suite"
+  - SciPy-bundle-2023.07-gfbf-2023a:
+    - issue: https://github.com/EESSI/software-layer/issues/318
+    - info: "2 failing tests (vs 54409 passed) in scipy test suite"
+  - SciPy-bundle-2023.11-gfbf-2023b:
+    - issue: https://github.com/EESSI/software-layer/issues/318
+    - info: "2 failing tests (vs 54876 passed) in scipy test suite"
diff --git a/eessi_container.sh b/eessi_container.sh
@@ -30,8 +30,8 @@
 # -. initial settings & exit codes
 TOPDIR=$(dirname $(realpath $0))
 
-source ${TOPDIR}/scripts/utils.sh
-source ${TOPDIR}/scripts/cfg_files.sh
+source "${TOPDIR}"/scripts/utils.sh
+source "${TOPDIR}"/scripts/cfg_files.sh
 
 # exit codes: bitwise shift codes to allow for combination of exit codes
 # ANY_ERROR_EXITCODE is sourced from ${TOPDIR}/scripts/utils.sh
@@ -83,7 +83,7 @@ display_help() {
   echo "                           MODE==run (run a script or command) [default: shell]"
   echo "  -n | --nvidia MODE     - configure the container to work with NVIDIA GPUs,"
   echo "                           MODE==install for a CUDA installation, MODE==run to"
-  echo "                           attach a GPU, MODE==all for both [default: false]"  
+  echo "                           attach a GPU, MODE==all for both [default: false]"
   echo "  -r | --repository CFG  - configuration file or identifier defining the"
   echo "                           repository to use [default: EESSI via"
   echo "                           default container, see --container]"
@@ -575,7 +575,7 @@ fi
 declare -a EESSI_FUSE_MOUNTS=()
 
 # always mount cvmfs-config repo (to get access to software.eessi.io)
-# EESSI_FUSE_MOUNTS+=("--fusemount" "container:cvmfs2 cvmfs-config.cern.ch /cvmfs/cvmfs-config.cern.ch")
+# Intentionally commented out: EESSI_FUSE_MOUNTS+=("--fusemount" "container:cvmfs2 cvmfs-config.cern.ch /cvmfs/cvmfs-config.cern.ch")
 
 if [[ "${ACCESS}" == "ro" ]]; then
   export EESSI_READONLY="container:cvmfs2 ${repo_name} /cvmfs/${repo_name}"

diff --git a/init/bash b/init/bash
@@ -23,7 +23,7 @@ if [ $? -eq 0 ]; then
     source $EESSI_EPREFIX/usr/share/Lmod/init/bash
 
     # prepend location of modules for EESSI software stack to $MODULEPATH
-    echo "Prepending $EESSI_MODULEPATH to \$MODULEPATH..." >> $output
+    show_msg "Prepending $EESSI_MODULEPATH to \$MODULEPATH..." >> $output
     module use $EESSI_MODULEPATH
 
     #show_msg ""

diff --git a/init/eessi_archdetect.sh b/init/eessi_archdetect.sh
@@ -118,7 +118,7 @@ cpupath(){
             # each flag in this CPU specification must be found in the list of flags of the host
             check_allinfirst "${cpu_flags[*]}" ${arch_spec[2]} && best_arch_match=${arch_spec[0]} && \
                 all_arch_matches="$best_arch_match:$all_arch_matches" && \
-                log "DEBUG" "cpupath: host CPU best match updated to $best_arch_match"
+                log "DEBUG" "cpupath: host CPU best match updated to $best_arch_match" 
         fi
     done
 

diff --git a/init/eessi_environment_variables b/init/eessi_environment_variables
@@ -2,6 +2,11 @@
 # $BASH_SOURCE points to correct path, see also http://mywiki.wooledge.org/BashFAQ/028
 EESSI_INIT_DIR_PATH=$(dirname $(realpath $BASH_SOURCE))
 
+function error() {
+    echo -e "\e[31mERROR: $1\e[0m" >&2
+    false
+}
+
 function show_msg {
   # only echo msg if EESSI_SILENT is unset
   msg=$1
@@ -10,11 +15,6 @@ function show_msg {
   fi
 }
 
-function error() {
-    echo -e "\e[31mERROR: $1\e[0m" >&2
-    false
-}
-
 # set up minimal environment: $EESSI_PREFIX, $EESSI_VERSION, $EESSI_OS_TYPE, $EESSI_CPU_FAMILY, $EPREFIX
 source $EESSI_INIT_DIR_PATH/minimal_eessi_env