From ffdb1bc85ea4c7849346f2b80115cb436391bfc2 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Fri, 16 Aug 2024 17:32:13 +0000 Subject: [PATCH 01/14] Update ALCF Sunspot machine config --- cime_config/machines/config_machines.xml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 6cd9d585a25b..67611a0de225 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3413,13 +3413,9 @@ module /soft/packaging/lmod/lmod/libexec/lmod python - - /soft/modulefiles - spack cmake/3.26.3-gcc-11.2.0-vnn7ncx - prepend-deps/default + cmake - gcc oneapi/eng-compiler/2023.05.15.007 From 7c473e334e68aef5d142c55e245109f624a6aaaa Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Mon, 19 Aug 2024 22:26:49 +0000 Subject: [PATCH 02/14] Update oneapi to latest default --- cime_config/machines/config_machines.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 67611a0de225..446262a3a9b2 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3416,7 +3416,7 @@ cmake - oneapi/eng-compiler/2023.05.15.007 + oneapi/eng-compiler/2024.04.15.002 spack cmake From a4b7c3b9a75acdef005b3bbfc6dd671c7b2c2248 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Mon, 19 Aug 2024 22:29:07 +0000 Subject: [PATCH 03/14] Run BGC coupled cases on at least 2 nodes --- cime_config/allactive/config_pesall.xml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/cime_config/allactive/config_pesall.xml b/cime_config/allactive/config_pesall.xml index e99230e7cb04..78ba011d9b78 100644 --- a/cime_config/allactive/config_pesall.xml +++ b/cime_config/allactive/config_pesall.xml @@ -1472,6 +1472,21 @@ + + + sunspot: --compset BGC* --res ne30pg2_r05_IcoswISC30E3r5 on 2 nodes pure-MPI + + 
-2 + -2 + -2 + -2 + -2 + -2 + -2 + -2 + + + From e3e52e2b833f57d74231107cfcc3efc4c940b393 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Mon, 19 Aug 2024 22:34:11 +0000 Subject: [PATCH 04/14] Remove -check flags from debug builds They lead to link errors in currently available oneapi versions. --- cime_config/machines/cmake_macros/oneapi-ifx.cmake | 2 +- cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cime_config/machines/cmake_macros/oneapi-ifx.cmake b/cime_config/machines/cmake_macros/oneapi-ifx.cmake index e98a65d32a67..9ab0cdda7d51 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifx.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifx.cmake @@ -7,7 +7,7 @@ endif() string(APPEND CMAKE_C_FLAGS_RELEASE " -O2") string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2") string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2") -string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created") +string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -fpe0") string(APPEND CMAKE_C_FLAGS_DEBUG " -O0 -g") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0 -g") string(APPEND CMAKE_C_FLAGS " -fp-model precise -std=gnu99") diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake index 9d08ca6c6302..d7dfae002192 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake @@ -7,7 +7,7 @@ endif() string(APPEND CMAKE_C_FLAGS_RELEASE " -O2") string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2") string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2") -string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created") +string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -fpe0") string(APPEND CMAKE_C_FLAGS_DEBUG " -O0 -g") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0 -g") string(APPEND 
CMAKE_C_FLAGS " -fp-model precise -std=gnu99") From c6e75e3fa6b3f9c1c1814ca7d368f4111a626949 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Mon, 19 Aug 2024 22:50:35 +0000 Subject: [PATCH 05/14] Cleanup unused mpilib refs --- cime_config/machines/config_machines.xml | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 446262a3a9b2..c94a2bbcf7d2 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3365,7 +3365,7 @@ uan-.* LINUX oneapi-ifx,oneapi-ifxgpu,gnu - mpich,impi,openmpi + mpich CSC249ADSE15_CNDA /gila/CSC249ADSE15_CNDA/performance_archive .* @@ -3398,13 +3398,6 @@ $ENV{GPU_TILE_COMPACT} - - mpirun - - --tag-output -n {{ total_tasks }} - --map-by ppr:{{ tasks_per_numa }}:socket:PE=$ENV{OMP_NUM_THREADS} --bind-to hwthread - - /soft/packaging/lmod/lmod/init/sh /soft/packaging/lmod/lmod/init/csh @@ -3434,12 +3427,6 @@ 1 - - 10 - omp - spread - unit - level_zero:gpu NO_GPU From eae53e54007c2be42ef3ee9f0d809e1ff88c53d9 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Tue, 20 Aug 2024 03:21:06 +0000 Subject: [PATCH 06/14] Run ne4-cases at 96x1 MPIxOMP --- cime_config/allactive/config_pesall.xml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/cime_config/allactive/config_pesall.xml b/cime_config/allactive/config_pesall.xml index 78ba011d9b78..84c0e24d8c9b 100644 --- a/cime_config/allactive/config_pesall.xml +++ b/cime_config/allactive/config_pesall.xml @@ -1759,6 +1759,21 @@ + + + allactive+sunspot: default, 96 mpi x 1 omp @ root 0 + + 96 + 96 + 96 + 96 + 96 + 96 + 96 + 96 + + + From ffec269d010a34f04b3ff132981dbeb0bcc24ac3 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Tue, 20 Aug 2024 03:22:36 +0000 Subject: [PATCH 07/14] Add initial sunspot machine file for eamxx --- components/eamxx/cmake/machine-files/sunspot.cmake | 10 ++++++++++ 1 file changed, 10 
insertions(+) create mode 100644 components/eamxx/cmake/machine-files/sunspot.cmake diff --git a/components/eamxx/cmake/machine-files/sunspot.cmake b/components/eamxx/cmake/machine-files/sunspot.cmake new file mode 100644 index 000000000000..09ff6d4e6c48 --- /dev/null +++ b/components/eamxx/cmake/machine-files/sunspot.cmake @@ -0,0 +1,10 @@ +include(${CMAKE_CURRENT_LIST_DIR}/common.cmake) +common_setup() + +include (${EKAT_MACH_FILES_PATH}/kokkos/serial.cmake) +include (${EKAT_MACH_FILES_PATH}/mpi/other.cmake) + +set(EKAT_MPIRUN_EXE "mpiexec" CACHE STRING "" FORCE) +set(EKAT_MPI_NP_FLAG "-np" CACHE STRING "" FORCE) +set(EKAT_MPI_EXTRA_ARGS "--label --cpu-bind depth -envall" CACHE STRING "") +set(EKAT_MPI_THREAD_FLAG "-d" CACHE STRING "") From 0b58cb0c955bafd28f906f1f1fb006153a0978a4 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Wed, 21 Aug 2024 23:18:20 +0000 Subject: [PATCH 08/14] Load pre-built kokkos module for GPU builds Also export ZES_ENABLE_SYSMAN=1 to avoid ext_intel_free_memory run-time errors. --- cime_config/machines/config_machines.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index c94a2bbcf7d2..b6ac815f58f6 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3411,6 +3411,9 @@ oneapi/eng-compiler/2024.04.15.002 + + kokkos/git.7ff87a5-omp-sycl + spack cmake gcc/10.3.0 @@ -3439,6 +3442,8 @@ /soft/tools/mpi_wrapper_utils/gpu_tile_compact.sh 131072 20 + $ENV{KOKKOS_ROOT} + 1 0 From 226ac0ff37064badf2f4237b111a93ecdf20ce74 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Wed, 21 Aug 2024 23:21:54 +0000 Subject: [PATCH 09/14] Fix a SYCL typo Also avoid a call to variadic printf in a SYCL kernel within an error diagnostic log. 
--- components/homme/src/share/compose/cedr_kokkos.hpp | 2 +- components/homme/src/share/compose/compose_slmm_siqk.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/components/homme/src/share/compose/cedr_kokkos.hpp b/components/homme/src/share/compose/cedr_kokkos.hpp index 42e423e2913a..758f4148a9a4 100644 --- a/components/homme/src/share/compose/cedr_kokkos.hpp +++ b/components/homme/src/share/compose/cedr_kokkos.hpp @@ -18,7 +18,7 @@ typedef Kokkos::Experimental::HIPSpace CedrGpuSpace; # endif # if defined KOKKOS_ENABLE_SYCL typedef Kokkos::Experimental::SYCL CedrGpuExeSpace; -typedef Kokkos::Experimental::SYCL> CedrGpuSpace; +typedef Kokkos::Experimental::SYCL CedrGpuSpace; # endif #endif diff --git a/components/homme/src/share/compose/compose_slmm_siqk.cpp b/components/homme/src/share/compose/compose_slmm_siqk.cpp index 628c023090cb..56564b0b8ca6 100644 --- a/components/homme/src/share/compose/compose_slmm_siqk.cpp +++ b/components/homme/src/share/compose/compose_slmm_siqk.cpp @@ -60,8 +60,10 @@ class TestSphereToRefKernel { // tol is on dx, not (a,b), so adjust slightly. if ( ! 
info.success || err > 1e4*tol_) { jinfo.nfails++; +#ifndef KOKKOS_ENABLE_SYCL printf("calc_sphere_to_ref ei %d i %d j %d: nits %d re %1.1e\n", ei, i, j, info.n_iterations, err); +#endif } jinfo.sum_nits += info.n_iterations; jinfo.max_nits = max(jinfo.max_nits, info.n_iterations); From d5c15c3ca7fa41b564b41b30aa79c146dafc399c Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Wed, 21 Aug 2024 23:30:34 +0000 Subject: [PATCH 10/14] Let SYCL kernels call a virtual function --- .../machines/cmake_macros/oneapi-ifxgpu_aurora.cmake | 1 + .../machines/cmake_macros/oneapi-ifxgpu_sunspot.cmake | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot.cmake diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake index 47d513408c2c..16288ce4dee7 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake @@ -5,3 +5,4 @@ if (compile_threaded) endif() string(APPEND KOKKOS_OPTIONS " -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ARCH_INTEL_PVC=On -DKokkos_ENABLE_SYCL=On -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off") string(APPEND SYCL_FLAGS " -\-intel -fsycl -fsycl-targets=spir64_gen -mlong-double-64 -Xsycl-target-backend \"-device 12.60.7\"") +string(APPEND CMAKE_CXX_FLAGS " -Xclang -fsycl-allow-virtual-functions") diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot.cmake new file mode 100644 index 000000000000..6835515164f0 --- /dev/null +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot.cmake @@ -0,0 +1,7 @@ + +string(APPEND CMAKE_EXE_LINKER_FLAGS " -lmkl_intel_lp64 -lmkl_sequential -lmkl_core") +if (compile_threaded) + string(APPEND CMAKE_EXE_LINKER_FLAGS " -fiopenmp -fopenmp-targets=spir64") +endif() +string(APPEND SYCL_FLAGS " -\-intel -fsycl 
-fsycl-targets=spir64_gen -mlong-double-64 -Xsycl-target-backend \"-device 12.60.7\"") +string(APPEND CMAKE_CXX_FLAGS " -Xclang -fsycl-allow-virtual-functions") From bd79ca9ce10bafa8ec2d2f170bec5863d73b860e Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Wed, 28 Aug 2024 20:35:07 +0000 Subject: [PATCH 11/14] Run in 4-CCS mode --- cime_config/machines/config_machines.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index b6ac815f58f6..fed0110bc32b 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3385,7 +3385,7 @@ 104 104 104 - 12 + 48 FALSE mpiexec @@ -3444,6 +3444,7 @@ 20 $ENV{KOKKOS_ROOT} 1 + 0:4,1:4,2:4,3:4,4:4,5:4,6:4,7:4 0 From 6ae05889a8664d227995f661cdbeae13e3b9136d Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Wed, 28 Aug 2024 20:43:22 +0000 Subject: [PATCH 12/14] Disable openmp-offload --- cime_config/machines/Depends.oneapi-ifxgpu.cmake | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/cime_config/machines/Depends.oneapi-ifxgpu.cmake b/cime_config/machines/Depends.oneapi-ifxgpu.cmake index 0dd35e56bcc9..5a958df26eba 100644 --- a/cime_config/machines/Depends.oneapi-ifxgpu.cmake +++ b/cime_config/machines/Depends.oneapi-ifxgpu.cmake @@ -1,14 +1,5 @@ -set(CPPDEFS "${CPPDEFS} -DMPAS_OPENMP_OFFLOAD") -list(APPEND MPAS_ADD_ACC_FLAGS - ${CMAKE_BINARY_DIR}/core_seaice/shared/mpas_seaice_mesh_pool.f90 - ${CMAKE_BINARY_DIR}/core_seaice/shared/mpas_seaice_velocity_solver_variational.f90 - ${CMAKE_BINARY_DIR}/core_seaice/shared/mpas_seaice_velocity_solver.f90 -) - -foreach(ITEM IN LISTS MPAS_ADD_ACC_FLAGS) - e3sm_add_flags("${ITEM}" "-fiopenmp -fopenmp-targets=spir64") -endforeach() - # compile mpas_seaice_core_interface.f90 with ifort, not ifx -e3sm_add_flags("${CMAKE_BINARY_DIR}/core_seaice/model_forward/mpas_seaice_core_interface.f90" "-fc=ifort") +if (NOT 
MPILIB STREQUAL "openmpi") + e3sm_add_flags("${CMAKE_BINARY_DIR}/core_seaice/model_forward/mpas_seaice_core_interface.f90" "-fc=ifort") +endif() From 04872f3350c20c5e10faf14785c11d647f6bbe12 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Fri, 30 Aug 2024 03:00:57 +0000 Subject: [PATCH 13/14] Update Aurora machine config --- cime_config/allactive/config_pesall.xml | 8 ++-- .../cmake_macros/oneapi-ifxgpu_aurora.cmake | 1 - cime_config/machines/config_batch.xml | 2 +- cime_config/machines/config_machines.xml | 47 +++++++++---------- .../eamxx/cmake/machine-files/aurora.cmake | 14 ++++++ 5 files changed, 41 insertions(+), 31 deletions(-) create mode 100644 components/eamxx/cmake/machine-files/aurora.cmake diff --git a/cime_config/allactive/config_pesall.xml b/cime_config/allactive/config_pesall.xml index 84c0e24d8c9b..3392ad71ca10 100644 --- a/cime_config/allactive/config_pesall.xml +++ b/cime_config/allactive/config_pesall.xml @@ -1472,9 +1472,9 @@ - + - sunspot: --compset BGC* --res ne30pg2_r05_IcoswISC30E3r5 on 2 nodes pure-MPI + sunspot|aurora: --compset BGC* --res ne30pg2_r05_IcoswISC30E3r5 on 2 nodes pure-MPI -2 -2 @@ -1759,9 +1759,9 @@ - + - allactive+sunspot: default, 96 mpi x 1 omp @ root 0 + allactive+sunspot|aurora: default, 96 mpi x 1 omp @ root 0 96 96 diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake index 16288ce4dee7..6835515164f0 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake @@ -3,6 +3,5 @@ string(APPEND CMAKE_EXE_LINKER_FLAGS " -lmkl_intel_lp64 -lmkl_sequential -lmkl_c if (compile_threaded) string(APPEND CMAKE_EXE_LINKER_FLAGS " -fiopenmp -fopenmp-targets=spir64") endif() -string(APPEND KOKKOS_OPTIONS " -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ARCH_INTEL_PVC=On -DKokkos_ENABLE_SYCL=On -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off") string(APPEND 
SYCL_FLAGS " -\-intel -fsycl -fsycl-targets=spir64_gen -mlong-double-64 -Xsycl-target-backend \"-device 12.60.7\"") string(APPEND CMAKE_CXX_FLAGS " -Xclang -fsycl-allow-virtual-functions") diff --git a/cime_config/machines/config_batch.xml b/cime_config/machines/config_batch.xml index 448eefece216..13cfdfb337b6 100644 --- a/cime_config/machines/config_batch.xml +++ b/cime_config/machines/config_batch.xml @@ -562,7 +562,7 @@ - /lus/gecko/projects/CSC249ADSE15_CNDA/tools/qsub/throttle + /lus/flare/projects/CSC249ADSE15_CNDA/tools/qsub/throttle EarlyAppAccess workq-route diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index fed0110bc32b..f1bed254da48 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3474,14 +3474,14 @@ oneapi-ifx,oneapi-ifxgpu,gnu mpich CSC249ADSE15_CNDA - /lus/gecko/projects/CSC249ADSE15_CNDA/performance_archive + /lus/flare/projects/CSC249ADSE15_CNDA/performance_archive .* - /lus/gecko/projects/CSC249ADSE15_CNDA/$USER/scratch - /lus/gecko/projects/CSC249ADSE15_CNDA/inputdata - /lus/gecko/projects/CSC249ADSE15_CNDA/inputdata/atm/datm7 + /lus/flare/projects/CSC249ADSE15_CNDA/$USER/scratch + /lus/flare/projects/CSC249ADSE15_CNDA/inputdata + /lus/flare/projects/CSC249ADSE15_CNDA/inputdata/atm/datm7 $CIME_OUTPUT_ROOT/archive/$CASE - /lus/gecko/projects/CSC249ADSE15_CNDA/baselines/$COMPILER - /lus/gecko/projects/CSC249ADSE15_CNDA/tools/cprnc/cprnc + /lus/flare/projects/CSC249ADSE15_CNDA/baselines/$COMPILER + /lus/flare/projects/CSC249ADSE15_CNDA/tools/cprnc/cprnc 16 e3sm_developer 4 @@ -3490,7 +3490,7 @@ 208 104 104 - 12 + 48 FALSE mpiexec @@ -3498,46 +3498,40 @@ -np {{ total_tasks }} --label -ppn {{ tasks_per_node }} - --cpu-bind $ENV{RANKS_BIND} -envall + --cpu-bind depth -envall -d $ENV{OMP_NUM_THREADS} $ENV{GPU_TILE_COMPACT} - /lus/gecko/projects/CSC249ADSE15_CNDA/modules/lmod.sh + /lus/flare/projects/CSC249ADSE15_CNDA/modules/lmod.sh 
/soft/sunspot_migrate/soft/packaging/lmod/lmod/init/csh /soft/sunspot_migrate/soft/packaging/lmod/lmod/init/env_modules_python.py module module /soft/sunspot_migrate/soft/packaging/lmod/lmod/libexec/lmod python - - /soft/modulefiles - /soft/restricted/CNDA/updates/modulefiles - spack-pe-gcc cmake + cmake - oneapi/eng-compiler/2023.05.15.007 + oneapi/eng-compiler/2024.04.15.002 + + + kokkos/git.7ff87a5-omp-sycl spack-pe-gcc cmake gcc/10.3.0 - - cray-pals - libfabric/1.15.2.0 - cray-libpals/1.3.2 - $CIME_OUTPUT_ROOT/$CASE/run $CIME_OUTPUT_ROOT/$CASE/bld - /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007 - /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007 - /lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007 - /lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/lib:$ENV{LD_LIBRARY_PATH} - /lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/bin:$ENV{PATH} - list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203 + /lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2024.04.15.002 + /lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2024.04.15.002 + /lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2024.04.15.002 + 
/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2024.04.15.002/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2024.04.15.002/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2024.04.15.002/lib:$ENV{LD_LIBRARY_PATH} + /lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2024.04.15.002/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2024.04.15.002/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2024.04.15.002/bin:$ENV{PATH} 1 @@ -3554,6 +3548,9 @@ /soft/tools/mpi_wrapper_utils/gpu_tile_compact.sh 131072 20 + $ENV{KOKKOS_ROOT} + 1 + 0:4,1:4,2:4,3:4,4:4,5:4,6:4,7:4 0 diff --git a/components/eamxx/cmake/machine-files/aurora.cmake b/components/eamxx/cmake/machine-files/aurora.cmake new file mode 100644 index 000000000000..59157285bab7 --- /dev/null +++ b/components/eamxx/cmake/machine-files/aurora.cmake @@ -0,0 +1,14 @@ +include(${CMAKE_CURRENT_LIST_DIR}/common.cmake) +common_setup() + +include (${EKAT_MACH_FILES_PATH}/kokkos/serial.cmake) +include (${EKAT_MACH_FILES_PATH}/mpi/other.cmake) + +set(EKAT_MPIRUN_EXE "mpiexec" CACHE STRING "" FORCE) +set(EKAT_MPI_NP_FLAG "-np" CACHE STRING "" FORCE) +set(EKAT_MPI_EXTRA_ARGS "--label --cpu-bind depth -envall" CACHE STRING "") +set(EKAT_MPI_THREAD_FLAG "-d" CACHE STRING "") + +set(NETCDF_C_PATH "$ENV{NETCDF_C_PATH}") +set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_FORTRAN_PATH}") +set(PNETCDF_PATH "$ENV{PNETCDF_PATH}") From 4b5cf35b729a221770acd1134c720a7f8dff8410 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Sat, 31 Aug 2024 02:18:41 +0000 Subject: [PATCH 14/14] Remove "--cpu-bind depth" in Aurora mpiexec --- cime_config/machines/config_machines.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index f1bed254da48..e97c93957a8e 100644 --- 
a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3498,7 +3498,7 @@ -np {{ total_tasks }} --label -ppn {{ tasks_per_node }} - --cpu-bind depth -envall + -envall -d $ENV{OMP_NUM_THREADS} $ENV{GPU_TILE_COMPACT}