From 4224998744ee5fabe1411d3f68b146e9e6b7548a Mon Sep 17 00:00:00 2001 From: Guoqing Ge Date: Tue, 15 Oct 2024 14:21:09 -0600 Subject: [PATCH] port RDASApp to Gaea (#196) port RDASApp to Gaea Link to issue #170 --- README.md | 1 + build.sh | 6 +- docs/build_and_test.md | 7 +- modulefiles/EVA/gaeaC5.lua | 19 +++++ modulefiles/EVA/gaeaC6.lua | 19 +++++ modulefiles/RDAS/gaea.intel.lua | 82 +++++++++++++++++++ .../templates/mpasjedi_expr/run_bump.sh | 7 +- .../templates/mpasjedi_expr/run_jedi.sh | 7 +- ush/detect_machine.sh | 14 ++-- ush/init.sh | 9 ++ ush/load_eva.sh | 12 ++- 11 files changed, 167 insertions(+), 16 deletions(-) create mode 100644 modulefiles/EVA/gaeaC5.lua create mode 100644 modulefiles/EVA/gaeaC6.lua create mode 100644 modulefiles/RDAS/gaea.intel.lua diff --git a/README.md b/README.md index 1ef496c8..b8548f50 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ Repository for JEDI-based data assimilation for the Rapid Refresh Forecast Syste - NOAA RDHPCS Jet - NOAA RDHPCS Orion - NOAA RDHPCS Hercules +- NOAA RDHPCS Gaea (C5 and C6) ## [How to clone and build RDASAPP?](docs/build_and_test.md) ## [How to run RRFS CTest?](docs/build_and_test.md#3-rrfs-ctest) diff --git a/build.sh b/build.sh index d9a7f2b1..3e30048b 100755 --- a/build.sh +++ b/build.sh @@ -77,10 +77,10 @@ while getopts "p:c:m:j:hvfsx" opt; do done case ${BUILD_TARGET} in - hera | orion | hercules | jet) + hera | orion | hercules | jet | gaea ) echo "Building RDASApp on $BUILD_TARGET" echo " Build initiated `date`" - source $dir_root/ush/module-setup.sh + [[ "${BUILD_TARGET}" != *gaea* ]] && source $dir_root/ush/module-setup.sh module use $dir_root/modulefiles module load RDAS/$BUILD_TARGET.$COMPILER CMAKE_OPTS+=" -DMPIEXEC_EXECUTABLE=$MPIEXEC_EXEC -DMPIEXEC_NUMPROC_FLAG=$MPIEXEC_NPROC -DBUILD_GSIBEC=ON -DMACHINE_ID=$MACHINE_ID" @@ -95,7 +95,7 @@ esac # Set default number of build jobs based on machine if [[ $BUILD_TARGET == 'orion' ]]; then # lower due to memory limit on login nodes 
BUILD_JOBS=${BUILD_JOBS:-4} -else # hera, hercules, jet +else # hera, hercules, jet, gaea BUILD_JOBS=${BUILD_JOBS:-6} fi diff --git a/docs/build_and_test.md b/docs/build_and_test.md index 7bb9ebb0..ffe2932e 100644 --- a/docs/build_and_test.md +++ b/docs/build_and_test.md @@ -1,5 +1,5 @@ ## 1.Clone RDASApp -If running on Orion/Hercules, you will need to run `module load git-lfs` before cloning. +If running on Orion/Hercules/Gaea, you will need to run `module load git-lfs` before cloning. ``` git clone --recurse-submodules https://github.com/NOAA-EMC/RDASApp.git ``` @@ -14,4 +14,7 @@ Run `./build.sh -h` to learn more about command line options supported by build. ``` ush/run_rrfs_tests.sh $account ``` -Where `$account` is your valid slurm resource account (e.g., `fv3-cam`, `da-cpu`, `wrfruc`, `rtrr`, `nrtrr`, etc.). +Where `$account` is your valid slurm resource account (e.g., `fv3-cam`, `da-cpu`, `wrfruc`, `rtrr`, `nrtrr`, etc.). +- To run ctest manually without using the above bash script, follow these two steps first: +`source ush/load_rdas.sh` +`export SLURM_ACCOUNT=$account` diff --git a/modulefiles/EVA/gaeaC5.lua b/modulefiles/EVA/gaeaC5.lua new file mode 100644 index 00000000..f50c60b3 --- /dev/null +++ b/modulefiles/EVA/gaeaC5.lua @@ -0,0 +1,19 @@ +help([[ +Load environment for running EVA. +]]) + +local pkgName = myModuleName() +local pkgVersion = myModuleVersion() +local pkgNameVer = myModuleFullName() + +conflict(pkgName) + +prepend_path("MODULEPATH", '/gpfs/f5/gsl-glo/world-shared/gge/miniconda3/modulefiles') + +load("miniconda3/4.6.14") +load("eva/1.0.0") + +whatis("Name: ".. pkgName) +whatis("Version: ".. pkgVersion) +whatis("Category: EVA") +whatis("Description: Load all libraries needed for EVA") diff --git a/modulefiles/EVA/gaeaC6.lua b/modulefiles/EVA/gaeaC6.lua new file mode 100644 index 00000000..50f8c2c0 --- /dev/null +++ b/modulefiles/EVA/gaeaC6.lua @@ -0,0 +1,19 @@ +help([[ +Load environment for running EVA. 
+]]) + +local pkgName = myModuleName() +local pkgVersion = myModuleVersion() +local pkgNameVer = myModuleFullName() + +conflict(pkgName) + +prepend_path("MODULEPATH", '/gpfs/f6/bil-fire10-oar/world-shared/gge/miniconda3/modulefiles') + +load("miniconda3/4.6.14") +load("eva/1.0.0") + +whatis("Name: ".. pkgName) +whatis("Version: ".. pkgVersion) +whatis("Category: EVA") +whatis("Description: Load all libraries needed for EVA") diff --git a/modulefiles/RDAS/gaea.intel.lua b/modulefiles/RDAS/gaea.intel.lua new file mode 100644 index 00000000..0d6ca5e6 --- /dev/null +++ b/modulefiles/RDAS/gaea.intel.lua @@ -0,0 +1,82 @@ +help([[ +Load environment for running the RDAS application with Intel compilers and MPI. +]]) + +local pkgName = myModuleName() +local pkgVersion = myModuleVersion() +local pkgNameVer = myModuleFullName() + +prepend_path("MODULEPATH", '/ncrc/proj/epic/spack-stack/c6/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core') + +-- below two lines get us access to the spack-stack modules +load("stack-intel/2023.2.0") +load("stack-cray-mpich/8.1.29") +-- JCSDA has 'jedi-fv3-env/unified-dev', but we should load these manually as needed +load("cmake/3.23.1") +load("gettext/0.20.2") +--load("libunistring/1.1") +--load("libidn2/2.3.4") +load("pcre2/10.42") +load("curl/8.4.0") +load("zlib/1.2.13") +load("git/2.42.0") +load("pkg-config/0.29.2") +load("hdf5/1.14.0") +load("parallel-netcdf/1.12.2") +load("netcdf-c/4.9.2") +load("nccmp/1.9.0.1") +load("netcdf-fortran/4.6.1") +load("nco/5.0.6") +load("parallelio/2.5.10") +load("wget/1.20.3") +load("boost/1.83.0") +load("bufr/12.0.1") +load("git-lfs/2.11.0") +load("ecbuild/3.7.2") +load("openjpeg/2.3.1") +load("eccodes/2.32.0") +load("eigen/3.4.0") +load("openblas/0.3.24") +load("eckit/1.24.5") +load("fftw/3.3.10") +load("fckit/0.11.0") +load("fiat/1.2.0") +--load("ectrans/1.2.0") +load("atlas/0.35.1") +load("sp/2.5.0") +load("gsl-lite/0.37.0") +load("libjpeg/2.1.0") +load("krb5/1.20.1") +load("libtirpc/1.3.3") 
+load("hdf/4.2.15") +load("jedi-cmake/1.4.0") +load("libpng/1.6.37") +load("udunits/2.2.28") +load("ncview/2.1.9") +load("netcdf-cxx4/4.3.1") +load("json/3.10.5") +--load("crtm/v2.4_jedi") +load("prod_util/2.1.1") +load("fms/2023.04") + +load("py-jinja2/3.0.3") +load("py-netcdf4/1.5.8") +load("py-pybind11/2.11.0") +load("py-pycodestyle/2.11.0") +load("py-pyyaml/6.0") +load("py-scipy/1.11.3") +load("py-xarray/2023.7.0") + +setenv("CC","cc") +setenv("FC","ftn") +setenv("CXX","CC") + +local mpiexec = '/usr/bin/srun' +local mpinproc = '-n' +setenv('MPIEXEC_EXEC', mpiexec) +setenv('MPIEXEC_NPROC', mpinproc) + +whatis("Name: ".. pkgName) +whatis("Version: ".. pkgVersion) +whatis("Category: RDASApp") +whatis("Description: Load all libraries needed for RDASApp") diff --git a/rrfs-test/scripts/templates/mpasjedi_expr/run_bump.sh b/rrfs-test/scripts/templates/mpasjedi_expr/run_bump.sh index 481e3c8e..25b57223 100755 --- a/rrfs-test/scripts/templates/mpasjedi_expr/run_bump.sh +++ b/rrfs-test/scripts/templates/mpasjedi_expr/run_bump.sh @@ -1,6 +1,7 @@ #!/bin/sh #SBATCH --account=rtrr -#SBATCH --qos=batch +#SBATCH --qos=batch # use the normal queue on Gaea +###SBATCH -M c6 # for Gaea ###SBATCH --partition=bigmem ###SBATCH --partition=kjet ###SBATCH --reservation=rrfsens @@ -13,7 +14,9 @@ RDASApp=@RDASApp@ -. /apps/lmod/lmod/init/sh +if [[ -s /apps/lmod/lmod/init/sh ]]; then + . 
/apps/lmod/lmod/init/sh +fi module purge source ${RDASApp}/ush/detect_machine.sh diff --git a/rrfs-test/scripts/templates/mpasjedi_expr/run_jedi.sh b/rrfs-test/scripts/templates/mpasjedi_expr/run_jedi.sh index 0671f5ba..87f4d452 100755 --- a/rrfs-test/scripts/templates/mpasjedi_expr/run_jedi.sh +++ b/rrfs-test/scripts/templates/mpasjedi_expr/run_jedi.sh @@ -1,6 +1,7 @@ #!/bin/sh #SBATCH --account=rtrr -#SBATCH --qos=batch +#SBATCH --qos=batch # use the normal queue on Gaea +###SBATCH -M c6 # for Gaea ###SBATCH --partition=kjet ###SBATCH --reservation=rrfsens #SBATCH --ntasks=120 @@ -16,7 +17,9 @@ inputfile=./rrfs_mpasjedi_2024052700_Ens3Dvar.yaml # FOR ENVAR #inputfile=./rrfs_mpasjedi_2024052700_letkf.yaml # FOR LETKF #inputfile=./rrfs_mpasjedi_2024052700_getkf.yaml # FOR GETKF -. /apps/lmod/lmod/init/sh +if [[ -s /apps/lmod/lmod/init/sh ]]; then + . /apps/lmod/lmod/init/sh +fi module purge source ${RDASApp}/ush/detect_machine.sh diff --git a/ush/detect_machine.sh b/ush/detect_machine.sh index 8ff43ef1..bab25e23 100755 --- a/ush/detect_machine.sh +++ b/ush/detect_machine.sh @@ -21,10 +21,10 @@ case $(hostname -f) in dlogin0[1-9].dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood01-9 dlogin10.dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood10 - gaea9) MACHINE_ID=gaea ;; ### gaea9 - gaea1[0-6]) MACHINE_ID=gaea ;; ### gaea10-16 - gaea9.ncrc.gov) MACHINE_ID=gaea ;; ### gaea9 - gaea1[0-6].ncrc.gov) MACHINE_ID=gaea ;; ### gaea10-16 + gaea|gaea5[1-8]) MACHINE_ID=gaea ;; + gaea6[1-8]) MACHINE_ID=gaea ;; + gaea.ncrc.gov|gaea5[1-8].ncrc.gov) MACHINE_ID=gaea ;; + gaea6[1-8].ncrc.gov) MACHINE_ID=gaea ;; hfe0[1-9]) MACHINE_ID=hera ;; ### hera01-09 hfe1[0-2]) MACHINE_ID=hera ;; ### hera10-12 @@ -83,8 +83,10 @@ elif [[ -d /work ]]; then else MACHINE_ID=orion fi -elif [[ -d /gpfs && -d /ncrc ]]; then - # We are on GAEA. 
+elif [[ -d /gpfs/f5 && -d /ncrc ]]; then + # We are on GAEA + MACHINE_ID=gaea +elif [[ -d /gpfs/f6 && -d /ncrc ]]; then MACHINE_ID=gaea elif [[ -d /data/prod ]]; then # We are on SSEC's S4 diff --git a/ush/init.sh b/ush/init.sh index a28a3a1b..bf596d8d 100755 --- a/ush/init.sh +++ b/ush/init.sh @@ -14,6 +14,15 @@ case ${MACHINE_ID} in orion|hercules) RDAS_DATA=/work/noaa/zrtrr/RDAS_DATA ;; + gaea) + if [[ -d /gpfs/f5 ]]; then + RDAS_DATA=/gpfs/f5/gsl-glo/world-shared/role.rrfsfix/RDAS_DATA + elif [[ -d /gpfs/f6 ]]; then + RDAS_DATA=/gpfs/f6/bil-fire10-oar/world-shared/role.rrfsfix/RDAS_DATA + else + echo "unsupported gaea cluster: ${MACHINE_ID}" + fi + ;; *) echo "platform not supported: ${MACHINE_ID}" ;; diff --git a/ush/load_eva.sh b/ush/load_eva.sh index c6ec457f..5b4852ee 100755 --- a/ush/load_eva.sh +++ b/ush/load_eva.sh @@ -12,5 +12,15 @@ source ${ushdir}/detect_machine.sh module purge module use ${ushdir}/../modulefiles -module load EVA/${MACHINE_ID} +if [[ "${MACHINE_ID}" == "gaea" ]]; then + if [[ -d /gpfs/f5 ]]; then + module load EVA/${MACHINE_ID}C5 + elif [[ -d /gpfs/f6 ]]; then + module load EVA/${MACHINE_ID}C6 + else + echo "not supported gaea cluster: ${MACHINE_ID}" + fi +else + module load EVA/${MACHINE_ID} +fi module list