Skip to content

Commit

Permalink
Initial port to Acorn.
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidHuber-NOAA committed Sep 24, 2024
1 parent 2b1d706 commit 3cbe71f
Show file tree
Hide file tree
Showing 6 changed files with 257 additions and 23 deletions.
50 changes: 50 additions & 0 deletions modulefiles/gsi_acorn.intel.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
help([[
]])

-- Lmod modulefile: GSI build/run environment on WCOSS2 Acorn (Intel toolchain).
--
-- Every version below can be overridden by exporting the matching
-- <name>_ver environment variable before this module is loaded; the quoted
-- string is the fallback used when that variable is unset.

-- Build tools and utilities
local cmake_ver=os.getenv("cmake_ver") or "3.20.2"
local python_ver=os.getenv("python_ver") or "3.8.6"
local prod_util_ver=os.getenv("prod_util_ver") or "2.0.10"

-- Libraries GSI is built against
local netcdf_ver=os.getenv("netcdf_ver") or "4.7.4"
local bufr_ver=os.getenv("bufr_ver") or "11.7.0"
local bacio_ver=os.getenv("bacio_ver") or "2.4.1"
local w3emc_ver=os.getenv("w3emc_ver") or "2.9.2"
local sp_ver=os.getenv("sp_ver") or "2.3.3"
local ip_ver=os.getenv("ip_ver") or "3.3.3"
local sigio_ver=os.getenv("sigio_ver") or "2.3.2"
local sfcio_ver=os.getenv("sfcio_ver") or "1.4.1"
local nemsio_ver=os.getenv("nemsio_ver") or "2.5.4"
local wrf_io_ver=os.getenv("wrf_io_ver") or "1.2.0"
local ncio_ver=os.getenv("ncio_ver") or "1.1.2"
local crtm_ver=os.getenv("crtm_ver") or "2.4.0"
local ncdiag_ver=os.getenv("ncdiag_ver") or "1.1.1"

-- Compiler / MPI stack: requested by name only, so no version is pinned by
-- this modulefile. Setting PrgEnv_intel_ver, intel_ver, craype_ver or
-- cray_mpich_ver therefore has no effect here.
load("PrgEnv-intel")
load("intel")
load("craype")
load("cray-mpich")

-- Versioned modules, requested as "<name>/<version>"
load(pathJoin("cmake", cmake_ver))
load(pathJoin("python", python_ver))
load(pathJoin("prod_util", prod_util_ver))
load(pathJoin("netcdf", netcdf_ver))
load(pathJoin("bufr", bufr_ver))
load(pathJoin("bacio", bacio_ver))
load(pathJoin("w3emc", w3emc_ver))
load(pathJoin("sp", sp_ver))
load(pathJoin("ip", ip_ver))
load(pathJoin("sigio", sigio_ver))
load(pathJoin("sfcio", sfcio_ver))
load(pathJoin("nemsio", nemsio_ver))
load(pathJoin("wrf_io", wrf_io_ver))
load(pathJoin("ncio", ncio_ver))
load(pathJoin("crtm", crtm_ver))
load(pathJoin("ncdiag", ncdiag_ver))

-- pushenv (rather than setenv) so that any previous value of
-- GSI_BINARY_SOURCE_DIR is restored when this module is unloaded.
pushenv("GSI_BINARY_SOURCE_DIR", "/lfs/h2/emc/global/noscrub/emc.global/FIX/fix/gsi/20230911")

whatis("Description: GSI environment on WCOSS2 Acorn")
41 changes: 23 additions & 18 deletions regression/regression_param.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ regtest=$1

case $machine in

Hera)
sub_cmd="sub_hera"
Hera)
sub_cmd="sub_hera"
memnode=96
numcore=40
;;
Expand All @@ -19,23 +19,28 @@ case $machine in
memnode=512
numcore=40
;;
Jet)
sub_cmd="sub_jet"
Jet)
sub_cmd="sub_jet"
memnode=96
numcore=40
;;
Gaea)
sub_cmd="sub_gaea"
Gaea)
sub_cmd="sub_gaea"
memnode=251
numcore=128
;;
wcoss2)
sub_cmd="sub_wcoss2"
wcoss2)
sub_cmd="sub_wcoss2"
memnode=512
numcore=128
;;
Discover)
sub_cmd="sub_discover"
acorn)
sub_cmd="sub_acorn"
memnode=512
numcore=128
;;
Discover)
sub_cmd="sub_discover"
;;
*) # EXIT out for unresolved machine
echo "unknown $machine"
Expand Down Expand Up @@ -71,7 +76,7 @@ case $regtest in
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:10:00" ; popts[1]="12/8/" ; ropts[1]="/1"
topts[2]="0:10:00" ; popts[2]="12/10/" ; ropts[2]="/2"
elif [[ "$machine" = "wcoss2" ]]; then
elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then
topts[1]="0:10:00" ; popts[1]="12/8/" ; ropts[1]="/1"
topts[2]="0:10:00" ; popts[2]="12/10/" ; ropts[2]="/2"
fi
Expand Down Expand Up @@ -101,7 +106,7 @@ case $regtest in
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1"
elif [[ "$machine" = "wcoss2" ]]; then
elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then
topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1"
fi
Expand Down Expand Up @@ -131,7 +136,7 @@ case $regtest in
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1"
elif [[ "$machine" = "wcoss2" ]]; then
elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then
topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1"
fi
Expand Down Expand Up @@ -160,7 +165,7 @@ case $regtest in
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1"
elif [[ "$machine" = "wcoss2" ]]; then
elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then
topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1"
fi
Expand Down Expand Up @@ -190,7 +195,7 @@ case $regtest in
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:15:00" ; popts[1]="28/1/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="28/2/" ; ropts[2]="/1"
elif [[ "$machine" = "wcoss2" ]]; then
elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then
topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="64/2/" ; ropts[2]="/1"
fi
Expand Down Expand Up @@ -220,7 +225,7 @@ case $regtest in
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:30:00" ; popts[1]="14/8/" ; ropts[1]="/1"
topts[2]="0:30:00" ; popts[2]="14/14/" ; ropts[2]="/1"
elif [[ "$machine" = "wcoss2" ]]; then
elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then
topts[1]="0:30:00" ; popts[1]="14/8/" ; ropts[1]="/1"
topts[2]="0:30:00" ; popts[2]="14/14/" ; ropts[2]="/2"
fi
Expand Down Expand Up @@ -250,7 +255,7 @@ case $regtest in
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:10:00" ; popts[1]="16/2/" ; ropts[1]="/1"
topts[2]="0:10:00" ; popts[2]="16/4/" ; ropts[2]="/2"
elif [[ "$machine" = "wcoss2" ]]; then
elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then
topts[1]="0:10:00" ; popts[1]="16/2/" ; ropts[1]="/1"
topts[2]="0:10:00" ; popts[2]="16/4/" ; ropts[2]="/2"
fi
Expand Down Expand Up @@ -316,7 +321,7 @@ elif [[ "$machine" = "Gaea" ]]; then
export MPI_BUFS_PER_HOST=256
export MPI_GROUP_MAX=256
export APRUN="srun --export=ALL -n \$ntasks"
elif [[ "$machine" = "wcoss2" ]]; then
elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then
export OMP_PLACES=cores
export OMP_STACKSIZE=2G
export FORT_BUFFERED=true
Expand Down
10 changes: 7 additions & 3 deletions regression/regression_var.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,12 @@ elif [[ -d /work ]]; then # Orion or Hercules
else
export machine="Orion"
fi
elif [[ -d /lfs/h2 ]]; then # wcoss2
export machine="wcoss2"
elif [[ -d /lfs/h2 ]]; then # wcoss2 or acorn
if [[ $(hostname -f) =~ "alogin" ]]; then
export machine="acorn"
else
export machine="wcoss2"
fi
fi
echo "Running Regression Tests on '$machine'";

Expand All @@ -63,7 +67,7 @@ case $machine in
export check_resource="no"
export accnt="ufs-ard"
;;
wcoss2)
wcoss2 | acorn)
export local_or_default="${local_or_default:-/lfs/h2/emc/da/noscrub/$LOGNAME}"
if [ -d $local_or_default ]; then
export noscrub="$local_or_default/noscrub"
Expand Down
4 changes: 2 additions & 2 deletions ush/detect_machine.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
# First detect w/ hostname
case $(hostname -f) in

adecflow0[12].acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=acorn ;; ### acorn
alogin0[12].acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=acorn ;; ### acorn
adecflow0[1-3].acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=acorn ;; ### acorn
alogin0[1-3].acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=acorn ;; ### acorn
clogin0[1-9].cactus.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### cactus01-9
clogin10.cactus.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### cactus10
dlogin0[1-9].dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood01-9
Expand Down
4 changes: 4 additions & 0 deletions ush/module-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ elif [[ $MACHINE_ID = wcoss2 ]]; then
# We are on WCOSS2
module reset

elif [[ $MACHINE_ID = acorn ]]; then
# We are on WCOSS2-Acorn
module reset

elif [[ $MACHINE_ID = stampede* ]] ; then
# We are on TACC Stampede
if ( ! eval module help > /dev/null 2>&1 ) ; then
Expand Down
171 changes: 171 additions & 0 deletions ush/sub_acorn
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#!/bin/sh --login
# sub_acorn: wrap an executable script in a PBS job script and submit it.
# This first section prints tracing, defines the usage text, sets option
# defaults, parses the command line and resolves the executable/job names.
set -x
echo "starting sub_acorn"

# Help text printed on any command-line error.
usage="\
Usage: $0 [options] executable [args]
where the options are:
-a account account (default: none)
-b binding run smt binding or not (default:NO)
-d dirin initial directory (default: cwd)
-e envars copy comma-separated environment variables
-g group group name
-i append standard input to command file
-j jobname specify jobname (default: executable basename)
-m machine machine on which to run (default: current)
-n write command file to stdout rather than submitting it
-o output specify output file (default: jobname.out)
-p procs[/nodes[/ppreq]
number of MPI tasks and optional nodes or Bblocking and
ppreq option (N or S) (defaults: serial, Bunlimited, S)
-q queue[/qpreq] queue name and optional requirement, e.g. dev/P
(defaults: 1 if serial or dev if parallel and none)
(queue 3 or 4 is dev or prod with twice tasks over ip)
(options: P=parallel, B=bigmem, b=batch)
-r rmem[/rcpu] resources memory and cpus/task (default: '1024 mb', 1)
-t timew wall time limit in [[hh:]mm:]ss format (default: 900)
-u userid userid to run under (default: self)
-v verbose mode
-w when when to run, in yyyymmddhh[mm], +hh[mm], thh[mm], or
Thh[mm] (full, incremental, today or tomorrow) format
(default: now)
Function: This command submits a job to the batch queue."

# Keep the full original argument list for the optional submission log.
subcmd="$*"

# Option defaults (same order as the option letters handled below).
stdin=NO;    nosub=NO
account="";  binding="NO";  dirin="";  envars="";  group=""
jobname="";  machine="";    output=""
procs=0;     nodes="";      ppreq=""
queue="";    qpreq=""
rmem="1024"; rcpu="1"
timew="900"
userid="";   verbose=NO;    when=""

# Compound options (-p, -q, -r) are "/"-separated. A "/" is appended before
# cutting so that cut always sees a delimiter and absent fields come back empty.
while getopts a:b:d:e:g:ij:m:no:p:q:r:t:u:vw: opt; do
  case $opt in
    a) account="$OPTARG" ;;
    b) binding="$OPTARG" ;;
    d) dirin="$OPTARG" ;;
    e) envars="$OPTARG" ;;
    g) group="$OPTARG" ;;
    i) stdin=YES ;;
    j) jobname=$OPTARG ;;
    m) machine="$OPTARG" ;;
    n) nosub=YES ;;
    o) output=$OPTARG ;;
    p)
      procs=$(echo $OPTARG/|cut -d/ -f1)
      nodes=$(echo $OPTARG/|cut -d/ -f2)
      ppreq=$(echo $OPTARG/|cut -d/ -f3)
      ;;
    q)
      queue=$(echo $OPTARG/|cut -d/ -f1)
      qpreq=$(echo $OPTARG/|cut -d/ -f2)
      ;;
    r)
      rmem=$(echo $OPTARG/|cut -d/ -f1)
      rcpu=$(echo $OPTARG/|cut -d/ -f2)
      ;;
    t) timew=$OPTARG ;;
    u) userid=$OPTARG ;;
    v) verbose=YES ;;
    w) when=$OPTARG ;;
    \?)
      echo $0: invalid option >&2
      echo "$usage" >&2
      exit 1
      ;;
  esac
done
shift $(($OPTIND-1))

# The first remaining word is the script to submit; it is mandatory.
if [[ $# -eq 0 ]]; then
  echo $0: missing executable name >&2
  echo "$usage" >&2
  exit 1
fi
exec=$1
# Not a non-empty file at the given path: fall back to a PATH lookup.
if [[ ! -s $exec ]] && which $exec >/dev/null 2>&1; then
  exec=$(which $exec)
fi
shift
args="$*"

# Job name defaults to the executable's basename, output to <jobname>.out.
bn=$(basename $exec)
export jobname=${jobname:-$bn}
output=${output:-$jobname.out}
myuser=$LOGNAME
myhost=$(hostname)

# ---------------------------------------------------------------------------
# Build the PBS job script ($cfile): PBS directives, environment setup,
# module loads, then the body of the user's executable script.
# ---------------------------------------------------------------------------

# Scratch area for the generated job script and the captured qsub output.
DATA=/lfs/h2/emc/stmp/$LOGNAME/tmp
mkdir -p $DATA

queue=${queue:-dev}
timew=${timew:-01:20:00}
# -p may omit "/nodes" (or be absent altogether), which leaves nodes empty.
# An empty value would yield a malformed "select=" directive and a syntax
# error in the ntasks arithmetic below, so fall back to a single node.
nodes=${nodes:-1}
task_node=${task_node:-$procs}
size=$((nodes*task_node))
envars=$envars
threads=${rcpu:-1}
ncpus=$((procs*threads))

export TZ=GMT
cfile=$DATA/sub$$
{
  echo "#!/bin/bash"
  echo ""
  echo "#PBS -o $output"
  echo "#PBS -N $jobname"
  echo "#PBS -q $queue"
  echo "#PBS -l walltime=$timew"
  echo "#PBS -l select=$nodes:mpiprocs=$procs:ompthreads=$threads:ncpus=$ncpus"
  echo "#PBS -l place=vscatter:exclhost"
  echo "#PBS -j oe"
  # Honour -a when it was given; otherwise keep using $accnt from the
  # caller's environment, as before.
  echo "#PBS -A ${account:-$accnt}"

  echo ""
  echo "export OMP_NUM_THREADS=$threads"
  echo "export ntasks=$(( nodes * procs ))"
  echo "export ppn=$procs"
  echo "export threads=$threads"
  echo ""
  # Writes a ". <words...>" line built from the first nine fields of
  # $regdir/regression_var.out ($regdir comes from the caller's environment).
  # NOTE(review): presumably this re-sources the regression setup inside the
  # job -- confirm against whatever writes regression_var.out.
  echo ". "$(awk '{ print $1, $2, $3, $4, $5, $6, $7, $8, $9 }' $regdir/regression_var.out)
  echo ""

  echo "module reset"
  echo "module use $modulefiles"
  echo "module load gsi_acorn.intel"
  echo "module load envvar/1.0"
  echo "module load cray-pals/1.2.2"
  # \$line is escaped so it is expanded by the job's while-loop, not here at
  # generation time (where it would be empty and "module show" would get no
  # argument).
  echo "module -t list 2>&1 | while read line;do module show \$line 2>&1 | sed -n -e '2p';done | sort"
  echo "module avail"

  echo ""

  # Append the user's script verbatim as the job body.
  cat $exec
} > $cfile

# ---------------------------------------------------------------------------
# Finalise the job script, then either print it (-n) or submit it with qsub.
# ---------------------------------------------------------------------------

# -i: append whatever arrives on standard input to the job script. This must
# happen before the -n / -v handling below so that the printed script is the
# complete one.
if [[ $stdin = YES ]]; then
  cat >>$cfile
fi

if [[ $nosub = YES ]]; then
  # -n: show the job script instead of submitting it.
  cat $cfile
  exit
elif [[ $verbose = YES ]]; then
  # -v: trace and show the job script, then carry on to submit it.
  set -x
  cat $cfile
fi

# The submit command can be overridden through $qsub in the environment.
qsub=${qsub:-qsub}

# Capture qsub's stdout so it can be echoed and, optionally, logged.
ofile=$DATA/subout$$
>$ofile
chmod 777 $ofile
$qsub -V $cfile >$ofile
rc=$?
cat $ofile

# Optional submission log: only written when $SUBLOG names a writable file.
if [[ -w $SUBLOG ]]; then
  jobn=$(grep -i submitted $ofile|head -n1|cut -d\" -f2)
  date -u +"%Y%m%d%H%M%S : $subcmd : $jobn" >>$SUBLOG
fi
##rm $cfile $ofile
##[[ $MKDATA = YES ]] && rmdir $DATA
echo "ending sub_acorn"
# Propagate qsub's exit status to the caller.
exit $rc

0 comments on commit 3cbe71f

Please sign in to comment.