Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/reger-men/HPL_GPU
Browse files Browse the repository at this point in the history
  • Loading branch information
Redtorm committed Sep 9, 2022
2 parents b911542 + 0cc18d3 commit 4854af4
Show file tree
Hide file tree
Showing 21 changed files with 413 additions and 33 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ src/pgesv/HPL_pdupdateNT.cpp
src/pgesv/HPL_pdupdateNN.cpp src/pgesv/HPL_pdupdateTN.cpp src/pgesv/HPL_pdupdateTT.cpp
src/pgesv/HPL_pdupdateTN.cpp src/pgesv/HPL_pdupdateTT.cpp
src/pauxil/HPL_pwarn.cpp src/comm/HPL_bcast.cpp src/comm/HPL_blong.cpp
src/comm/HPL_blonM.cpp src/comm/HPL_1ring.cpp src/comm/HPL_2ring.cpp
src/comm/HPL_blonM.cpp src/comm/HPL_bidir.cpp src/comm/HPL_1ring.cpp src/comm/HPL_2ring.cpp
src/comm/HPL_1rinM.cpp src/comm/HPL_2rinM.cpp src/comm/HPL_packL.cpp
src/comm/HPL_sdrv.cpp src/comm/HPL_send.cpp src/pgesv/HPL_pdlaswp00N.cpp
src/comm/HPL_recv.cpp src/grid/HPL_reduce.cpp src/comm/HPL_binit.cpp
Expand Down
11 changes: 0 additions & 11 deletions env/env.crusher.sh

This file was deleted.

7 changes: 6 additions & 1 deletion include/hpl_comm.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ typedef enum
HPL_2RING_M = 404, /* Increasing 2-ring (modified) */
HPL_BLONG = 405, /* long broadcast */
HPL_BLONG_M = 406, /* long broadcast (modified) */
HPL_IBCAST = 407 /* default bcast in MPI */
HPL_IBCAST = 407, /* default bcast in MPI */
HPL_BIDIR = 408, /* bidirectional */
} HPL_T_TOP;
/*
* ---------------------------------------------------------------------
Expand Down Expand Up @@ -160,6 +161,10 @@ int HPL_binit_blonM STDC_ARGS( ( HPL_T_panel * ) );
int HPL_bcast_blonM STDC_ARGS( ( HPL_T_panel *, int * ) );
int HPL_bwait_blonM STDC_ARGS( ( HPL_T_panel * ) );

int HPL_binit_bidir STDC_ARGS( ( HPL_T_panel * ) );
int HPL_bcast_bidir STDC_ARGS( ( HPL_T_panel *, int * ) );
int HPL_bwait_bidir STDC_ARGS( ( HPL_T_panel * ) );

#endif
/*
* End of hpl_comm.h
Expand Down
4 changes: 3 additions & 1 deletion makes/Make.comm
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ INCdep = \
#
HPL_comobj = \
HPL_1ring.o HPL_1rinM.o HPL_2ring.o \
HPL_2rinM.o HPL_blong.o HPL_blonM.o \
HPL_2rinM.o HPL_blong.o HPL_bidir.o HPL_blonM.o \
HPL_packL.o HPL_copyL.o HPL_binit.o \
HPL_bcast.o HPL_bwait.o HPL_send.o \
HPL_recv.o HPL_sdrv.o
Expand Down Expand Up @@ -84,6 +84,8 @@ HPL_2rinM.o : ../HPL_2rinM.cpp $(INCdep)
$(CC) -o $@ -c $(CCFLAGS) ../HPL_2rinM.cpp
HPL_blong.o : ../HPL_blong.cpp $(INCdep)
$(CC) -o $@ -c $(CCFLAGS) ../HPL_blong.cpp
HPL_bidir.o : ../HPL_bidir.cpp $(INCdep)
$(CC) -o $@ -c $(CCFLAGS) ../HPL_bidir.cpp
HPL_blonM.o : ../HPL_blonM.cpp $(INCdep)
$(CC) -o $@ -c $(CCFLAGS) ../HPL_blonM.cpp
HPL_packL.o : ../HPL_packL.cpp $(INCdep)
Expand Down
4 changes: 2 additions & 2 deletions scripts/config/HPL_128GPU.dat
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ HPL.out output file name (if any)
384 NBs
0 PMAP process mapping (0=Row-,1=Column-major)
1 # of process grids (P x Q)
8 Ps
16 Qs
16 Ps
8 Qs
16.0 threshold
1 # of panel fact
2 PFACTs (0=left, 1=Crout, 2=Right)
Expand Down
31 changes: 31 additions & 0 deletions scripts/config/HPL_2048GPU.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
HPLinpack benchmark input file
Innovative Computing Laboratory, University of Tennessee
HPL.out output file name (if any)
0 device out (6=stdout,7=stderr,file)
1 # of problems sizes (N)
4104192 N
1 # of NBs
384 NBs
0 PMAP process mapping (0=Row-,1=Column-major)
1 # of process grids (P x Q)
64 Ps
32 Qs
16.0 threshold
1 # of panel fact
2 PFACTs (0=left, 1=Crout, 2=Right)
1 # of recursive stopping criterium
2 NBMINs (>= 1)
1 # of panels in recursion
2 NDIVs
1 # of recursive panel fact.
2 RFACTs (0=left, 1=Crout, 2=Right)
1 # of broadcast
6 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM,6=ibcast,7=BiDir)
1 # of lookahead depth
1 DEPTHs (>=0)
1 SWAP (0=bin-exch,1=long,2=mix)
64 swapping threshold
1 L1 in (0=transposed,1=no-transposed) form
0 U in (0=transposed,1=no-transposed) form
0 Equilibration (0=no,1=yes)
8 memory alignment in double (> 0)
4 changes: 2 additions & 2 deletions scripts/config/HPL_32GPU.dat
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ HPL.out output file name (if any)
384 NBs
0 PMAP process mapping (0=Row-,1=Column-major)
1 # of process grids (P x Q)
4 Ps
8 Qs
8 Ps
4 Qs
16.0 threshold
1 # of panel fact
2 PFACTs (0=left, 1=Crout, 2=Right)
Expand Down
31 changes: 31 additions & 0 deletions scripts/config/HPL_4096GPU.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
HPLinpack benchmark input file
Innovative Computing Laboratory, University of Tennessee
HPL.out output file name (if any)
0 device out (6=stdout,7=stderr,file)
1 # of problems sizes (N)
5786112 N
1 # of NBs
384 NBs
0 PMAP process mapping (0=Row-,1=Column-major)
1 # of process grids (P x Q)
64 Ps
64 Qs
16.0 threshold
1 # of panel fact
2 PFACTs (0=left, 1=Crout, 2=Right)
1 # of recursive stopping criterium
2 NBMINs (>= 1)
1 # of panels in recursion
2 NDIVs
1 # of recursive panel fact.
2 RFACTs (0=left, 1=Crout, 2=Right)
1 # of broadcast
6 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM,6=ibcast,7=BiDir)
1 # of lookahead depth
1 DEPTHs (>=0)
1 SWAP (0=bin-exch,1=long,2=mix)
64 swapping threshold
1 L1 in (0=transposed,1=no-transposed) form
0 U in (0=transposed,1=no-transposed) form
0 Equilibration (0=no,1=yes)
8 memory alignment in double (> 0)
4 changes: 2 additions & 2 deletions scripts/config/HPL_512GPU.dat
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ HPL.out output file name (if any)
384 NBs
0 PMAP process mapping (0=Row-,1=Column-major)
1 # of process grids (P x Q)
16 Ps
32 Qs
32 Ps
16 Qs
16.0 threshold
1 # of panel fact
2 PFACTs (0=left, 1=Crout, 2=Right)
Expand Down
10 changes: 5 additions & 5 deletions scripts/config/HPL_8GPU.dat
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ Innovative Computing Laboratory, University of Tennessee
HPL.out output file name (if any)
0 device out (6=stdout,7=stderr,file)
1 # of problems sizes (N)
256000 N
256512 N
1 # of NBs
384 NBs
0 PMAP process mapping (0=Row-,1=Column-major)
1 # of process grids (P x Q)
2 Ps
4 Qs
4 Ps
2 Qs
16.0 threshold
1 # of panel fact
2 PFACTs (0=left, 1=Crout, 2=Right)
Expand All @@ -19,8 +19,8 @@ HPL.out output file name (if any)
2 NDIVs
1 # of recursive panel fact.
2 RFACTs (0=left, 1=Crout, 2=Right)
1 # of broadcast
6 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM,6=ibcast)
1 # of broadcast
6 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM,6=ibcast,7=BiDir)
1 # of lookahead depth
1 DEPTHs (>=0)
1 SWAP (0=bin-exch,1=long,2=mix)
Expand Down
20 changes: 20 additions & 0 deletions scripts/env.crusher.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Environment setup for the "crusher" configuration: resets the module
# environment, loads a pinned set of modules, then exports the runtime
# variables used by the MPI/OFI stack.
# NOTE(review): the exports and module changes only reach the job if this file
# is sourced (not executed) — confirm against the calling script.

# modules
# Reset first so the loads below do not depend on what was loaded before.
module reset

# Pinned module set; names suggest accelerator target, programming
# environment, AMD compiler, ROCm, Cray MPICH and OpenBLAS (OpenMP build).
module load craype-accel-amd-gfx90a
module load PrgEnv-amd
module load amd/5.2.0
module load rocm/5.2.0
module load cray-mpich/8.1.17
module load openblas/0.3.17-omp

#
# env
#
# Put the Cray library directories ahead of the existing search path.
# NOTE(review): if LD_LIBRARY_PATH is empty at this point the result ends in a
# trailing ':' (an empty search-path entry) — confirm this is acceptable.
export LD_LIBRARY_PATH="${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}"
# enable GPU aware MPI
export MPICH_GPU_SUPPORT_ENABLED=1
# to work around the OFI registration cache issue for > 8 nodes
# (set unconditionally here, regardless of the actual node count)
export FI_MR_CACHE_MAX_COUNT=0
#export MPICH_SMP_SINGLE_COPY_MODE=NONE # does not work
# presumably makes MPICH report the rank ordering it uses — TODO confirm
export MPICH_RANK_REORDER_DISPLAY=1
22 changes: 22 additions & 0 deletions scripts/env.lumi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Environment setup for the "lumi" configuration: selects the LUMI software
# stack and GPU partition, loads pinned ROCm and Cray MPICH modules, then
# exports the runtime variables used by the MPI stack.
# NOTE(review): the exports and module changes only reach the job if this file
# is sourced (not executed) — confirm against the calling script.
module load LUMI/22.06 partition/G
#module load rocm/5.1.4

# Make the project-specific modulefiles visible; the rocm/5.3.0-* builds below
# are presumably provided from this directory — TODO confirm.
module use /project/project_462000075/paklui/modulefiles
#module load rocm/5.3.0-10584
#module load rocm/5.3.0-10619
module load rocm/5.3.0-10670
#module load openblas/0.3.17-omp
#module load cce/14.0.2
#module load cray-libsci/22.08.1.1
module load cray-mpich/8.1.18
#module load craype/2.7.17

#
# env
#
# enable GPU aware MPI
export MPICH_GPU_SUPPORT_ENABLED=1
# to work around the OFI registration cache issue for > 8 nodes
# (left disabled in this file)
#export FI_MR_CACHE_MAX_COUNT=0
# presumably makes MPICH report the rank ordering it uses — TODO confirm
export MPICH_RANK_REORDER_DISPLAY=1
20 changes: 15 additions & 5 deletions scripts/run_hpl.slurm
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@
#SBATCH -N 1
##SBATCH -n 8
#SBATCH -c 8
#SBATCH -t 1:00:00
#SBATCH -A VEN114
#SBATCH -t 2:00:00
##SBATCH -A VEN114
#SBATCH -A project_462000075
#SBATCH -J xhplhip
#SBATCH --gpu-bind=closest
#SBATCH --ntasks-per-node=8
#SBATCH --gpus-per-node=8

source ../env/env.crusher.sh
#source ../env/env.crusher.sh
source ../env/env.lumi.sh

NP=$SLURM_NPROCS
NODES=$SLURM_NNODES
Expand All @@ -34,9 +36,17 @@ CMD+="-o $LOG -e $LOG "
#CMD+="${HOME}/mpich_bind.sh "
CMD+="$EXE"

# Apply the OFI registration-cache workaround only for runs on more than
# 8 nodes; on smaller runs make sure the variable is not inherited from the
# calling environment. The chosen action is echoed so it shows up in the job
# output.
# "${NODES:-0}" keeps the test well-formed when NODES is empty or unset:
# that case still takes the else branch, but no longer triggers a
# "[: -gt: unary operator expected" error.
if [ "${NODES:-0}" -gt 8 ]; then
    echo "export FI_MR_CACHE_MAX_COUNT=0"
    export FI_MR_CACHE_MAX_COUNT=0
else
    echo "unset FI_MR_CACHE_MAX_COUNT"
    unset FI_MR_CACHE_MAX_COUNT
fi

#export MPICH_SMP_SINGLE_COPY_MODE=NONE # does not work
export FI_MR_CACHE_MAX_COUNT=0
export MPICH_RANK_REORDER_DISPLAY=1
#export FI_MR_CACHE_MAX_COUNT=0 # workaround for failed to allocate memory
#export MPICH_RANK_REORDER_DISPLAY=1

echo $CMD >> $LOG
echo $CMD 2>&1 | tee -a $LOG
Expand Down
2 changes: 1 addition & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \
blas/HPL_dgemv.c blas/HPL_dscal.c blas/HPL_daxpy.c \
blas/HPL_dcopy.c blas/HPL_dgemm.c blas/HPL_dgemv.c blas/HPL_dger.c \
comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \
comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c comm/HPL_1ring.c \
comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c comm/HPL_bidir.c comm/HPL_1ring.c \
comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \
grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \
grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \
Expand Down
9 changes: 8 additions & 1 deletion src/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ am_libhpl_a_OBJECTS = auxil/HPL_dlatcpy.$(OBJEXT) \
comm/HPL_recv.$(OBJEXT) comm/HPL_bcast.$(OBJEXT) \
comm/HPL_binit.$(OBJEXT) comm/HPL_bwait.$(OBJEXT) \
comm/HPL_blong.$(OBJEXT) comm/HPL_1ring.$(OBJEXT) \
comm/HPL_bidir.$(OBJEXT) \
comm/HPL_1rinM.$(OBJEXT) comm/HPL_2rinM.$(OBJEXT) \
comm/HPL_2ring.$(OBJEXT) comm/HPL_blonM.$(OBJEXT) \
comm/HPL_packL.$(OBJEXT) grid/HPL_reduce.$(OBJEXT) \
Expand Down Expand Up @@ -229,6 +230,7 @@ am__depfiles_remade = auxil/$(DEPDIR)/HPL_dlacpy.Po \
comm/$(DEPDIR)/HPL_2rinM.Po comm/$(DEPDIR)/HPL_2ring.Po \
comm/$(DEPDIR)/HPL_bcast.Po comm/$(DEPDIR)/HPL_binit.Po \
comm/$(DEPDIR)/HPL_blonM.Po comm/$(DEPDIR)/HPL_blong.Po \
comm/$(DEPDIR)/HPL_bidir.Po \
comm/$(DEPDIR)/HPL_bwait.Po comm/$(DEPDIR)/HPL_packL.Po \
comm/$(DEPDIR)/HPL_recv.Po comm/$(DEPDIR)/HPL_sdrv.Po \
comm/$(DEPDIR)/HPL_send.Po grid/$(DEPDIR)/HPL_all_reduce.Po \
Expand Down Expand Up @@ -428,7 +430,7 @@ blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \
blas/HPL_dgemv.c blas/HPL_dscal.c blas/HPL_daxpy.c \
blas/HPL_dcopy.c blas/HPL_dgemm.c blas/HPL_dgemv.c blas/HPL_dger.c \
comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \
comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c comm/HPL_1ring.c \
comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c comm/HPL_bidir.c comm/HPL_1ring.c \
comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \
grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \
grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \
Expand Down Expand Up @@ -583,6 +585,8 @@ comm/HPL_bwait.$(OBJEXT): comm/$(am__dirstamp) \
comm/$(DEPDIR)/$(am__dirstamp)
comm/HPL_blong.$(OBJEXT): comm/$(am__dirstamp) \
comm/$(DEPDIR)/$(am__dirstamp)
comm/HPL_bidir.$(OBJEXT): comm/$(am__dirstamp) \
comm/$(DEPDIR)/$(am__dirstamp)
comm/HPL_1ring.$(OBJEXT): comm/$(am__dirstamp) \
comm/$(DEPDIR)/$(am__dirstamp)
comm/HPL_1rinM.$(OBJEXT): comm/$(am__dirstamp) \
Expand Down Expand Up @@ -818,6 +822,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_binit.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blonM.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blong.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bidir.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bwait.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_packL.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_recv.Po@am__quote@ # am--include-marker
Expand Down Expand Up @@ -1086,6 +1091,7 @@ distclean: distclean-am
-rm -f comm/$(DEPDIR)/HPL_binit.Po
-rm -f comm/$(DEPDIR)/HPL_blonM.Po
-rm -f comm/$(DEPDIR)/HPL_blong.Po
-rm -f comm/$(DEPDIR)/HPL_bidir.Po
-rm -f comm/$(DEPDIR)/HPL_bwait.Po
-rm -f comm/$(DEPDIR)/HPL_packL.Po
-rm -f comm/$(DEPDIR)/HPL_recv.Po
Expand Down Expand Up @@ -1233,6 +1239,7 @@ maintainer-clean: maintainer-clean-am
-rm -f comm/$(DEPDIR)/HPL_binit.Po
-rm -f comm/$(DEPDIR)/HPL_blonM.Po
-rm -f comm/$(DEPDIR)/HPL_blong.Po
-rm -f comm/$(DEPDIR)/HPL_bidir.Po
-rm -f comm/$(DEPDIR)/HPL_bwait.Po
-rm -f comm/$(DEPDIR)/HPL_packL.Po
-rm -f comm/$(DEPDIR)/HPL_recv.Po
Expand Down
1 change: 1 addition & 0 deletions src/comm/HPL_bcast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ int HPL_bcast
case HPL_BLONG_M : ierr = HPL_bcast_blonM( PANEL, IFLAG ); break;
case HPL_BLONG : ierr = HPL_bcast_blong( PANEL, IFLAG ); break;
case HPL_IBCAST : ierr = HPL_BE_bcast_ibcast( PANEL, IFLAG, HPL_TR); break;
case HPL_BIDIR : ierr = HPL_bcast_bidir( PANEL, IFLAG ); break;
default : ierr = HPL_SUCCESS;
}

Expand Down
Loading

0 comments on commit 4854af4

Please sign in to comment.