-
Notifications
You must be signed in to change notification settings - Fork 230
[Manual] Devito on TURSA [A100 GPUs].
George Bisbas edited this page Sep 12, 2024
·
16 revisions
DIRAC login page and account management: log in to the SAFE system.
You should be able to log in to the service machine itself using: `ssh <username>@tursa.dirac.ed.ac.uk`
# After completing the registration,
# `ssh` to your login node (password only, no keys are used)
ssh -oPubkeyAuthentication=no <username>@tursa.dirac.ed.ac.uk
# To quickly see the available versions of any software, filter the module list.
# `2>&1` merges stderr into the pipe so that grep sees the whole listing.
module avail -t 2>&1 | grep -i <keyword>
# e.g.
module avail -t 2>&1 | grep -i nvidia
# See also: https://icl-rcs-user-guide.readthedocs.io/en/latest/hpc/applications/easybuild/
# First, load the production tools
module load tools/prod
# Load Python, create a virtual environment and activate it
module load Python/3.11.3-GCCcore-12.3.0
# ...create the environment, activate it, and then install the current
# checkout in editable mode (run this from the source directory)...
python3 -m pip install -e .
# Request an interactive job: 1 chunk with 64 CPUs, 200 GB of memory and
# one A100 GPU, for 1 hour 30 minutes
qsub -I -l walltime=01:30:00 -l select=1:ncpus=64:mem=200gb:ngpus=1:gpu_type=A100
# See the available GPUs (prints the GPU names in CSV format)
nvidia-smi --query-gpu=gpu_name --format=csv
# Example output:
# name
# NVIDIA A100-SXM4-80GB
# Load the modules used for the GPU run
module load tools/eb-dev
module load NVHPC/23.7-CUDA-12.2.0
# The OpenMPI module below is dropped.
# TODO: check in a new environment whether mpi4py can be compiled with its own MPI.
# module load OpenMPI/4.1.5-GCC-12.3.0
# NOTE(review): this Python module (3.11.5-GCCcore-13.2.0) differs from the
# one loaded for the virtual environment above (3.11.3-GCCcore-12.3.0) — confirm
# which one is intended.
module load Python/3.11.5-GCCcore-13.2.0
# Put the MPI binaries shipped inside the NVHPC installation first on PATH
export PATH=/gpfs/easybuild/prod/software/NVHPC/23.7-CUDA-12.2.0/Linux_x86_64/23.7/comm_libs/mpi/bin:$PATH
# Run the acoustic example (-d 256 256 256, --tn 256) with OpenACC.
# The DEVITO_* variables are set for this single command only.
DEVITO_LANGUAGE=openacc DEVITO_LOGGING=DEBUG DEVITO_PLATFORM=nvidiaX DEVITO_COMPILER=nvcc python examples/seismic/acoustic/acoustic_example.py -d 256 256 256 --tn 256
# Sample result: ... 0.29 secs, 17.14 Gpts/s
#!/bin/bash
#PBS -l walltime=00:40:00
#PBS -l select=1:ncpus=8:mpiprocs=2:mem=200gb:ngpus=2:gpu_type=A100
#
# PBS batch job: runs the Devito seismic examples (acoustic, elastic, TTI,
# viscoelastic) with OpenACC, using 2 MPI ranks on one chunk with 2 A100 GPUs,
# first at space order 8 and then at space order 12.
#
# Submit it from the directory that contains the `devito` checkout.

# Start from the submission directory. Abort if it is unavailable, otherwise
# every benchmark below would be launched from the wrong place.
cd "$PBS_O_WORKDIR" || exit 1

# Log the hosts allocated to this job.
cat "$PBS_NODEFILE"

module load NVHPC/23.7-CUDA-12.2.0
module load Python/3.11.5-GCCcore-13.2.0

# Root of the NVIDIA HPC SDK installation (the trailing slash is kept on purpose,
# so the value is identical to the one previously exported).
export HPCSDK_HOME=/gpfs/easybuild/prod/software/NVHPC/23.7-CUDA-12.2.0/Linux_x86_64/23.7/
# Put the MPI binaries shipped inside the SDK first on PATH.
export PATH="${HPCSDK_HOME}comm_libs/mpi/bin:$PATH"
# Alternative (disabled): use the binaries under comm_libs/hpcx instead.
# export PATH=/gpfs/easybuild/prod/software/NVHPC/23.7-CUDA-12.2.0/Linux_x86_64/23.7/comm_libs/hpcx/bin:$PATH

# NOTE(review): this module is named for NVHPC 22.7 / CUDA 11.7, while
# NVHPC 23.7 / CUDA 12.2 is loaded above — confirm that the mix is intended.
module load OpenMPI/4.1.4-NVHPC-22.7-CUDA-11.7.0

# The example paths below are relative to the Devito source tree.
cd devito || exit 1

export DEVITO_MPI=1
export DEVITO_LANGUAGE=openacc
export DEVITO_LOGGING=DEBUG
export DEVITO_PROFILING=advanced2
export DEVITO_PLATFORM=nvidiaX
export DEVITO_COMPILER=nvc

# Multi-node variant (disabled): 4 ranks in total, 2 per node.
# mpirun -n 4 --map-by ppr:2:node -hostfile "$PBS_NODEFILE" --report-bindings python examples/seismic/acoustic/acoustic_example.py -d 1024 1024 1024 --tn 1024 -so 12

# Run every example at each space order. The acoustic example uses
# -d 1024 1024 1024; the other three use -d 768 768 768.
# A failing run does not stop the remaining ones.
for so in 8 12; do
  mpirun -n 2 python examples/seismic/acoustic/acoustic_example.py -d 1024 1024 1024 --tn 1024 -so "$so"
  for model in elastic tti viscoelastic; do
    mpirun -n 2 python "examples/seismic/${model}/${model}_example.py" -d 768 768 768 --tn 1024 -so "$so"
  done
done
# Monitor your own jobs, refreshing every 10 seconds
watch -n 10 'qstat -T -u $(whoami)'
# Monitor the qstat lines that mention v1_a100
# (presumably the A100 queue — confirm), refreshing every 10 seconds
watch -n 10 'qstat | grep v1_a100'
# Monitor the GPUs, refreshing every 0.1 seconds
watch -n 0.1 'nvidia-smi'
# Interactive job layouts, labelled <chunks (select)>x<GPUs per chunk (ngpus)>
# 1x1
qsub -I -l walltime=01:30:00 -l select=1:ncpus=8:mem=200gb:mpiprocs=1:ngpus=1:gpu_type=A100
# 1x2
qsub -I -l walltime=01:30:00 -l select=1:ncpus=8:mem=200gb:mpiprocs=2:ngpus=2:gpu_type=A100
# 2x1 (place=scatter asks PBS to spread the chunks out)
qsub -I -l walltime=02:30:00 -l select=2:ncpus=8:mem=200gb:mpiprocs=1:ngpus=1:gpu_type=A100 -l place=scatter
# NOTE(review): the next line gives launcher options only (4 ranks, 4 per node,
# bindings reported) and names no program to run — it looks like a template.
mpirun -n 4 --map-by ppr:4:node --report-bindings
# 4 ranks in total, 2 per node, on the hosts listed in the PBS node file
mpirun -n 4 --map-by ppr:2:node -hostfile $PBS_NODEFILE --report-bindings python examples/seismic/elastic/elastic_example.py -d 1024 1024 1024 --tn 1024 -so 8