-
Notifications
You must be signed in to change notification settings - Fork 2
/
upcxx-run-summit
executable file
·138 lines (109 loc) · 4.41 KB
/
upcxx-run-summit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/bin/bash
# Help text printed by --help.  Every option may be written with one or two
# leading dashes; options are only recognised before the executable name.
USAGE="$0 <options> upcxx-executable <options>
Options:
--help Print this message
--low-latency Select the HCA(s) for lowest latency (default)
--high-bandwidth Select the HCA(s) for highest bandwidth
--by-gpu Resource Set by GPU - Bind ranks to 7 cores on 1 socket with 1 GPU (default)
--by-socket Resource Set by socket - Bind ranks to 1 socket with 3 GPUs
--1-hca Use 1 HCA per rank (default)
--2-hca Use 2 HCA per rank
--4-hca Use 4 HCA per rank (i.e. all)
--use-odp Use ODP instead of the default XRC
--shared-heap set either a percentage of node RAM '10%' or fixed amount per thread '128MB'
"
# Number of HCAs per rank: keep a non-empty inherited value, otherwise use 1.
: "${__UPCXX_RUN_SUMMIT_nHCAS:=1}"
export __UPCXX_RUN_SUMMIT_nHCAS
# HCA selection policy: keep a non-empty inherited value, otherwise "lowlatency".
: "${__UPCXX_RUN_SUMMIT_MODE:=lowlatency}"
export __UPCXX_RUN_SUMMIT_MODE
# Resource-set geometry (not exported): unless already set, use the "by gpu"
# layout of 6 resource sets per host, each with 7 CPUs and 1 GPU.
: "${RS_PER_HOST:=6}"
: "${CPU_PER_RS:=7}"
: "${GPU_PER_RS:=1}"
# Parse the wrapper's own leading options.
#
# Arguments: the script's command line ("$@").
# Each option may be written with one or two leading dashes.  Parsing stops at
# the first word that does not start with a dash (taken to be the executable),
# at an empty word, or just after a bare "-" / "--" (which is itself consumed).
#
# Globals written:
#   RS_PER_HOST, CPU_PER_RS, GPU_PER_RS   set outright by --by-gpu/--by-socket,
#       so the flag given on the command line wins over any earlier default
#       (the previous ${VAR:=...} form never changed an already-set value,
#       which made --by-socket a no-op).
#   __UPCXX_RUN_SUMMIT_MODE, __UPCXX_RUN_SUMMIT_nHCAS,
#   __UPCXX_RUN_SUMMIT_SHARED_HEAP, __UPCXX_RUN_SUMMIT_VERBOSE,
#   GASNET_USE_XRC, GASNET_USE_ODP        exported by the matching option.
#   __UPCXX_RUN_SUMMIT_CONSUMED           number of leading words consumed;
#       the caller shifts by this amount.
# Exits: 0 after printing $USAGE for --help; 1 for an unknown option or for
#   --shared-heap without a value.
__upcxx_run_summit_parse_options() {
  local word name
  __UPCXX_RUN_SUMMIT_CONSUMED=0
  while [ $# -gt 0 ]
  do
    word=$1
    name=${word#-}   # strip the first dash
    name=${name#-}   # strip the optional second dash
    if [ "${word}" == "${name}" ]
    then
      # no leading dash: this is not an option, leave it for the caller
      break
    fi
    shift
    __UPCXX_RUN_SUMMIT_CONSUMED=$((__UPCXX_RUN_SUMMIT_CONSUMED + 1))
    case "${name}" in
      "by-gpu") RS_PER_HOST=6 ; CPU_PER_RS=7 ; GPU_PER_RS=1 ;; # default
      "by-socket") RS_PER_HOST=2 ; CPU_PER_RS=21 ; GPU_PER_RS=3 ;;
      "low-latency") export __UPCXX_RUN_SUMMIT_MODE="lowlatency" ;; # default
      "high-bandwidth") export __UPCXX_RUN_SUMMIT_MODE="highbandwidth" ;;
      "shared-heap")
        if [ $# -eq 0 ]
        then
          echo "Option '${word}' to $0 requires a value" 1>&2
          exit 1
        fi
        export __UPCXX_RUN_SUMMIT_SHARED_HEAP=$1
        shift
        __UPCXX_RUN_SUMMIT_CONSUMED=$((__UPCXX_RUN_SUMMIT_CONSUMED + 1))
        ;;
      "verbose") export __UPCXX_RUN_SUMMIT_VERBOSE=1 ;;
      "use-odp") export GASNET_USE_XRC=0 ; export GASNET_USE_ODP=1 ;;
      "1-hca") export __UPCXX_RUN_SUMMIT_nHCAS=1 ;; # default
      "2-hca") export __UPCXX_RUN_SUMMIT_nHCAS=2 ;;
      "4-hca") export __UPCXX_RUN_SUMMIT_nHCAS=4 ;;
      "") break ;; # bare "-" or "--": last option
      "help") echo "$USAGE" ; exit 0 ;;
      *) echo "Invalid option to $0: '${name}'" 1>&2 ; exit 1 ;;
    esac
  done
}

# process and consume any options on the command line
__upcxx_run_summit_parse_options "$@"
shift "${__UPCXX_RUN_SUMMIT_CONSUMED}"
# Convert a whole-number percentage (the digits that preceded the '%' in the
# --shared-heap value) into the "<fraction>/H" form assigned to
# GASNET_MAX_SEGSIZE, e.g. 10 -> 0.10/H, 5 -> 0.05/H, 100 -> 1.00/H.
# Arguments: $1 - the percentage, decimal digits only.
# Outputs:   the converted value on stdout.
# Returns:   1, with a message on stderr, if $1 is empty or not all digits.
__upcxx_run_summit_percent_to_segsize() {
  local pct=$1
  case "${pct}" in
    '' | *[!0-9]*)
      echo "Invalid shared heap percentage '${pct}%' (expected a whole number such as '10%')" 1>&2
      return 1
      ;;
  esac
  # Force base 10 so that a leading zero (e.g. "08") is not parsed as octal.
  pct=$((10#${pct}))
  # Split into whole and hundredths so that 100 becomes 1.00 rather than 0.100.
  printf '%d.%02d/H' "$((pct / 100))" "$((pct % 100))"
}

if [ -n "${__UPCXX_RUN_SUMMIT}" ]
then
  # Second invocation: __UPCXX_RUN_SUMMIT is exported by the first invocation
  # just before it exec's jsrun, so this branch runs in the re-launched copies.

  # Value reported by hwloc-calc for the current binding; the HCA choices
  # below only distinguish "1" from everything else.
  socket=$(hwloc-calc -I Node $(hwloc-bind --get))
  if [ "${__UPCXX_RUN_SUMMIT_MODE}" == "highbandwidth" ]
  then
    # high bandwidth HCA from both sockets (each use different rail)
    case $socket in
      1) export GASNET_IBV_PORTS=mlx5_1+mlx5_2 ;;
      *) export GASNET_IBV_PORTS=mlx5_0+mlx5_3 ;;
    esac
  else
    # low latency only HCAs closest to socket (both use same, single rail)
    case $socket in
      1) export GASNET_IBV_PORTS=mlx5_2+mlx5_3 ;;
      *) export GASNET_IBV_PORTS=mlx5_0+mlx5_1 ;;
    esac
  fi

  if [ "${__UPCXX_RUN_SUMMIT_nHCAS}" == "1" ]
  then
    # 1 HCA per rank: odd ranks keep the first port of the pair, even ranks
    # (and processes without PMIX_RANK) keep the second.
    if [ -n "${PMIX_RANK}" ] && ((${PMIX_RANK} % 2))
    then
      export GASNET_IBV_PORTS=${GASNET_IBV_PORTS%+*}
    else
      export GASNET_IBV_PORTS=${GASNET_IBV_PORTS#*+}
    fi
  elif [ "${__UPCXX_RUN_SUMMIT_nHCAS}" == "2" ]
  then
    # 2 HCAs per rank: the pair selected above is already what we want
    :
  elif [ "${__UPCXX_RUN_SUMMIT_nHCAS}" == "4" ]
  then
    # 4 HCAs per rank: use every port regardless of socket or mode
    export GASNET_IBV_PORTS=mlx5_0+mlx5_1+mlx5_2+mlx5_3
  fi

  if [ -n "${__UPCXX_RUN_SUMMIT_SHARED_HEAP}" ]
  then
    if [[ "${__UPCXX_RUN_SUMMIT_SHARED_HEAP}" == *% ]]
    then
      # A trailing '%' means "percentage of node RAM": hand GASNet the
      # equivalent fraction as its maximum segment size and let UPC++ take
      # all of it.
      GASNET_MAX_SEGSIZE=$(__upcxx_run_summit_percent_to_segsize "${__UPCXX_RUN_SUMMIT_SHARED_HEAP%\%}") || exit 1
      export GASNET_MAX_SEGSIZE
      export UPCXX_SHARED_HEAP_SIZE="MAX"
    else
      # Anything else is passed through unchanged as the shared heap size.
      export UPCXX_SHARED_HEAP_SIZE="${__UPCXX_RUN_SUMMIT_SHARED_HEAP}"
    fi
  fi

  # TEMPORARY
  # for Issue #37 to scale up to 778 nodes
  # ':=' keeps any value already present in the environment (for example the
  # GASNET_USE_XRC=0 exported by --use-odp).
  export GASNET_USE_SRQ=${GASNET_USE_SRQ:=1}
  export GASNET_USE_XRC=${GASNET_USE_XRC:=1}
  export GASNET_AM_CREDITS_PP=$__UPCXX_RUN_SUMMIT_nHCAS
  # NOTE(review): 778 matches the node count named above; 42 is presumably
  # the number of ranks per node -- confirm.
  export GASNET_RBUF_COUNT=$((GASNET_AM_CREDITS_PP*778*42))

  # Only ranks 0, 1, 21 and 22 report their settings.
  case "${PMIX_RANK}" in
    0 | 1 | 21 | 22)
      # $(env | grep GASNET) is deliberately unquoted so the matching
      # variables are joined onto one line.
      echo "${PMIX_RANK}: second invocation $(uname -n) pid=$$ hwloc='$(hwloc-ps)'" $(env | grep GASNET)
      ;;
  esac

  # 'exec' with no command would return and fall through to the jsrun launch
  # below, so refuse to continue without a program to run.
  if [ $# -eq 0 ]
  then
    echo "$0: no executable given" 1>&2
    exit 1
  fi
  # actually run the program in this second invocation
  exec "$@"
fi
# First invocation: re-launch this same script through jsrun.
# Without a program to run, the re-launched copies would have nothing to
# exec, so stop here with the usage text instead.
if [ $# -eq 0 ]
then
  echo "$0: missing upcxx-executable" 1>&2
  echo "$USAGE" 1>&2
  exit 1
fi
# Marker checked at the top of the second-invocation branch.
export __UPCXX_RUN_SUMMIT=1
# first invocation calls jsrun on the submit node
# $(env | grep GASNET) is deliberately unquoted so the matching variables are
# joined onto one line.
echo "Executing first invocation on $(uname -n): " $(env | grep GASNET)
# Trace the exact jsrun command line.
set -x
# One task per CPU of each resource set; "$0" and the remaining arguments are
# quoted so paths or arguments containing whitespace survive intact.
exec jsrun --rs_per_host "$RS_PER_HOST" --cpu_per_rs "$CPU_PER_RS" --gpu_per_rs "$GPU_PER_RS" --tasks_per_rs "$CPU_PER_RS" -d "plane:$CPU_PER_RS" --bind none "$0" "$@"