start_TFandCFandDL_rpyc_servers.sh
#!/bin/bash
_basedir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# After running this script, all the services can be killed via: pkill -f rpyc_server
DEBUG=true
# DEBUG=false
if [ "$DEBUG" = true ] ; then
# for debugging send to background otherwise use daemon process.
nvcmd="nvidia-docker run"
else
nvcmd="nvidia-docker run -d"
# When using daemon no need to send to background via &, but it was just
# easier then having an extra if condition for ampersand.
fi
pushd "${_basedir}"
datamnts="/tmp"
mntdata=''
if [ ! -z "${datamnts// }" ]; then
    for mnt in ${datamnts//,/ } ; do
        mntdata="-v ${mnt}:${mnt} ${mntdata}"
    done
fi
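# Example (illustrative paths only): datamnts="/raid/data,/scratch" would expand to
#   mntdata="-v /scratch:/scratch -v /raid/data:/raid/data "
# i.e. each comma-separated host path is bind-mounted at the same path inside the containers.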
USEROPTS="-u $(id -u):$(id -g) -e HOME=$HOME -e USER=$USER -v $HOME:$HOME"
getent group > ${_basedir}/group
getent passwd > ${_basedir}/passwd
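# The group/passwd snapshots above are bind-mounted read-only into the containers
# below so that the host user and group IDs resolve to names inside the containers.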
tfcontainer="nvcr.io/nvidia/tensorflow:17.12"
# nvidia-docker run -d --rm --name=tfserv --net=host \
${nvcmd} --rm --name=tfserv --net=host \
    $USEROPTS $mntdata \
    --hostname "$(hostname)_contain" \
    -v ${_basedir}/passwd:/etc/passwd:ro -v ${_basedir}/group:/etc/group:ro \
    --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
    -w ${_basedir} --entrypoint=bash $tfcontainer -c '
source bash_init_rpyc.sh
python tfrpyc_server.py --debug
' &
# The ampersand is not needed when running as a daemon (-d).
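# Hedged client sketch (assumptions: the *_rpyc_server.py scripts expose rpyc
# services on this host; the port 18861 below is only rpyc's default and is an
# example, use whatever port the server scripts actually bind):
#   python -c 'import rpyc; conn = rpyc.connect("localhost", 18861); print(conn.root)'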
nvcfcontainer="nvcr.io/nvidia/caffe:17.12"
# For debugging this runs in the foreground and is sent to the background;
# otherwise it runs as a daemon process (see nvcmd above).
# nvidia-docker run -d --rm --name=nvcfserv --net=host \
${nvcmd} --rm --name=nvcfserv --net=host \
    $USEROPTS $mntdata \
    --hostname "$(hostname)_contain" \
    -v ${_basedir}/passwd:/etc/passwd:ro -v ${_basedir}/group:/etc/group:ro \
    --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
    -w ${_basedir} --entrypoint=bash $nvcfcontainer -c '
source bash_init_rpyc.sh
python nvcfrpyc_server.py --debug
' &
cfcontainer="bvlc/caffe:gpu"
# On Volta hardware rebuild Caffe with CUDA 9 and NCCL 2, e.g.:
# cfcontainer="bvlc_caffe_nccl2"
# For debugging this runs in the foreground and is sent to the background;
# otherwise it runs as a daemon process (see nvcmd above).
# nvidia-docker run -d --rm --name=cfserv --net=host \
${nvcmd} --rm --name=cfserv --net=host \
    $USEROPTS $mntdata \
    --hostname "$(hostname)_contain" \
    -v ${_basedir}/passwd:/etc/passwd:ro -v ${_basedir}/group:/etc/group:ro \
    --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
    -w ${_basedir} --entrypoint=bash $cfcontainer -c '
source bash_init_rpyc.sh
python cfrpyc_server.py --debug
' &
# Start the generic service that can be used to spawn docker services.
source bash_init_rpyc.sh
python dlrpyc_server.py --debug &  # run on the host and send to the background
popd
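# Cleanup sketch (container names taken from the commands above; the docker stop
# line is only needed if a container does not exit along with its server process):
#   pkill -f rpyc_server
#   docker stop tfserv nvcfserv cfserv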