-
Notifications
You must be signed in to change notification settings - Fork 1
/
dlrpyc_client.py
95 lines (66 loc) · 2.47 KB
/
dlrpyc_client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
'''
Example client: serialize a function that launches a TensorFlow MNIST
training run through nvidia-docker, and ask the server reached via
get_server_connection('deeplearning') to execute it.
'''
import os
import sys
import dill
from rpycdl_lib.common import mkdir_p
from cli_clients_common import get_server_connection
# Parent of the directory that contains this file (absolute path). main()
# resolves the 'examples' and 'workdir' trees relative to it.
_topdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
def dltfclient_code(codeargs):
    '''Run a Python script inside an nvidia-docker container and echo its output.

    A single ``nvidia-docker run`` shell command is assembled from
    ``codeargs``, printed, executed, and its combined stdout/stderr is
    printed line by line (each line stripped of surrounding whitespace).

    Args:
        codeargs: Mapping with the following keys:
            'container': image name given to ``nvidia-docker run``.
            'codefile': path of the Python file to execute in the container.
            'cliargs': command-line arguments appended after ``codefile``.
            'datadir': value exported to the container as ``DATA``.
            'workdir': value exported as ``WORKDIR`` and used as the
                container working directory (``-w``).

    Returns:
        None. The function blocks until the command's output reaches EOF
        and the process has exited.

    Raises:
        KeyError: if one of the keys above is missing from ``codeargs``.
    '''
    # Kept as a function-local import: main() serializes this function with
    # dill and hands it to the server, so it presumably must not rely on
    # module-level imports of this client script.
    import subprocess

    container = codeargs['container']
    codefile = codeargs['codefile']
    cliargs = codeargs['cliargs']
    datadir = codeargs['datadir']
    workdir = codeargs['workdir']

    envvars = {
        'DATA': datadir,
        'WORKDIR': workdir,
    }
    envvars_cmd = ' '.join(
        '-e {vn}="{vv}"'.format(vn=vn, vv=vv) for vn, vv in envvars.items())

    # Resource limits passed to docker; labelled "recommended" by the
    # original author.
    recommended_opts = '--shm-size=1g --ulimit memlock=-1 '\
        '--ulimit stack=67108864'

    workdir_cmd = '-w "{}"'.format(workdir)

    code_cmd = 'python {codefile} {cliargs}'.format(
        codefile=codefile, cliargs=cliargs)

    # The inner command is wrapped in single quotes so that references such
    # as "$DATA" inside cliargs are expanded by the bash running in the
    # container, not by the outer shell. $(id -u), $HOME and $USER are left
    # for the outer shell, which is why shell=True is required below.
    # NOTE(review): codeargs values are interpolated unescaped into a shell
    # command; only pass trusted values.
    runcmd = 'nvidia-docker run --rm -t '\
        '-u $(id -u):$(id -g) -e HOME=$HOME -e USER=$USER -v $HOME:$HOME '\
        '{envvars_cmd} {recommended_opts} '\
        '{workdir_cmd} --entrypoint=bash {container} -c \' '\
        '{code_cmd} \' '\
        .format(
            envvars_cmd=envvars_cmd,
            recommended_opts=recommended_opts,
            workdir_cmd=workdir_cmd, container=container,
            code_cmd=code_cmd)

    print('RUNNING NVIDIA-DOCKER CMD:\n\n{}\n\n'.format(runcmd))

    rp = subprocess.Popen(
        runcmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    try:
        # The pipe yields bytes on Python 3 and str on Python 2; b'' equals
        # the EOF value on both, whereas "" never matches bytes and would
        # make this loop spin forever on Python 3.
        for stdout_line in iter(rp.stdout.readline, b''):
            if not isinstance(stdout_line, str):
                stdout_line = stdout_line.decode('utf-8', 'replace')
            print(stdout_line.strip())
    finally:
        # Release the pipe and reap the child so it is not left as a zombie.
        rp.stdout.close()
        rp.wait()
def main():
    '''Submit the mnist_deep TensorFlow example to the 'deeplearning' server.

    Opens a connection with get_server_connection('deeplearning'), creates
    the local working directory, serializes dltfclient_code with dill and
    passes it, together with its arguments, to the server's ``run_code``.
    The local stdout/stderr objects are handed to ``run_code`` as well.
    The connection is closed even if submission fails.
    '''
    # Run mnist_deep training -------------------------------------------------
    tfserv = get_server_connection('deeplearning')
    try:
        ngpus = 1

        examples_dir = os.path.join(_topdir, 'examples')
        container = 'nvcr.io/nvidia/tensorflow:17.12'
        codefile = '{}/tensorflow/mnist/mnist_deep.py'.format(examples_dir)
        # $DATA / $WORKDIR are left unexpanded here; dltfclient_code exports
        # them into the container from 'datadir' and 'workdir'.
        cliargs = '--data_dir "$DATA" --workdir "$WORKDIR"'
        datadir = '{}/data/mnist'.format(examples_dir)
        workdir = '{}/workdir/tensorflow/mnist/'.format(_topdir)
        mkdir_p(workdir)

        codeargs = {
            'container': container,
            'codefile': codefile,
            'cliargs': cliargs,
            'datadir': datadir,
            'workdir': workdir,
        }

        tfcode_mnist = dill.dumps(dltfclient_code)
        tfserv.root.run_code(tfcode_mnist, codeargs, ngpus=ngpus,
                             stdout=sys.stdout, stderr=sys.stderr)
    finally:
        # Always release the server connection, including on errors above.
        tfserv.close()
# Script entry point: run the example only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()