-
Notifications
You must be signed in to change notification settings - Fork 0
/
add_jobs.py
executable file
·95 lines (78 loc) · 3.07 KB
/
add_jobs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import os.path as osp
import hashlib
import time
import glob
import argparse
from hyper_params import ParamGenerator
# Baseline hyper parameters; handed to ParamGenerator.generate() as the
# starting values for every generated parameter set.
default_params = dict(
    script_name='demo.py',
    sleep_time=5,
    n_depth=3,
    n_channel=32,
    gpu_memory=0.1,
)

# Job directories, all located under ROOT_JOB.  This script writes new job
# scripts into TODO_DIR; the other three are only created here (presumably a
# separate runner moves scripts between them -- confirm against that tool).
ROOT_JOB = 'jobs'
TODO_DIR, QUEUE_DIR, DONE_DIR, FAIL_DIR = (
    '{}/{}'.format(ROOT_JOB, stage)
    for stage in ('todo', 'queue', 'done', 'fail')
)
def check_job_dirs():
    """Create the todo/queue/done/fail job directories if they are missing.

    Raises:
        OSError: if a directory cannot be created, or if one of the paths
            already exists but is not a directory.
    """
    for job_dir in (TODO_DIR, QUEUE_DIR, DONE_DIR, FAIL_DIR):
        try:
            os.makedirs(job_dir)
        except OSError:
            # Try-then-check instead of check-then-create: another process
            # may create the directory at any moment.  An existing directory
            # is fine; anything else (permission error, a regular file in
            # the way) is a real failure and is re-raised.
            if not osp.isdir(job_dir):
                raise
def get_command(params):
    """Build the shell command line that runs one job.

    Args:
        params: mapping that provides 'script_name', 'sleep_time',
            'n_depth', 'n_channel' and 'gpu_memory'.

    Returns:
        A single string: the OMP_NUM_THREADS=4 prefix, the python invocation
        of the script, then one ``--name=value`` option per parameter.

    Raises:
        KeyError: if any of the keys listed above is missing from ``params``.
    """
    option_names = ('sleep_time', 'n_depth', 'n_channel', 'gpu_memory')
    pieces = ['OMP_NUM_THREADS=4 python {}'.format(params['script_name'])]
    for name in option_names:
        pieces.append('--{}={}'.format(name, params[name]))
    return ' '.join(pieces)
def write_shell_script(command, memo=None, params=None):
    """Write a bash job script that echoes and runs ``command`` into TODO_DIR.

    The file is named ``job-<hash>.sh`` where ``<hash>`` is the first 16 hex
    digits of the SHA-256 of ``command``, so writing the same command twice
    overwrites the same file.

    Args:
        command: shell command line to store in the script.
        memo: optional free text, written as a ``# [MEMO]`` comment line.
        params: optional parameter description, written as a ``# [PARAM]``
            comment line.

    Returns:
        The path of the job script that was written.
    """
    hash_str = hashlib.sha256(command.encode('utf-8')).hexdigest()[:16]
    job_file = osp.join(TODO_DIR, 'job-{}.sh'.format(hash_str))

    # Inside single quotes the shell takes every character literally, so the
    # only character needing care is the single quote itself: close the
    # string, emit an escaped quote, and reopen it.
    quoted_command = command.replace("'", "'\\''")

    lines = ['#!/bin/bash', '# [TIME] {}'.format(time.asctime())]
    for tag, value in (('MEMO', memo), ('PARAM', params)):
        if value:
            # Fold line breaks so the text cannot escape the '#' comment and
            # end up as an executable line of the script.
            text = ' '.join('{}'.format(value).splitlines())
            lines.append('# [{}] {}'.format(tag, text))
    lines.append("CMD='{}'".format(quoted_command))
    # Double quotes stop word splitting and glob expansion of the stored
    # command before echo/eval see it.
    lines.append('echo "$CMD"')
    lines.append('eval "$CMD"')

    with open(job_file, 'w') as f:
        f.write('\n'.join(lines) + '\n')
    print('Create {}'.format(job_file))
    return job_file
def main():
    """Generate one job script per hyper-parameter combination.

    With ``--dry_run`` the combinations are only printed: no directories are
    created and no job scripts are written.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--dry_run', action='store_true',
                        help='dry-run (not save any files)')
    config, unparsed = parser.parse_known_args()
    if unparsed:
        # SystemExit with a message prints it to stderr and exits with
        # status 1, without relying on the site-provided exit() helper.
        raise SystemExit('[Error] unparsed args: {}'.format(unparsed))

    # Hyper-parameter search space.  The exact combination rules are defined
    # by ParamGenerator (hyper_params module), not by this script.
    pg = ParamGenerator()
    memo = 'run demo.py with various hyper parameters'
    pg.add_params('sleep_time', [5, 10, 20, 30])
    pg.add_params_if('gpu_memory', [0.3, 0.5], cond_key='sleep_time', cond_val=30)
    pg.add_params('n_depth', [3, 6, 9, 12])
    pg.add_params('n_channel', [64, 64, 32, 32], in_series=True)
    all_params = pg.generate(default_params, add_param_string=True)

    if not config.dry_run:
        check_job_dirs()

    for n, params in enumerate(all_params):
        print(n, params['__PARAM__'])
        cmd = get_command(params)
        if not config.dry_run:
            write_shell_script(cmd, memo=memo, params=params['__PARAM__'])
            time.sleep(0.1)  # insert sleep to give a timestamp properly

    total_num_jobs = len(glob.glob(osp.join(TODO_DIR, 'job*sh')))
    if config.dry_run:
        # Nothing was written, so do not claim that jobs were added.
        print('[dry-run] {} jobs would be added and {} jobs are waiting for running'.format(
            len(all_params), total_num_jobs))
    else:
        print('Add {} jobs and {} jobs are waiting for running'.format(len(all_params), total_num_jobs))


if __name__ == '__main__':
    main()