-
Notifications
You must be signed in to change notification settings - Fork 7
/
launcher_finetune.py
94 lines (84 loc) · 4.41 KB
/
launcher_finetune.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
# ---------------------------------------------------------------------------
# Launch configuration (edit these values, then run this file as a script).
# ---------------------------------------------------------------------------
mode = 'finetune'  # 'final' or 'finetune'
env_ls = ['slowdown']  # 'catchup', 'eight'
algo_ls = ['DMPO']
name = '[finetune-0]'
# Number of tmux windows (repeated runs) opened per configuration.
seed_number = 5

# Hyper-parameter overrides tried in 'finetune' mode, keyed by environment.
# Each entry is spliced verbatim into a double-quoted shell argument, so the
# inner double quotes are backslash-escaped for the shell started by
# os.system, and the surrounding single quotes are meant to survive until the
# shell running inside the tmux window parses the typed command.
tune_ls = {
    'ring': [
        '\'{\\\"agent_args.lr_v\\\":3e-3}\'',
        '\'{\\\"agent_args.lr\\\":2e-3,\\\"agent_args.lr_v\\\":2e-3}\'',
        '\'{\\\"agent_args.lr\\\":5e-3,\\\"agent_args.lr_v\\\":6e-3}\'',
    ],
    'eight': [
        '\'{\\\"agent_args.lr_v\\\":1e-2,\\\"agent_args.lr_p\\\":1e-2}\'',
        '\'{\\\"agent_args.lr_v\\\":1e-2,\\\"agent_args.lr_p\\\":1e-3}\'',
        '\'{\\\"agent_args.lr_v\\\":1e-3,\\\"agent_args.lr_p\\\":1e-3}\'',
    ],
    'catchup': [
        '{}',
        '\'{\\\"agent_args.lr\\\":5e-4,\\\"agent_args.lr_v\\\":5e-4}\'',
        '\'{\\\"agent_args.lr\\\":5e-3,\\\"agent_args.lr_v\\\":5e-3}\'',
        '\'{\\\"agent_args.lr\\\":5e-5,\\\"agent_args.lr_v\\\":5e-4}\'',
    ],
    'slowdown': [
        '\'{\\\"agent_args.lr_v\\\":1e-2,\\\"agent_args.lr_p\\\":1e-2}\'',
        '\'{\\\"agent_args.lr_v\\\":1e-2,\\\"agent_args.lr_p\\\":1e-3}\'',
        '\'{\\\"agent_args.lr_v\\\":1e-3,\\\"agent_args.lr_p\\\":1e-3}\'',
    ],
}


def build_commands(mode, algos, envs, run_name, seeds, tune_table=None):
    """Return the ordered list of shell commands for one launch.

    No command is executed here; the function only builds strings, which
    keeps the quoting logic testable without tmux.

    Args:
        mode: 'final' (run launcher_baseline.py once per window) or
            'finetune' (run launcher.py once per window and per override
            listed in ``tune_table``).
        algos: iterable of algorithm names passed as ``--algo``.
        envs: iterable of environment names passed as ``--env``.
        run_name: experiment tag used in the tmux session/group name and
            passed as ``--name`` (suffixed with the override index in
            'finetune' mode).
        seeds: number of windows opened per configuration.
        tune_table: mapping env -> list of pre-escaped ``--para`` strings;
            only read in 'finetune' mode.

    Returns:
        A list of shell command strings, in execution order.

    Raises:
        ValueError: if ``mode`` is neither 'final' nor 'finetune'.
        KeyError: in 'finetune' mode, if an env has no entry in
            ``tune_table``. Raised before any command is produced so that a
            typo cannot leave a half-populated tmux session behind.
    """
    if mode not in ('final', 'finetune'):
        raise ValueError(
            f"unknown mode {mode!r}; expected 'final' or 'finetune'")

    envs = list(envs)
    if mode == 'finetune':
        tune_table = {} if tune_table is None else tune_table
        missing = [env for env in envs if env not in tune_table]
        if missing:
            raise KeyError(f'no tuning entries for env(s): {missing}')

    commands = []
    for algo in algos:
        for env in envs:
            # One detached tmux session (group) per algo/env pair.
            commands.append(f'tmux new-session -d -t {algo}_{env}{run_name}')
            # Type "clear" followed by keypad Enter; no -t is given, so tmux
            # chooses the target session itself.
            commands.append('tmux send-keys clear')
            commands.append('tmux send-keys KPEnter')
            for i in range(seeds):
                if mode == 'final':
                    # One window per repetition, then type the run command.
                    commands.append(f'tmux new-window -d -n actor_{i}')
                    commands.append(
                        f"tmux send-keys -t actor_{i} 'python launcher_baseline.py --env {env} --algo {algo} --name {run_name}' ENTER")
                else:
                    for j, para in enumerate(tune_table[env]):
                        # One window per (override j, repetition i).
                        commands.append(
                            f'tmux new-window -d -n actor_para{j}_{i}')
                        commands.append(
                            f"tmux send-keys -t actor_para{j}_{i} \"python launcher.py --env {env} --algo {algo} --name {run_name}{j} --para {para}\" ENTER")
    return commands


def main():
    """Run every tmux command for the module-level configuration."""
    for shell_command in build_commands(
            mode, algo_ls, env_ls, name, seed_number, tune_ls):
        # NOTE: exit statuses are ignored, as in the original script.
        os.system(shell_command)
    # Sessions are created detached. The original script kept an optional
    # `tmux attach -t {algo}_{env}{name}` step commented out; attach manually
    # if desired.


if __name__ == '__main__':
    main()
'''
## CPPO
tune_ls = {'ring': ['{\"agent_args.lr_v\":2e-3}',
'{\"agent_args.lr\":1e-3}',
'{\"agent_args.lr\":2e-3, \"agent_args.lr_v\":2e-3}'],
'eight': ['{}'],
'catchup': ['{}'],
'slowdown': ['{}',
'{\"agent_args.lr\":1e-4,"agent_args.lr_v":1e-3}'],
}
tune_ls = {'ring': ['\'{\\\"agent_args.lr_v\\\":3e-3}\'',
'\'{\\\"agent_args.lr\\\":1e-3}\'',
'\'{\\\"agent_args.lr\\\":5e-3,\\\"agent_args.lr_v\\\":6e-3}\''],
'eight': ['{}'],
'catchup': ['{}'],
'slowdown': ['{}',
'\'{\\\"agent_args.lr\\\":1e-4,\\\"agent_args.lr_v\\\":1e-3}\''],
}
# for eight
## IA2C
tune_ls = {'ring': ['\'{\\\"agent_args.lr_v\\\":3e-3}\'',
'\'{\\\"agent_args.lr\\\":2e-3,\\\"agent_args.lr_v\\\":2e-3}\'',
'\'{\\\"agent_args.lr\\\":5e-3,\\\"agent_args.lr_v\\\":6e-3}\''],
'eight': ['{}'],
'catchup': ['{}',
'\'{\\\"agent_args.lr\\\":5e-4,\\\"agent_args.lr_v\\\":5e-4}\'',
'\'{\\\"agent_args.lr\\\":5e-3,\\\"agent_args.lr_v\\\":5e-3}\'',
'\'{\\\"agent_args.lr\\\":5e-5,\\\"agent_args.lr_v\\\":5e-4}\''],
'slowdown': ['{}',
'\'{\\\"agent_args.lr\\\":1e-4,\\\"agent_args.lr_v\\\":1e-3}\''],
}
'''