# config.yaml
wandb:
  project: meta-mmo
  group: ~
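
# Small-scale overrides for quick smoke tests. These keys mirror the train
# section below and are assumed to take precedence when running in debug mode.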
debug:
  train:
    num_envs: 1
    envs_per_batch: 1  # does env batching work?
    envs_per_worker: 1
    batch_size: 1024
    total_timesteps: 10000
    pool_kernel: [0, 1]
    checkpoint_interval: 3
    verbose: True
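
# Main training settings. The gamma / gae_lambda / clip_coef / ent_coef /
# vf_coef keys follow the usual CleanRL-style PPO naming.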
train:
  seed: 21
  torch_deterministic: True
  device: cuda
  total_timesteps: 100_000_000
  learning_rate: 1.0e-4
  anneal_lr: True
  gamma: 0.99
  gae_lambda: 0.95
  update_epochs: 2
  norm_adv: True
  clip_coef: 0.1
  clip_vloss: True
  ent_coef: 0.01
  vf_coef: 0.5
  max_grad_norm: 0.5
  target_kl: ~
  num_envs: 15
  envs_per_worker: 1
  envs_per_batch: 6
  env_pool: True
  verbose: True
  data_dir: runs
  checkpoint_interval: 763  # every ~25M steps (763 updates x 32768 batch_size)
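  # Assumed semantics: pool_kernel assigns each of the 128 agent slots to a
  # policy index -- 0 for the current learner, 1 for a frozen previous checkpoint.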
  # 112 learners + 16 previous-policy agents
  pool_kernel: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
  batch_size: 32768
  batch_rows: 128
  bptt_horizon: 8
  vf_clip_coef: 0.1
  compile: False
  compile_mode: reduce-overhead
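
# WandB hyperparameter sweep: random search maximizing episodic_return over
# the train parameters listed below.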
sweep:
  method: random
  name: sweep
  metric:
    goal: maximize
    name: episodic_return
  # The nested 'parameters' keys are required by the WandB sweep API
  parameters:
    train:
      parameters:
        learning_rate: {
          'distribution': 'log_uniform_values',
          'min': 1e-4,
          'max': 1e-1,
        }
        batch_size: {
          'values': [128, 256, 512, 1024, 2048],
        }
        batch_rows: {
          'values': [16, 32, 64, 128, 256],
        }
        bptt_horizon: {
          'values': [4, 8, 16, 32],
        }
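
# Environment settings: 128 agents in 16 teams of 8, plus 256 NPCs, on
# procedurally generated 128x128 maps with a 1024-tick episode cap.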
env:
  num_agents: 128
  num_agents_per_team: 8
  num_npcs: 256
  max_episode_length: 1024
  maps_path: 'maps/train/'
  map_size: 128
  num_maps: 256
  map_force_generation: False
  death_fog_tick: 256
  spawn_immunity: 20
  resilient_population: 0
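
# Network sizes: width of the policy layers and of the single recurrent layer.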
policy:
  layer_width: 256

recurrent:
  layer_width: 256
  num_layers: 1
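
# Reward wrapper flags used during training; setting eval_mode: True is
# assumed to disable the custom reward shaping for evaluation runs.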
reward_wrapper:
  eval_mode: False
  early_stop_agent_num: 0
  use_custom_reward: True
  augment_obs: True
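
# Baseline reward shaping: a flat penalty on losing the game plus small
# weighted bonuses for HP, experience, defense, attack, and gold.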
baseline:
  reward_wrapper:
    game_lost_penalty: -1.0
    game_won_reward: ~
    nontask_bonus_scale: 0.1
    hp_bonus_weight: 0.03
    exp_bonus_weight: 0.002
    defense_bonus_weight: 0.04
    attack_bonus_weight: 0.0
    gold_bonus_weight: 0.001