-
Notifications
You must be signed in to change notification settings - Fork 5
/
config.py
25 lines (22 loc) · 802 Bytes
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Hyperparameter configuration, grouped into common settings, prioritized
# experience replay (PER) settings, and C51 settings.

# common config
env_id = 'BreakoutNoFrameskip-v4'
number_timesteps = int(1e7)  # total number of time steps to train on
explore_frac = 0.1  # fraction of number_timesteps over which epsilon is annealed


def epsilon(i):
    """Return the epsilon-greedy exploration probability at time step ``i``.

    Epsilon decays linearly from 1 at step 0 to 0.01 once
    ``number_timesteps * explore_frac`` steps have elapsed, and stays at 0.01
    afterwards (i.e. the final exploit probability is 0.99).

    ``number_timesteps`` and ``explore_frac`` are looked up at call time, so
    overriding them after import changes the schedule accordingly.
    """
    anneal_steps = number_timesteps * explore_frac
    if anneal_steps <= 0:
        # An empty exploration window would otherwise divide by zero; treat
        # the annealing as already finished.
        progress = 1
    else:
        progress = min(1, i / anneal_steps)
    return 1 - 0.99 * progress


lr = 1e-4  # learning rate
buffer_size = 100000  # replay buffer size
target_q_update_freq = 1000  # how frequently the target Q network is updated
train_freq = 4  # presumably steps between training updates; confirm with trainer
clipnorm = 10  # presumably the gradient-norm clipping threshold; confirm with trainer
reward_gamma = 0.99  # reward discount
batch_size = 32  # batch size for sampling from replay buffer
warm_start = 10000  # sample times before learning

# config for per
prioritized_alpha = 0.6  # alpha in PER
prioritized_beta0 = 0.4  # initial beta in PER

# config for c51
atom_num = 51  # presumably the number of atoms in the C51 value support; verify
min_value = -1.0  # allow some approximation error
max_value = 19.0