Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
xinpw8 committed Apr 3, 2024
1 parent 51375cd commit aea1fab
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 31 deletions.
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,10 @@ dmypy.json
.vscode/

# ROMs
*.gb
*.gb

# Experiments
/experiments

# wandb
/wandb
43 changes: 15 additions & 28 deletions config.yaml
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,35 +1,29 @@
# goes in PufferLib top directory
train:
seed: 1
torch_deterministic: True
device: cuda
# total_timesteps: 800_000_000 # superseded by pokemon_red package
learning_rate: 0.000175 # 0.00015 # 0.0002 # 0.00025 # decreased lr again from 0.00015 (which works best so far!) to 0.0001; decreased lr from 0.00025 to 0.0002; now again, decreased to 0.00015
# num_steps: 128 # 128
anneal_lr: False # True # changed from True to False
gamma: 0.998 # 0.998 # 0.99 #BET testing 2/6/24
learning_rate: 0.0002 # 0.0002 # 0.000175 # 0.00015
anneal_lr: False
gamma: 0.998
gae_lambda: 0.95
num_minibatches: 4
num_minibatches: 1 # 4 # 16 # 32 didn't work well? kind of slow? # 16 worked well # 4 default
norm_adv: True
clip_coef: 0.1 # 0.1
clip_coef: 0.1
clip_vloss: True
ent_coef: 0.01
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: ~

# num_envs: 512 # 128 # 48 # superseded by pokemon_red package
# envs_per_worker: 8 # 4 # superseded by pokemon_red package
# envs_per_batch: 48 # must be <= num_envs # superseded by pokemon_red package
env_pool: True
verbose: True
data_dir: experiments
checkpoint_interval: 200
cpu_offload: True
pool_kernel: [0]
# batch_size: 65280 # 32768 # 128 (?) # superseded by pokemon_red package
# batch_rows: 1024 # 256 # 128 # superseded by pokemon_red package
bptt_horizon: 16 # 32 # 16
vf_clip_coef: 0.1 # 0.12 # 0.1
bptt_horizon: 16
vf_clip_coef: 0.1
compile: True
compile_mode: reduce-overhead

Expand Down Expand Up @@ -57,26 +51,19 @@ sweep:
bptt_horizon: {
'values': [4, 8, 16, 32],
}
# experiment log:
# 500m/72/4/12/98304 (num_envs, total_timesteps unchanged; envs_per_worker 1->2, envs_per_batch 24->12, batch_size 49152->98304) a little slower than best run. badge1: 46 min badge2: 114 min crashed
# 500m/72/2/12/98304 also crashed - a little bit to a lot slower with no apparent benefit
# 500m/72/2/24/49152 no badge by 30 min
# 500m/72/1/24/49152 lr 0.00015 annealing off. works well - but stuck after Pokemon Tower/Celadon
# crash
# 96/1/32/65536 adjusted level reward 30 -> 50

# fastest badge 1 ever: (lr: 2.0e-4, anneal=True, num_envs=150, envs_per_worker=2, envs_per_batch=60, update_epochs=3, batch_size=32768, batch_rows=64)
pokemon_red:
package: pokemon_red
train:
total_timesteps: 750_000_000
num_envs: 96 # 144/4/24/49152 crashed :/ cuda error; # 96 # reduced from 150 to 96; reduced again from 96 to 72 ||| 12 workers, 2 envs per worker. batch size 12: 2 observations each from 6 workers for a total of 12. 12 workers / 6 envs per worker = 2 workers 2 60 150 workers / 2 envs per worker = then you have 300 envs 75 workers cpu with 1 core. run 2 envs on it. batch size=1 reset, and 2 obs waiting for you. only take 1 of them because batch size 1. compute action, give it back. env already has other obs for you, it gives you that obs, and while you're stepping that, it gives the action it computed for you back.
envs_per_worker: 1 # just leave on 1
# envpool_batch_size: 128
envs_per_batch: 32 # 12 # 24 # 32 # reduced again from 24 (which works best so far!) to 12; reduced from 32 to 24 to match num_envs: 72 # changed from 6 to 12 # 12 # 32 # 60 # env_per_batch / envs_per_worker ~ (# thread -2) and set num_envs ~ 2.5 * env_per_batch
total_timesteps: 1_000_000_000
num_envs: 72 # 96 # 144 # 72
envs_per_worker: 1 # 1
envs_per_batch: 18 # 48 is crap w/ 96 env # 24
update_epochs: 3 # 3
gamma: 0.998
batch_size: 65536 # 49152 (better than 65k) # 65536 / 32 = 2048. 2048 * 24 = 49152. Let's try it... :) # 131072 # 65536 # 32768 # 131072 doesn't work as well, but still just about as fast as 65536. so far 65536 is the best b/c agents get cut quickly and cut reliably and then explore afterwards.
batch_rows: 128
batch_size: 49152 # 65536 # 49152
batch_rows: 128 # 128
env:
name: pokemon_red
pokemon-red:
Expand Down
1 change: 0 additions & 1 deletion pokegym/pokegym/required_resources/running_experiment.txt

This file was deleted.

Empty file.
1 change: 0 additions & 1 deletion pokegym/pokegym/required_resources/test_exp.txt

This file was deleted.

0 comments on commit aea1fab

Please sign in to comment.