
Commit

moving to box5
xinpw8 committed Mar 25, 2024
1 parent 2ae06cd commit 9d1b8aa
Showing 4 changed files with 12 additions and 7 deletions.
17 changes: 10 additions & 7 deletions config.yaml
@@ -3,7 +3,7 @@ train:
torch_deterministic: True
device: cuda
# total_timesteps: 800_000_000 # superseded by pokemon_red package
learning_rate: 0.00015 # 0.00015 # 0.0002 # 0.00025 # decreased lr again from 0.00015 (which works best so far!) to 0.0001; decreased lr from 0.00025 to 0.0002; now again, decreased to 0.00015
learning_rate: 0.000175 # 0.00015 # 0.0002 # 0.00025 # decreased lr again from 0.00015 (which works best so far!) to 0.0001; decreased lr from 0.00025 to 0.0002; now again, decreased to 0.00015
# num_steps: 128 # 128
anneal_lr: False # True # changed from True to False
gamma: 0.998 # 0.998 # 0.99 #BET testing 2/6/24
@@ -62,17 +62,20 @@ sweep:
# 500m/72/2/12/98304 also crashed - a little bit to a lot slower with no apparent benefit
# 500m/72/2/24/49152 no badge by 30 min
# 500m/72/1/24/49152 lr 0.00015 annealing off. works well - but stuck after pokemon tower/celedon
# crash
# 96/1/32/65536 adjusted level reward 30 -> 50

pokemon_red:
package: pokemon_red
train:
total_timesteps: 500_000_000
num_envs: 72 # 144/4/24/49152 crashed with a CUDA error; reduced from 150 to 96, then again from 96 to 72 ||| How the async pool batches: with 12 workers and 2 envs per worker, a batch size of 12 means 2 observations each from 6 workers; with 150 workers and 2 envs per worker you get 300 envs. Picture one CPU core running 2 envs with batch size 1: after reset there are 2 observations waiting, the policy takes only 1 (batch size 1), computes its action, and hands it back; the pool already has the other observation ready, so it returns that one, and while that env is stepping, the first env receives the action computed for it.
envs_per_worker: 1 # 6 # 1 # 1 (next, try 6) increased from 1 to 6 (wild, I know) (it was slowwww SPS so killed it)
total_timesteps: 750_000_000
num_envs: 96 # 144/4/24/49152 crashed with a CUDA error; reduced from 150 to 96, then again from 96 to 72 ||| How the async pool batches: with 12 workers and 2 envs per worker, a batch size of 12 means 2 observations each from 6 workers; with 150 workers and 2 envs per worker you get 300 envs. Picture one CPU core running 2 envs with batch size 1: after reset there are 2 observations waiting, the policy takes only 1 (batch size 1), computes its action, and hands it back; the pool already has the other observation ready, so it returns that one, and while that env is stepping, the first env receives the action computed for it. (toy sketch below this diff)
envs_per_worker: 1 # just leave on 1
# envpool_batch_size: 128
envs_per_batch: 24 # 12 # 24 # 32 # reduced again from 24 (which works best so far!) to 12; reduced from 32 to 24 to match num_envs: 72 # changed from 6 to 12 # 12 # 32 # 60 # env_per_batch / envs_per_worker ~ (# threads - 2) and set num_envs ~ 2.5 * env_per_batch
update_epochs: 3
envs_per_batch: 32 # 12 # 24 # 32 # reduced again from 24 (which works best so far!) to 12; reduced from 32 to 24 to match num_envs: 72 # changed from 6 to 12 # 12 # 32 # 60 # env_per_batch / envs_per_worker ~ (# threads - 2) and set num_envs ~ 2.5 * env_per_batch (worked example below this diff)
update_epochs: 3 # 3
gamma: 0.998
batch_size: 49152 # 49152 (better than 65k) # 65536 / 32 = 2048. 2048 * 24 = 49152. Let's try it... :) # 131072 # 65536 # 32768 # 131072 doesn't work as well, but still just about as fast as 65536. so far 65536 is the best b/c agents get cut quickly and cut reliably and then explore afterwards.
batch_size: 65536 # 49152 (better than 65k) # 65536 / 32 = 2048. 2048 * 24 = 49152. Let's try it... :) # 131072 # 65536 # 32768 # 131072 doesn't work as well, but still just about as fast as 65536. so far 65536 is the best b/c agents get cut quickly and cut reliably and then explore afterwards.
batch_rows: 128
env:
name: pokemon_red
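
The num_envs comment above sketches how the async environment pool keeps throughput up: the policy only pulls a batch of observations smaller than the total number of envs, and the remaining envs keep stepping while actions are computed. The toy Python below illustrates that double-buffering idea using the comment's own example (2 envs on one core, batch size 1). ToyEnv and AsyncPool are invented names for this sketch, not PufferLib's actual API.

# Toy sketch of the async-pool batching described in the num_envs comment.
# ToyEnv / AsyncPool are invented for illustration; they are NOT PufferLib's API.
import random
from collections import deque


class ToyEnv:
    """Stand-in environment: the observation is just (env_id, step_count)."""

    def __init__(self, env_id):
        self.env_id = env_id
        self.t = 0

    def reset(self):
        self.t = 0
        return (self.env_id, self.t)

    def step(self, action):
        self.t += 1
        return (self.env_id, self.t)


class AsyncPool:
    """Hands the policy only envs_per_batch observations per recv(); the other
    envs' observations stay queued, which is what lets env stepping overlap
    with inference in the real, multi-process version."""

    def __init__(self, envs, envs_per_batch):
        self.envs = envs
        self.envs_per_batch = envs_per_batch
        self.ready = deque(env.reset() for env in envs)  # obs waiting for the policy

    def recv(self):
        # Take only a batch-sized slice of the waiting observations.
        return [self.ready.popleft() for _ in range(self.envs_per_batch)]

    def send(self, actions):
        # Give each action back to its env; its next observation rejoins the queue.
        for env_id, action in actions:
            self.ready.append(self.envs[env_id].step(action))


if __name__ == "__main__":
    pool = AsyncPool([ToyEnv(0), ToyEnv(1)], envs_per_batch=1)  # 2 envs, batch size 1
    for _ in range(6):
        obs_batch = pool.recv()                       # take 1 obs; the other env's obs waits
        actions = [(env_id, random.randint(0, 3)) for env_id, _ in obs_batch]
        pool.send(actions)                            # that env steps while we fetch the next obs
        print("policy saw:", obs_batch)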
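The envs_per_batch and batch_size comments carry two sizing rules of thumb (envs_per_batch / envs_per_worker ~ threads - 2, num_envs ~ 2.5 * envs_per_batch) and a bit of batch arithmetic (65536 / 32 = 2048, 2048 * 24 = 49152). The small helper below just replays that arithmetic; suggest_pool_sizes is a made-up name, and reading the 32 as bptt_horizon (it matches bptt_horizon=32 in the test_exp.txt Namespace further down) is an assumption, not something the config states.

# Replays the sizing heuristics quoted in the envs_per_batch / batch_size comments.
# suggest_pool_sizes is a made-up helper; treating 32 as bptt_horizon is an assumption.


def suggest_pool_sizes(cpu_threads: int, envs_per_worker: int) -> dict:
    """Rules of thumb from the config comments:
    envs_per_batch / envs_per_worker ~ (threads - 2), num_envs ~ 2.5 * envs_per_batch."""
    envs_per_batch = (cpu_threads - 2) * envs_per_worker
    num_envs = int(2.5 * envs_per_batch)
    return {"envs_per_batch": envs_per_batch, "num_envs": num_envs}


if __name__ == "__main__":
    # With envs_per_worker=1 and a 34-thread budget the rule gives
    # envs_per_batch=32 (what this commit sets) and num_envs=80 (the commit uses 96).
    print(suggest_pool_sizes(cpu_threads=34, envs_per_worker=1))

    # Batch arithmetic from the batch_size comment: 65536 / 32 = 2048, 2048 * 24 = 49152.
    batch_size = 65536
    bptt_horizon = 32                        # assumed: matches test_exp.txt's bptt_horizon=32
    segments = batch_size // bptt_horizon    # 2048 BPTT segments per batch
    print(segments, segments * 24)           # 2048 49152 (49152 was the previous batch_size)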
1 change: 1 addition & 0 deletions pokegym/pokegym/required_resources/running_experiment.txt
@@ -0,0 +1 @@
pokegym_test_pufferbox3_BET
Empty file.
1 change: 1 addition & 0 deletions pokegym/pokegym/required_resources/test_exp.txt
@@ -0,0 +1 @@
Namespace(seed=1, torch_deterministic=True, device='cuda', learning_rate=0.00025, anneal_lr=True, gamma=0.998, gae_lambda=0.95, num_minibatches=8, norm_adv=True, clip_coef=0.1, clip_vloss=True, ent_coef=0.01, vf_coef=0.5, max_grad_norm=0.5, target_kl=None, env_pool=True, verbose=True, data_dir='experiments', checkpoint_interval=200, cpu_offload=True, pool_kernel=[0], bptt_horizon=32, vf_clip_coef=0.12, compile=True, compile_mode='reduce-overhead', total_timesteps=800000000, num_envs=300, envs_per_worker=4, envs_per_batch=120, update_epochs=3, batch_size=65536, batch_rows=128)
