reduce minibatches, fix reference to config
thatguy11325 committed Jun 21, 2024
1 parent e28afbd commit 948598c
Showing 2 changed files with 5 additions and 8 deletions.
2 changes: 1 addition & 1 deletion config.yaml
@@ -74,7 +74,7 @@ train:
   float32_matmul_precision: "high"
   total_timesteps: 100_000_000_000
   batch_size: 65536
-  minibatch_size: 32768
+  minibatch_size: 2048
   learning_rate: 2.0e-4
   anneal_lr: False
   gamma: 0.998
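For context: in a CleanRL-style PPO update, the batch is shuffled and sliced into batch_size / minibatch_size optimizer steps per pass, so shrinking the minibatch from 32768 to 2048 raises that count from 2 to 32. A minimal sketch of the pattern, not the repo's code:

# Minimal sketch (illustrative, not the repo's code) of how batch_size and
# minibatch_size interact in a CleanRL-style update loop:
# 65536 / 2048 = 32 optimizer steps per pass instead of 65536 / 32768 = 2.
import numpy as np

batch_size = 65536
minibatch_size = 2048
num_minibatches = batch_size // minibatch_size  # 32

indices = np.arange(batch_size)
np.random.shuffle(indices)
for start in range(0, batch_size, minibatch_size):
    mb_indices = indices[start : start + minibatch_size]
    # ...compute the PPO loss on this slice and take one optimizer step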
11 changes: 4 additions & 7 deletions pokemonred_puffer/cleanrl_puffer.py
@@ -12,6 +12,7 @@
 import pufferlib
 import pufferlib.emulation
 import pufferlib.frameworks.cleanrl
+import pufferlib.pytorch
 import pufferlib.utils
 import pufferlib.vector

@@ -178,12 +179,6 @@ def __post_init__(self):
 
         if self.config.compile:
             self.policy = torch.compile(self.policy, mode=self.config.compile_mode)
-            self.policy.get_value = torch.compile(
-                self.policy.get_value, mode=self.config.compile_mode
-            )
-            self.policy.get_action_and_value = torch.compile(
-                self.policy.get_action_and_value, mode=self.config.compile_mode
-            )
 
         self.optimizer = torch.optim.Adam(
             self.policy.parameters(), lr=self.config.learning_rate, eps=1e-5
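One plausible reading of this hunk: compiling the module once is kept, while the extra per-method compiles are dropped. A hedged sketch of the single-compile pattern, using a hypothetical TinyPolicy in place of the repo's policy class:

# Hedged sketch of the single-compile pattern kept by this commit.
# torch.compile on an nn.Module returns a wrapper whose forward pass is
# traced and optimized on first call; the removed per-method compiles
# would have produced separate compiled artifacts.
import torch
import torch.nn as nn

class TinyPolicy(nn.Module):  # hypothetical stand-in for the repo's policy
    def __init__(self) -> None:
        super().__init__()
        self.body = nn.Linear(8, 8)
        self.value_head = nn.Linear(8, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.body(x)

    def get_value(self, x: torch.Tensor) -> torch.Tensor:
        return self.value_head(self.body(x))

policy = TinyPolicy()
policy = torch.compile(policy)                # one compile call for the module
out = policy(torch.randn(4, 8))               # forward runs through the compiled wrapper
value = policy.get_value(torch.randn(4, 8))   # still reachable; runs eagerly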
@@ -418,7 +413,9 @@ def train(self):
         with self.profile.learn:
             self.optimizer.zero_grad()
             loss.backward()
-            torch.nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm)
+            torch.nn.utils.clip_grad_norm_(
+                self.policy.parameters(), self.config.max_grad_norm
+            )
             self.optimizer.step()
             if self.config.device == "cuda":
                 torch.cuda.synchronize()
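This is the "fix reference to config" half of the commit: max_grad_norm lives on the config object, not on the trainer, so the old self.max_grad_norm lookup would fail at runtime. A minimal sketch of the corrected update step, with illustrative names:

# Minimal sketch of the corrected update step (illustrative names).
# max_grad_norm is read from the config object; self.max_grad_norm would
# have raised AttributeError on the trainer.
import torch

def update_step(policy, optimizer, loss, config):
    optimizer.zero_grad()
    loss.backward()
    # Rescale gradients in place so their global L2 norm does not
    # exceed config.max_grad_norm.
    torch.nn.utils.clip_grad_norm_(policy.parameters(), config.max_grad_norm)
    optimizer.step()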
