diff --git a/config.yaml b/config.yaml index c02e688..79a56a9 100644 --- a/config.yaml +++ b/config.yaml @@ -5,18 +5,18 @@ wandb: debug: env: - headless: True + headless: False stream_wrapper: False init_state: cut - max_steps: 4 + max_steps: 1_000_000 train: device: cpu compile: False compile_mode: default - num_envs: 10 + num_envs: 4 envs_per_worker: 1 - envs_per_batch: 1 - batch_size: 8 + envs_per_batch: 4 + batch_size: 16 batch_rows: 4 bptt_horizon: 2 total_timesteps: 100_000_000 @@ -28,8 +28,8 @@ debug: env_pool: False log_frequency: 5000 load_optimizer_state: False - swarm_frequency: 5 - swarm_keep_pct: .8 + swarm_frequency: 10 + swarm_keep_pct: .1 env: headless: True diff --git a/pokemonred_puffer/environment.py b/pokemonred_puffer/environment.py index bca7567..4ed86db 100644 --- a/pokemonred_puffer/environment.py +++ b/pokemonred_puffer/environment.py @@ -291,9 +291,9 @@ def reset(self, seed: Optional[int] = None, options: Optional[dict[str, Any]] = self.moves_obtained = np.zeros(0xA5, dtype=np.uint8) self.pokecenters = np.zeros(252, dtype=np.uint8) # lazy random seed setting - if not seed: - seed = random.randint(0, 4096) - self.pyboy.tick(seed, render=False) + # if not seed: + # seed = random.randint(0, 4096) + # self.pyboy.tick(seed, render=False) else: self.reset_count += 1 diff --git a/pokemonred_puffer/rewards/baseline.py b/pokemonred_puffer/rewards/baseline.py index 62394c4..c6da5d2 100644 --- a/pokemonred_puffer/rewards/baseline.py +++ b/pokemonred_puffer/rewards/baseline.py @@ -13,6 +13,7 @@ class BaselineRewardEnv(RedGymEnv): def __init__(self, env_config: pufferlib.namespace, reward_config: pufferlib.namespace): super().__init__(env_config) + self.reward_config = reward_config # TODO: make the reward weights configurable def get_game_state_reward(self): @@ -83,10 +84,6 @@ def get_levels_reward(self): class TeachCutReplicationEnv(BaselineRewardEnv): - def __init__(self, env_config: pufferlib.namespace, reward_config: pufferlib.namespace): - super().__init__(env_config) - self.reward_config = reward_config - def get_game_state_reward(self): return { "event": self.reward_config["event"] * self.update_max_event_rew(), @@ -117,10 +114,6 @@ def get_game_state_reward(self): class TeachCutReplicationEnvFork(BaselineRewardEnv): - def __init__(self, env_config: pufferlib.namespace, reward_config: pufferlib.namespace): - super().__init__(env_config) - self.reward_config = reward_config - def get_game_state_reward(self): return { "event": self.reward_config["event"] * self.update_max_event_rew(), @@ -172,7 +165,7 @@ def get_levels_reward(self): return 15 + (self.max_level_sum - 15) / 4 -class RockTunnelReplicationEnv(TeachCutReplicationEnv): +class RockTunnelReplicationEnv(BaselineRewardEnv): def get_game_state_reward(self): return { "level": self.reward_config["level"] * self.get_levels_reward(),