
ent coef adj now configurable
thatguy11325 committed Mar 14, 2024
1 parent 9551efa commit 0f5efa8
Showing 4 changed files with 18 additions and 16 deletions.
config.yaml (4 additions, 1 deletion)
@@ -33,7 +33,7 @@ env:
   state_dir: pyboy_states
   init_state: Bulbasaur
   action_freq: 24
-  max_steps: 1_000_000
+  max_steps: 100_000_000
   save_video: False
   fast_video: False
   frame_stacks: 1
@@ -80,6 +80,9 @@ train:
   cpu_offload: True
   pool_kernel: [0]
 
+  events_maxlen: 800
+  ent_coef_adj: 0.01015
+
 wrappers:
   baseline:
     - stream_wrapper.StreamWrapper:
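
The two new keys sit under train: and mirror what cleanrl_puffer.py now reads from self.config: events_maxlen sizes the rolling window of per-pass event averages, and ent_coef_adj is the entropy coefficient swapped in when that window plateaus. A minimal sketch of pulling them out of the YAML (the SimpleNamespace wrapper and the "config.yaml" path are illustrative assumptions, not the repo's actual loader):

import yaml  # pip install pyyaml
from types import SimpleNamespace

# Parse the training config; the path is assumed for illustration.
with open("config.yaml") as f:
    raw = yaml.safe_load(f)

# Attribute-style access, matching self.config.events_maxlen in the trainer.
train_config = SimpleNamespace(**raw["train"])

print(train_config.events_maxlen)  # 800: length of the event-average window
print(train_config.ent_coef_adj)   # 0.01015: entropy coefficient used on plateau
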
pokemonred_puffer/cleanrl_puffer.py (12 additions, 13 deletions)
@@ -311,14 +311,13 @@ def __init__(
         self.losses = Losses()
         self.performance = Performance()
 
-        self.reward_buffer = deque(maxlen=1_000)
         self.exploration_map_agg = np.zeros((config.num_envs, *GLOBAL_MAP_SHAPE), dtype=np.float32)
         self.taught_cut = False
 
         self.infos = {}
         self.log = False
         self.ent_coef = self.config.ent_coef
-        self.events_avg = deque(maxlen=500)
+        self.events_avg = deque(maxlen=self.config.events_maxlen)
 
     @pufferlib.utils.profile
     def evaluate(self):
@@ -435,12 +434,22 @@ def evaluate(self):
             with env_profiler:
                 self.pool.send(actions)
 
+        eval_profiler.stop()
+
+        self.total_agent_steps += padded_steps_collected
+        new_step = np.mean(self.infos["learner"]["stats/step"])
+        if new_step > self.global_step:
+            self.global_step = new_step
+            self.log = True
+        self.reward = torch.mean(self.rewards).float().item()
+        self.SPS = int(padded_steps_collected / eval_profiler.elapsed)
         self.events_avg.append(np.mean(self.infos["learner"]["stats/event"]))
 
         if (
             len(self.events_avg) == self.events_avg.maxlen
             and abs(self.events_avg[-1] - self.events_avg[0]) < 3
         ):
-            self.ent_coef = self.config.ent_coef * 1.25
+            self.ent_coef = self.config.ent_coef_adj
         else:
             self.ent_coef = self.config.ent_coef
         if self.log and self.wandb is not None:
@@ -451,16 +460,6 @@
                 },
             )
 
-        eval_profiler.stop()
-
-        self.total_agent_steps += padded_steps_collected
-        new_step = np.mean(self.infos["learner"]["stats/step"])
-        if new_step > self.global_step:
-            self.global_step = new_step
-            self.log = True
-        self.reward = torch.mean(self.rewards).float().item()
-        self.SPS = int(padded_steps_collected / eval_profiler.elapsed)
-
         perf = self.performance
         perf.total_uptime = int(time.time() - self.start_time)
         perf.total_agent_steps = self.total_agent_steps
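
Net effect of this file's changes: the step/SPS bookkeeping moves ahead of the entropy check and the W&B logging call, so those values are current when logged, and the hard-coded ent_coef * 1.25 bump is replaced by the configurable ent_coef_adj. The rule itself is unchanged: keep a fixed-length window of mean event counts; once the window is full and its first and last entries differ by fewer than 3 events, progress has plateaued, so the adjusted entropy coefficient takes over to push exploration. A standalone sketch of that rule (the function name and sample values are illustrative; the threshold of 3 and the config keys come from the diff):

from collections import deque

def next_ent_coef(events_avg: deque, ent_coef: float, ent_coef_adj: float) -> float:
    """Pick the entropy coefficient for the next update.

    events_avg holds one mean event count per eval pass. A full window that
    has barely moved end to end (< 3 events) signals stalled exploration,
    so the adjusted coefficient is used instead of the base one.
    """
    if len(events_avg) == events_avg.maxlen and abs(events_avg[-1] - events_avg[0]) < 3:
        return ent_coef_adj
    return ent_coef

# Window length matches the new events_maxlen key; 800 identical readings -> plateau.
window = deque([120.0] * 800, maxlen=800)
print(next_ent_coef(window, ent_coef=0.001, ent_coef_adj=0.01015))  # 0.01015
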
pokemonred_puffer/environment.py (1 addition, 1 deletion)
@@ -904,7 +904,7 @@ def update_tm_hm_moves_obtained(self):
             if self.pyboy.get_memory_value(i) != 0:
                 for j in range(4):
                     move_id = self.pyboy.get_memory_value(i + j + 8)
-                    if move_id != 0:  # and move_id in TM_HM_MOVES:
+                    if move_id != 0 and move_id in TM_HM_MOVES:
                         self.moves_obtained[move_id] = 1
         """
         # Scan current box (since the box doesn't auto increment in pokemon red)
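
Dropping the comment markers re-enables the TM_HM_MOVES membership test, so moves_obtained only counts moves teachable by TM/HM rather than every non-empty move slot. A toy illustration of the filter (the move IDs in TM_HM_MOVES and the party data are placeholders, not the game's real values):

# Hypothetical move IDs standing in for the game's real TM/HM move set.
TM_HM_MOVES = {5, 13, 14, 15, 57, 70}

moves_obtained = [0] * 256  # indexed by move id, as in the environment
party_moves = [0, 15, 33, 57]  # 0 = empty slot; toy data

for move_id in party_moves:
    # Count a slot only if it is non-empty and the move is a TM/HM move.
    if move_id != 0 and move_id in TM_HM_MOVES:
        moves_obtained[move_id] = 1

print(sum(moves_obtained))  # 2 -- moves 15 and 57 qualify; 33 is filtered out
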
pokemonred_puffer/rewards/baseline.py (1 addition, 1 deletion)
@@ -27,7 +27,7 @@ def get_game_state_reward(self):
             "explore_npcs": sum(self.seen_npcs.values()) * 0.02,
             # "seen_pokemon": sum(self.seen_pokemon) * 0.0000010,
             # "caught_pokemon": sum(self.caught_pokemon) * 0.0000010,
-            "moves_obtained": sum(self.moves_obtained) * 0.00010,
+            "moves_obtained": sum(self.moves_obtained) * 0.0010,
             "explore_hidden_objs": sum(self.seen_hidden_objs.values()) * 0.02,
             # "level": self.get_levels_reward(),
             # "opponent_level": self.max_opponent_level,
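
The moves_obtained weight rises tenfold, from 0.00010 to 0.0010 per move, likely to offset the stricter TM/HM filter above: fewer moves now qualify, but each is worth more reward. A quick scale check (toy data; moves_obtained marks qualifying move IDs with a 1, as in environment.py):

moves_obtained = [0] * 256
for move_id in (15, 57, 70):  # three TM/HM moves learned; toy data
    moves_obtained[move_id] = 1

old_term = sum(moves_obtained) * 0.00010  # ~0.0003 reward before this commit
new_term = sum(moves_obtained) * 0.0010   # ~0.003 reward after this commit
print(old_term, new_term)
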
