From b25b3c7e23997d230ce718434522ff9528914db4 Mon Sep 17 00:00:00 2001 From: thatguy11325 <148832074+thatguy11325@users.noreply.github.com> Date: Fri, 22 Mar 2024 14:38:08 -0400 Subject: [PATCH] Return of the hidden obj npc reward --- config.yaml | 2 ++ pokemonred_puffer/environment.py | 11 ++--------- pokemonred_puffer/rewards/baseline.py | 3 +++ 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/config.yaml b/config.yaml index 50237b4..14b7e89 100644 --- a/config.yaml +++ b/config.yaml @@ -148,6 +148,8 @@ rewards: stats_menu: 0.1 bag_menu: 0.1 taught_cut: 10.0 + explore_npcs: 0.02 + explore_hidden_objs: 0.02 policies: diff --git a/pokemonred_puffer/environment.py b/pokemonred_puffer/environment.py index a109d3d..a2e6048 100644 --- a/pokemonred_puffer/environment.py +++ b/pokemonred_puffer/environment.py @@ -255,6 +255,8 @@ def reset(self, seed: Optional[int] = None): self.explore_map = np.zeros(GLOBAL_MAP_SHAPE, dtype=np.float32) self.cut_explore_map = np.zeros(GLOBAL_MAP_SHAPE, dtype=np.float32) self.init_mem() + # We only init seen hidden objs once because they can only be found once!
+ self.seen_hidden_objs = {} self.reset_count = 0 with open(self.init_state_path, "rb") as f: self.pyboy.load_state(f) @@ -266,7 +268,6 @@ def reset(self, seed: Optional[int] = None): self.read_m(i).bit_count() for i in range(EVENT_FLAGS_START, EVENT_FLAGS_START + EVENTS_FLAGS_LENGTH) ) - else: self.reset_count += 1 @@ -311,14 +312,10 @@ def init_mem(self): self.seen_coords = {} # self.seen_global_coords = np.zeros(GLOBAL_MAP_SHAPE) self.seen_map_ids = np.zeros(256) - self.seen_npcs = {} - self.seen_hidden_objs = {} - self.cut_coords = {} self.cut_tiles = {} - self.cut_state = deque(maxlen=3) self.seen_start_menu = 0 self.seen_pokemon_menu = 0 @@ -329,13 +326,9 @@ def init_mem(self): def reset_mem(self): self.seen_coords.update((k, 0) for k, _ in self.seen_coords.items()) self.seen_map_ids *= 0 - self.seen_npcs.update((k, 0) for k, _ in self.seen_npcs.items()) - self.seen_hidden_objs.update((k, 0) for k, _ in self.seen_hidden_objs.items()) - self.cut_coords.update((k, 0) for k, _ in self.cut_coords.items()) - self.cut_state = deque(maxlen=3) self.seen_start_menu = 0 self.seen_pokemon_menu = 0 diff --git a/pokemonred_puffer/rewards/baseline.py b/pokemonred_puffer/rewards/baseline.py index ef118e0..d3722f0 100644 --- a/pokemonred_puffer/rewards/baseline.py +++ b/pokemonred_puffer/rewards/baseline.py @@ -152,6 +152,9 @@ def get_game_state_reward(self): "hm_count": self.reward_config["hm_count"] * self.get_hm_count(), "badges": self.reward_config["badges"] * self.get_badges(), "exploration": self.reward_config["exploration"] * sum(self.seen_coords.values()), + "explore_npcs": self.reward_config["explore_npcs"] * sum(self.seen_npcs.values()), + "explore_hidden_objs": self.reward_config["explore_hidden_objs"] + * sum(self.seen_hidden_objs.values()), "cut_coords": self.reward_config["cut_coords"] * sum(self.cut_coords.values()), "cut_tiles": self.reward_config["cut_tiles"] * sum(self.cut_tiles), "start_menu": self.reward_config["start_menu"] * self.seen_start_menu,