From d5724ee60059c80db97fb490a21010ccf4654409 Mon Sep 17 00:00:00 2001
From: thatguy11325 <148832074+thatguy11325@users.noreply.github.com>
Date: Thu, 18 Jul 2024 00:21:02 -0400
Subject: [PATCH] Exploration coords for each tileset

Key seen_coords by the current map tileset (wCurMapTileset) so that
exploration can be counted and rewarded per tileset.

- environment: seen_coords becomes {tileset id: {(x, y, map): inc}} and
  agent_stats gains a per-tileset "exploration" breakdown. Coordinates
  recorded while bit 7 of wd736 is set now get an increment of 0.0
  instead of 0.25.
- rewards: the existing reward classes sum over every tileset. The new
  ObjectRewardRequiredEventsEnvTilesetExploration emits one
  exploration_<tileset> term per Tilesets member, weighted by the
  reward config key of the same name and falling back to "exploration"
  when that key is absent.
- config: add reward weights for the new class, with overrides for the
  gym, facility and plateau tilesets.

---
 config.yaml                           | 26 +++++++++
 pokemonred_puffer/environment.py      | 18 ++++--
 pokemonred_puffer/rewards/baseline.py | 82 +++++++++++++++++++++++++--
 3 files changed, 117 insertions(+), 9 deletions(-)

diff --git a/config.yaml b/config.yaml
index 78b3784..ab5b224 100644
--- a/config.yaml
+++ b/config.yaml
@@ -259,6 +259,32 @@ rewards:
       required_item: 5.0
       useful_item: 1.0
       pokecenter_heal: 1.0
+
+  baseline.ObjectRewardRequiredEventsEnvTilesetExploration:
+    reward:
+      event: 1.0
+      seen_pokemon: 4.0
+      caught_pokemon: 4.0
+      moves_obtained: 4.0
+      hm_count: 10.0
+      level: 1.0
+      badges: 5.0
+      cut_coords: 0.0
+      cut_tiles: 0.0
+      start_menu: 0.0
+      pokemon_menu: 0.0
+      stats_menu: 0.0
+      bag_menu: 0.0
+      explore_hidden_objs: 0.02
+      seen_action_bag_menu: 0.0
+      required_event: 5.0
+      required_item: 5.0
+      useful_item: 1.0
+      pokecenter_heal: 1.0
+      exploration: 0.02
+      exploration_gym: 0.025
+      exploration_facility: 0.025
+      exploration_plateau: 0.025
 
 
 
diff --git a/pokemonred_puffer/environment.py b/pokemonred_puffer/environment.py
index 7f4b955..1a72044 100644
--- a/pokemonred_puffer/environment.py
+++ b/pokemonred_puffer/environment.py
@@ -365,7 +365,7 @@ def reset(self, seed: Optional[int] = None, options: Optional[dict[str, Any]] =
     def init_mem(self):
         # Maybe I should preallocate a giant matrix for all map ids
         # All map ids have the same size, right?
-        self.seen_coords = {}
+        self.seen_coords: dict[int, dict[tuple[int, int, int], float]] = {}
         self.explore_map = np.zeros(GLOBAL_MAP_SHAPE, dtype=np.float32)
         self.cut_explore_map = np.zeros(GLOBAL_MAP_SHAPE, dtype=np.float32)
         self.seen_map_ids = np.zeros(256)
@@ -1182,7 +1182,7 @@ def agent_stats(self, action):
                 "levels_sum": sum(levels),
                 "ptypes": self.read_party(),
                 "hp": self.read_hp_fraction(),
-                "coord": sum(self.seen_coords.values()),  # np.sum(self.seen_global_coords),
+                "coord": sum(sum(tileset.values()) for tileset in self.seen_coords.values()),
                 "map_id": np.sum(self.seen_map_ids),
                 "npc": sum(self.seen_npcs.values()),
                 "hidden_obj": sum(self.seen_hidden_objs.values()),
@@ -1212,6 +1212,12 @@ def agent_stats(self, action):
                 "pokecenter": np.sum(self.pokecenters),
                 "pokecenter_heal": self.pokecenter_heal,
             }
+            | {
+                "exploration": {
+                    tileset.name.lower(): sum(self.seen_coords.get(tileset.value, {}).values())
+                    for tileset in Tilesets
+                }
+            }
             | {f"badge_{i+1}": bool(badges & (1 << i)) for i in range(8)},
             "events": {event: self.events.get_event(event) for event in REQUIRED_EVENTS}
             | {
@@ -1267,10 +1273,14 @@ def get_game_coords(self):
         return (self.read_m(0xD362), self.read_m(0xD361), self.read_m(0xD35E))
 
     def update_seen_coords(self):
-        inc = 0.25 if (self.read_m("wd736") & 0b1000_0000) else 1
+        inc = 0.0 if (self.read_m("wd736") & 0b1000_0000) else 1
 
         x_pos, y_pos, map_n = self.get_game_coords()
-        self.seen_coords[(x_pos, y_pos, map_n)] = inc
+        # self.seen_coords[(x_pos, y_pos, map_n)] = inc
+        cur_map_tileset = self.read_m("wCurMapTileset")
+        if cur_map_tileset not in self.seen_coords:
+            self.seen_coords[cur_map_tileset] = {}
+        self.seen_coords[cur_map_tileset][(x_pos, y_pos, map_n)] = inc
         # TODO: Turn into a wrapper?
         self.explore_map[local_to_global(y_pos, x_pos, map_n)] = inc
         # self.seen_global_coords[local_to_global(y_pos, x_pos, map_n)] = 1
diff --git a/pokemonred_puffer/rewards/baseline.py b/pokemonred_puffer/rewards/baseline.py
index 223f5dc..65f7fcf 100644
--- a/pokemonred_puffer/rewards/baseline.py
+++ b/pokemonred_puffer/rewards/baseline.py
@@ -3,6 +3,7 @@
 
 from pokemonred_puffer.data.events import REQUIRED_EVENTS
 from pokemonred_puffer.data.items import REQUIRED_ITEMS, USEFUL_ITEMS
+from pokemonred_puffer.data.tilesets import Tilesets
 from pokemonred_puffer.environment import (
     EVENT_FLAGS_START,
     EVENTS_FLAGS_LENGTH,
@@ -35,7 +36,7 @@ def get_game_state_reward(self):
             # "death_reward": self.died_count,
             "badge": self.get_badges() * 5,
             # "heal": self.total_healing_rew,
-            "explore": sum(self.seen_coords.values()) * 0.012,
+            "explore": sum(sum(tileset.values()) for tileset in self.seen_coords.values()) * 0.012,
             # "explore_maps": np.sum(self.seen_map_ids) * 0.0001,
             "taught_cut": 4 * int(self.check_if_party_has_hm(0xF)),
             "cut_coords": sum(self.cut_coords.values()) * 1.0,
@@ -111,7 +112,8 @@ def get_game_state_reward(self):
             "hm_count": self.reward_config["hm_count"] * self.get_hm_count(),
             "level": self.reward_config["level"] * self.get_levels_reward(),
             "badges": self.reward_config["badges"] * self.get_badges(),
-            "exploration": self.reward_config["exploration"] * sum(self.seen_coords.values()),
+            "exploration": self.reward_config["exploration"]
+            * sum(sum(tileset.values()) for tileset in self.seen_coords.values()),
             "cut_coords": self.reward_config["cut_coords"] * sum(self.cut_coords.values()),
             "cut_tiles": self.reward_config["cut_tiles"] * sum(self.cut_tiles.values()),
             "start_menu": self.reward_config["start_menu"] * self.seen_start_menu,
@@ -147,7 +149,8 @@ def get_game_state_reward(self):
             "moves_obtained": self.reward_config["moves_obtained"] * sum(self.moves_obtained),
             "hm_count": self.reward_config["hm_count"] * self.get_hm_count(),
             "badges": self.reward_config["badges"] * self.get_badges(),
-            "exploration": self.reward_config["exploration"] * sum(self.seen_coords.values()),
+            "exploration": self.reward_config["exploration"]
+            * sum(sum(tileset.values()) for tileset in self.seen_coords.values()),
             "explore_npcs": self.reward_config["explore_npcs"] * sum(self.seen_npcs.values()),
             "explore_hidden_objs": (
                 self.reward_config["explore_hidden_objs"] * sum(self.seen_hidden_objs.values())
@@ -202,7 +205,8 @@ def get_game_state_reward(self):
                 "hm_count": self.reward_config["hm_count"] * self.get_hm_count(),
                 "level": self.reward_config["level"] * self.get_levels_reward(),
                 "badges": self.reward_config["badges"] * self.get_badges(),
-                "exploration": self.reward_config["exploration"] * sum(self.seen_coords.values()),
+                "exploration": self.reward_config["exploration"]
+                * sum(sum(tileset.values()) for tileset in self.seen_coords.values()),
                 "cut_coords": self.reward_config["cut_coords"] * sum(self.cut_coords.values()),
                 "cut_tiles": self.reward_config["cut_tiles"] * sum(self.cut_tiles.values()),
                 "start_menu": self.reward_config["start_menu"] * self.seen_start_menu,
@@ -246,7 +250,8 @@ def get_game_state_reward(self):
                 "hm_count": self.reward_config["hm_count"] * self.get_hm_count(),
                 "level": self.reward_config["level"] * self.get_levels_reward(),
                 "badges": self.reward_config["badges"] * self.get_badges(),
-                "exploration": self.reward_config["exploration"] * sum(self.seen_coords.values()),
+                "exploration": self.reward_config["exploration"]
+                * sum(sum(tileset.values()) for tileset in self.seen_coords.values()),
                 "cut_coords": self.reward_config["cut_coords"] * sum(self.cut_coords.values()),
                 "cut_tiles": self.reward_config["cut_tiles"] * sum(self.cut_tiles.values()),
                 "start_menu": self.reward_config["start_menu"] * self.seen_start_menu,
@@ -287,3 +292,70 @@ def get_levels_reward(self):
             return self.max_level_sum
         else:
             return 15 + (self.max_level_sum - 15) / 4
+
+
+class ObjectRewardRequiredEventsEnvTilesetExploration(BaselineRewardEnv):
+    def get_game_state_reward(self):
+        _, wBagItems = self.pyboy.symbol_lookup("wBagItems")
+        bag = np.array(self.pyboy.memory[wBagItems : wBagItems + 40], dtype=np.uint8)
+        numBagItems = self.read_m("wNumBagItems")
+        # item ids start at 1 so using 0 as the nothing value is okay
+        bag[2 * numBagItems :] = 0
+        bag_item_ids = bag[::2]
+
+        return (
+            {
+                "event": self.reward_config["event"] * self.update_max_event_rew(),
+                "seen_pokemon": self.reward_config["seen_pokemon"] * sum(self.seen_pokemon),
+                "caught_pokemon": self.reward_config["caught_pokemon"] * sum(self.caught_pokemon),
+                "moves_obtained": self.reward_config["moves_obtained"] * sum(self.moves_obtained),
+                "hm_count": self.reward_config["hm_count"] * self.get_hm_count(),
+                "level": self.reward_config["level"] * self.get_levels_reward(),
+                "badges": self.reward_config["badges"] * self.get_badges(),
+                "cut_coords": self.reward_config["cut_coords"] * sum(self.cut_coords.values()),
+                "cut_tiles": self.reward_config["cut_tiles"] * sum(self.cut_tiles.values()),
+                "start_menu": self.reward_config["start_menu"] * self.seen_start_menu,
+                "pokemon_menu": self.reward_config["pokemon_menu"] * self.seen_pokemon_menu,
+                "stats_menu": self.reward_config["stats_menu"] * self.seen_stats_menu,
+                "bag_menu": self.reward_config["bag_menu"] * self.seen_bag_menu,
+                "explore_hidden_objs": sum(self.seen_hidden_objs.values())
+                * self.reward_config["explore_hidden_objs"],
+                "seen_action_bag_menu": self.seen_action_bag_menu
+                * self.reward_config["seen_action_bag_menu"],
+                "pokecenter_heal": self.pokecenter_heal * self.reward_config["pokecenter_heal"],
+                "rival3": self.reward_config["required_event"]
+                * int(self.read_m("wSSAnne2FCurScript") == 4),
+                "game_corner_rocket": self.reward_config["required_event"]
+                * float(self.missables.get_missable("HS_GAME_CORNER_ROCKET")),
+                "saffron_guard": self.reward_config["required_event"]
+                * float(self.wd728.get_bit("GAVE_SAFFRON_GUARD_DRINK")),
+            }
+            | {
+                f"exploration_{tileset.name.lower()}": self.reward_config.get(
+                    f"exploration_{tileset.name.lower()}", self.reward_config["exploration"]
+                )
+                * sum(self.seen_coords.get(tileset.value, {}).values())
+                for tileset in Tilesets
+            }
+            | {
+                event: self.reward_config["required_event"] * float(self.events.get_event(event))
+                for event in REQUIRED_EVENTS
+            }
+            | {
+                item.name: self.reward_config["required_item"] * float(item.value in bag_item_ids)
+                for item in REQUIRED_ITEMS
+            }
+            | {
+                item.name: self.reward_config["useful_item"] * float(item.value in bag_item_ids)
+                for item in USEFUL_ITEMS
+            }
+        )
+
+    def get_levels_reward(self):
+        party_size = self.read_m("wPartyCount")
+        party_levels = [self.read_m(f"wPartyMon{i+1}Level") for i in range(party_size)]
+        self.max_level_sum = max(self.max_level_sum, sum(party_levels))
+        if self.max_level_sum < 15:
+            return self.max_level_sum
+        else:
+            return 15 + (self.max_level_sum - 15) / 4