Skip to content

Commit

Permalink
Exploration coords for each tileset
Browse files Browse the repository at this point in the history
  • Loading branch information
thatguy11325 committed Jul 18, 2024
1 parent 3296194 commit d5724ee
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 9 deletions.
26 changes: 26 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,32 @@ rewards:
required_item: 5.0
useful_item: 1.0
pokecenter_heal: 1.0

# Reward weights consumed by baseline.ObjectRewardRequiredEventsEnvTilesetExploration.
# Each weight is multiplied by the matching game-state quantity, so 0.0 switches a term off.
baseline.ObjectRewardRequiredEventsEnvTilesetExploration:
reward:
event: 1.0
seen_pokemon: 4.0
caught_pokemon: 4.0
moves_obtained: 4.0
hm_count: 10.0
level: 1.0
badges: 5.0
cut_coords: 0.0
cut_tiles: 0.0
start_menu: 0.0
pokemon_menu: 0.0
stats_menu: 0.0
bag_menu: 0.0
explore_hidden_objs: 0.02
seen_action_bag_menu: 0.0
# Applied once per required event and once per required item, respectively.
required_event: 5.0
required_item: 5.0
useful_item: 1.0
pokecenter_heal: 1.0
# Fallback exploration weight, used for any tileset that has no entry of its own.
exploration: 0.02
# Per-tileset exploration weights, named exploration_<lowercased tileset name>.
# NOTE(review): confirm the reward class looks these up with the `exploration_` prefix.
exploration_gym: 0.025
exploration_facility: 0.025
exploration_plateau: 0.025



Expand Down
18 changes: 14 additions & 4 deletions pokemonred_puffer/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ def reset(self, seed: Optional[int] = None, options: Optional[dict[str, Any]] =
def init_mem(self):
# Maybe I should preallocate a giant matrix for all map ids
# All map ids have the same size, right?
self.seen_coords = {}
self.seen_coords: dict[int, dict[tuple[int, int, int], int]] = {}
self.explore_map = np.zeros(GLOBAL_MAP_SHAPE, dtype=np.float32)
self.cut_explore_map = np.zeros(GLOBAL_MAP_SHAPE, dtype=np.float32)
self.seen_map_ids = np.zeros(256)
Expand Down Expand Up @@ -1182,7 +1182,7 @@ def agent_stats(self, action):
"levels_sum": sum(levels),
"ptypes": self.read_party(),
"hp": self.read_hp_fraction(),
"coord": sum(self.seen_coords.values()), # np.sum(self.seen_global_coords),
"coord": sum(sum(tileset.values()) for tileset in self.seen_coords.values()),
"map_id": np.sum(self.seen_map_ids),
"npc": sum(self.seen_npcs.values()),
"hidden_obj": sum(self.seen_hidden_objs.values()),
Expand Down Expand Up @@ -1212,6 +1212,12 @@ def agent_stats(self, action):
"pokecenter": np.sum(self.pokecenters),
"pokecenter_heal": self.pokecenter_heal,
}
| {
"exploration": {
tileset.name.lower(): sum(self.seen_coords.get(tileset.value, {}).values())
for tileset in Tilesets
}
}
| {f"badge_{i+1}": bool(badges & (1 << i)) for i in range(8)},
"events": {event: self.events.get_event(event) for event in REQUIRED_EVENTS}
| {
Expand Down Expand Up @@ -1267,10 +1273,14 @@ def get_game_coords(self):
return (self.read_m(0xD362), self.read_m(0xD361), self.read_m(0xD35E))

def update_seen_coords(self):
inc = 0.25 if (self.read_m("wd736") & 0b1000_0000) else 1
inc = 0.0 if (self.read_m("wd736") & 0b1000_0000) else 1

x_pos, y_pos, map_n = self.get_game_coords()
self.seen_coords[(x_pos, y_pos, map_n)] = inc
# self.seen_coords[(x_pos, y_pos, map_n)] = inc
cur_map_tileset = self.read_m("wCurMapTileset")
if cur_map_tileset not in self.seen_coords:
self.seen_coords[cur_map_tileset] = {}
self.seen_coords[cur_map_tileset][(x_pos, y_pos, map_n)] = inc
# TODO: Turn into a wrapper?
self.explore_map[local_to_global(y_pos, x_pos, map_n)] = inc
# self.seen_global_coords[local_to_global(y_pos, x_pos, map_n)] = 1
Expand Down
82 changes: 77 additions & 5 deletions pokemonred_puffer/rewards/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from pokemonred_puffer.data.events import REQUIRED_EVENTS
from pokemonred_puffer.data.items import REQUIRED_ITEMS, USEFUL_ITEMS
from pokemonred_puffer.data.tilesets import Tilesets
from pokemonred_puffer.environment import (
EVENT_FLAGS_START,
EVENTS_FLAGS_LENGTH,
Expand Down Expand Up @@ -35,7 +36,7 @@ def get_game_state_reward(self):
# "death_reward": self.died_count,
"badge": self.get_badges() * 5,
# "heal": self.total_healing_rew,
"explore": sum(self.seen_coords.values()) * 0.012,
"explore": sum(sum(tileset.values()) for tileset in self.seen_coords.values()) * 0.012,
# "explore_maps": np.sum(self.seen_map_ids) * 0.0001,
"taught_cut": 4 * int(self.check_if_party_has_hm(0xF)),
"cut_coords": sum(self.cut_coords.values()) * 1.0,
Expand Down Expand Up @@ -111,7 +112,8 @@ def get_game_state_reward(self):
"hm_count": self.reward_config["hm_count"] * self.get_hm_count(),
"level": self.reward_config["level"] * self.get_levels_reward(),
"badges": self.reward_config["badges"] * self.get_badges(),
"exploration": self.reward_config["exploration"] * sum(self.seen_coords.values()),
"exploration": self.reward_config["exploration"]
* sum(sum(tileset.values()) for tileset in self.seen_coords.values()),
"cut_coords": self.reward_config["cut_coords"] * sum(self.cut_coords.values()),
"cut_tiles": self.reward_config["cut_tiles"] * sum(self.cut_tiles.values()),
"start_menu": self.reward_config["start_menu"] * self.seen_start_menu,
Expand Down Expand Up @@ -147,7 +149,8 @@ def get_game_state_reward(self):
"moves_obtained": self.reward_config["moves_obtained"] * sum(self.moves_obtained),
"hm_count": self.reward_config["hm_count"] * self.get_hm_count(),
"badges": self.reward_config["badges"] * self.get_badges(),
"exploration": self.reward_config["exploration"] * sum(self.seen_coords.values()),
"exploration": self.reward_config["exploration"]
* sum(sum(tileset.values()) for tileset in self.seen_coords.values()),
"explore_npcs": self.reward_config["explore_npcs"] * sum(self.seen_npcs.values()),
"explore_hidden_objs": (
self.reward_config["explore_hidden_objs"] * sum(self.seen_hidden_objs.values())
Expand Down Expand Up @@ -202,7 +205,8 @@ def get_game_state_reward(self):
"hm_count": self.reward_config["hm_count"] * self.get_hm_count(),
"level": self.reward_config["level"] * self.get_levels_reward(),
"badges": self.reward_config["badges"] * self.get_badges(),
"exploration": self.reward_config["exploration"] * sum(self.seen_coords.values()),
"exploration": self.reward_config["exploration"]
* sum(sum(tileset.values()) for tileset in self.seen_coords.values()),
"cut_coords": self.reward_config["cut_coords"] * sum(self.cut_coords.values()),
"cut_tiles": self.reward_config["cut_tiles"] * sum(self.cut_tiles.values()),
"start_menu": self.reward_config["start_menu"] * self.seen_start_menu,
Expand Down Expand Up @@ -246,7 +250,8 @@ def get_game_state_reward(self):
"hm_count": self.reward_config["hm_count"] * self.get_hm_count(),
"level": self.reward_config["level"] * self.get_levels_reward(),
"badges": self.reward_config["badges"] * self.get_badges(),
"exploration": self.reward_config["exploration"] * sum(self.seen_coords.values()),
"exploration": self.reward_config["exploration"]
* sum(sum(tileset.values()) for tileset in self.seen_coords.values()),
"cut_coords": self.reward_config["cut_coords"] * sum(self.cut_coords.values()),
"cut_tiles": self.reward_config["cut_tiles"] * sum(self.cut_tiles.values()),
"start_menu": self.reward_config["start_menu"] * self.seen_start_menu,
Expand Down Expand Up @@ -287,3 +292,70 @@ def get_levels_reward(self):
return self.max_level_sum
else:
return 15 + (self.max_level_sum - 15) / 4


class ObjectRewardRequiredEventsEnvTilesetExploration(BaselineRewardEnv):
    """Reward environment that scores exploration separately for each tileset.

    Every member of ``Tilesets`` contributes its own ``exploration_<name>`` reward
    component, so individual tilesets can be weighted differently through
    ``reward_config`` (for example ``exploration_gym``).
    """

    def get_game_state_reward(self):
        """Build the named reward components for the current game state.

        Returns:
            A dict mapping component name to its weighted value. It contains the fixed
            progress terms, one ``exploration_<tileset>`` term per tileset, one term per
            required event, and one term per required/useful item.
        """
        _, wBagItems = self.pyboy.symbol_lookup("wBagItems")
        # Snapshot 40 bytes of bag memory; presumably (item id, quantity) pairs - verify.
        bag = np.array(self.pyboy.memory[wBagItems : wBagItems + 40], dtype=np.uint8)
        numBagItems = self.read_m("wNumBagItems")
        # item ids start at 1 so using 0 as the nothing value is okay
        bag[2 * numBagItems :] = 0
        # Every other byte, starting at offset 0, is treated as an item id.
        bag_item_ids = bag[::2]

        return (
            {
                "event": self.reward_config["event"] * self.update_max_event_rew(),
                "seen_pokemon": self.reward_config["seen_pokemon"] * sum(self.seen_pokemon),
                "caught_pokemon": self.reward_config["caught_pokemon"] * sum(self.caught_pokemon),
                "moves_obtained": self.reward_config["moves_obtained"] * sum(self.moves_obtained),
                "hm_count": self.reward_config["hm_count"] * self.get_hm_count(),
                "level": self.reward_config["level"] * self.get_levels_reward(),
                "badges": self.reward_config["badges"] * self.get_badges(),
                "cut_coords": self.reward_config["cut_coords"] * sum(self.cut_coords.values()),
                "cut_tiles": self.reward_config["cut_tiles"] * sum(self.cut_tiles.values()),
                "start_menu": self.reward_config["start_menu"] * self.seen_start_menu,
                "pokemon_menu": self.reward_config["pokemon_menu"] * self.seen_pokemon_menu,
                "stats_menu": self.reward_config["stats_menu"] * self.seen_stats_menu,
                "bag_menu": self.reward_config["bag_menu"] * self.seen_bag_menu,
                "explore_hidden_objs": sum(self.seen_hidden_objs.values())
                * self.reward_config["explore_hidden_objs"],
                "seen_action_bag_menu": self.seen_action_bag_menu
                * self.reward_config["seen_action_bag_menu"],
                "pokecenter_heal": self.pokecenter_heal * self.reward_config["pokecenter_heal"],
                "rival3": self.reward_config["required_event"]
                * int(self.read_m("wSSAnne2FCurScript") == 4),
                "game_corner_rocket": self.reward_config["required_event"]
                * float(self.missables.get_missable("HS_GAME_CORNER_ROCKET")),
                "saffron_guard": self.reward_config["required_event"]
                * float(self.wd728.get_bit("GAVE_SAFFRON_GUARD_DRINK")),
            }
            | {
                # seen_coords is keyed by tileset value; a tileset that has not been
                # visited yet has no entry and therefore contributes 0.
                f"exploration_{tileset.name.lower()}": self._tileset_exploration_weight(
                    tileset.name.lower()
                )
                * sum(self.seen_coords.get(tileset.value, {}).values())
                for tileset in Tilesets
            }
            | {
                event: self.reward_config["required_event"] * float(self.events.get_event(event))
                for event in REQUIRED_EVENTS
            }
            | {
                item.name: self.reward_config["required_item"] * float(item.value in bag_item_ids)
                for item in REQUIRED_ITEMS
            }
            | {
                item.name: self.reward_config["useful_item"] * float(item.value in bag_item_ids)
                for item in USEFUL_ITEMS
            }
        )

    def _tileset_exploration_weight(self, tileset_name):
        """Return the exploration weight configured for ``tileset_name``.

        Lookup order:
            1. ``exploration_<tileset_name>`` - matches the emitted reward component
               name. The previous code never consulted this key, so per-tileset
               overrides such as ``exploration_gym`` were silently ignored.
            2. ``<tileset_name>`` - the bare key the previous code looked up, kept so
               any config relying on it behaves as before.
            3. ``exploration`` - the global default.

        Raises:
            KeyError: if ``reward_config`` has no ``exploration`` entry.
        """
        default_weight = self.reward_config["exploration"]
        legacy_weight = self.reward_config.get(tileset_name, default_weight)
        return self.reward_config.get(f"exploration_{tileset_name}", legacy_weight)

    def get_levels_reward(self):
        """Return the level reward derived from the best party level sum seen so far.

        Updates ``self.max_level_sum`` with the current party's level sum. Sums below
        15 are returned unchanged; above that, each extra level counts for a quarter.
        """
        party_size = self.read_m("wPartyCount")
        party_levels = [self.read_m(f"wPartyMon{i+1}Level") for i in range(party_size)]
        self.max_level_sum = max(self.max_level_sum, sum(party_levels))
        if self.max_level_sum < 15:
            return self.max_level_sum
        else:
            return 15 + (self.max_level_sum - 15) / 4

0 comments on commit d5724ee

Please sign in to comment.