diff --git a/pokemonred_puffer/environment.py b/pokemonred_puffer/environment.py index a1651cd..e99428c 100644 --- a/pokemonred_puffer/environment.py +++ b/pokemonred_puffer/environment.py @@ -13,7 +13,6 @@ from gymnasium import Env, spaces from pyboy import PyBoy from pyboy.utils import WindowEvent -# from skimage.transform import resize import pufferlib from pokemonred_puffer.data.events import ( @@ -1259,6 +1258,7 @@ def agent_stats(self, action): "pokecenter": np.sum(self.pokecenters), "pokecenter_heal": self.pokecenter_heal, "in_battle": self.read_m("wIsInBattle") > 0, + "event": self.progress_reward["event"], "max_steps": self.max_steps * (len(self.required_events) + len(self.required_items)) * self.max_steps_scaling, diff --git a/pokemonred_puffer/policies/multi_convolutional.py b/pokemonred_puffer/policies/multi_convolutional.py index 31474f3..a3d603c 100644 --- a/pokemonred_puffer/policies/multi_convolutional.py +++ b/pokemonred_puffer/policies/multi_convolutional.py @@ -205,18 +205,6 @@ def encode_observations(self, observations): ) party_latent = self.party_network(party_obs) - # enemy mon - species = self.species_embeddings(observations["species"].int()).float().squeeze(1) - status = one_hot(observations["status"].int(), 7).float().squeeze(1) - type1 = self.type_embeddings(observations["type1"].int()).squeeze(1) - type2 = self.type_embeddings(observations["type2"].int()).squeeze(1) - moves = ( - self.moves_embeddings(observations["moves"].int()) - .squeeze(1) - .float() - .reshape((-1, 6, 4 * self.moves_embeddings.embedding_dim)) - ) - # event_obs = ( # observations["events"].float() @ self.event_embeddings.weight # ) / self.event_embeddings.weight.shape[0] diff --git a/pokemonred_puffer/rewards/baseline.py b/pokemonred_puffer/rewards/baseline.py index e119d60..24ffccc 100644 --- a/pokemonred_puffer/rewards/baseline.py +++ b/pokemonred_puffer/rewards/baseline.py @@ -303,6 +303,7 @@ def get_game_state_reward(self): return ( { + "event": self.reward_config["event"] * self.update_max_event_rew(), "seen_pokemon": self.reward_config["seen_pokemon"] * sum(self.seen_pokemon), "caught_pokemon": self.reward_config["caught_pokemon"] * sum(self.caught_pokemon), "moves_obtained": self.reward_config["moves_obtained"] * sum(self.moves_obtained),