Commit 2b7090b

now pufferbox-compliant

xinpw8 committed Mar 2, 2024
1 parent d90b6bc commit 2b7090b

Showing 6 changed files with 214 additions and 152 deletions.

137 changes: 80 additions & 57 deletions clean_pufferl.py
@@ -22,8 +22,12 @@
 import pufferlib.policy_pool
 
 from collections import deque
-from pokegym.global_map import GLOBAL_MAP_SHAPE
-from pokegym.eval import make_pokemon_red_overlay
+import sys
+sys.path.append('/home/bet_adsorption_xinpw8')
+
+from pokegym.pokegym.global_map import GLOBAL_MAP_SHAPE
+# from pokegym.pokegym.global_map import GLOBAL_MAP_SHAPE
+from pokegym.pokegym.eval import make_pokemon_red_overlay
 from pathlib import Path
 
 @pufferlib.dataclass
@@ -57,6 +61,7 @@ class Losses:
     clipfrac = 0
     explained_variance = 0
 
+
 @pufferlib.dataclass
 class Charts:
     global_step = 0
@@ -86,7 +91,7 @@ def create(
     if exp_name is None:
         exp_name = str(uuid.uuid4())[:8]
     # Base directory path
-    required_resources_dir = Path('/home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym')
+    required_resources_dir = Path('/home/bet_adsorption_xinpw8/pokegym/pokegym') # Path('/home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym')
     # Path for the required_resources directory
     required_resources_path = required_resources_dir / "required_resources"
     required_resources_path.mkdir(parents=True, exist_ok=True)
@@ -292,7 +297,7 @@ def evaluate(data):
         **{f'performance/{k}': v
             for k, v in data.performance.items()},
         **{f'stats/{k}': v for k, v in data.stats.items()},
-        # **{f"max_stats/{k}": v for k, v in data.max_stats.items()}, # BET ADDED 1
+        **{f"max_stats/{k}": v for k, v in data.max_stats.items()}, # BET ADDED 1
         **{f'skillrank/{policy}': elo
             for policy, elo in data.policy_pool.ranker.ratings.items()},
     })
@@ -305,7 +310,7 @@ def evaluate(data):
     misc_profiler = pufferlib.utils.Profiler() # BET ADDED 2
 
     ptr = step = padded_steps_collected = agent_steps_collected = 0
-    infos = defaultdict(lambda: defaultdict(list))
+    # infos = defaultdict(lambda: defaultdict(list))
     while True:
         step += 1
         if ptr == config.batch_size + 1:
@@ -370,32 +375,50 @@ def evaluate(data):
             for policy_name, policy_i in i.items():
                 for agent_i in policy_i:
                     for name, dat in unroll_nested_dict(agent_i):
-                        infos[policy_name][name].append(dat)
+                        if policy_name not in data.infos:
+                            data.infos[policy_name] = {}
+                        if name not in data.infos[policy_name]:
+                            data.infos[policy_name][name] = [
+                                np.zeros_like(dat)
+                            ] * config.num_envs
+                        data.infos[policy_name][name][agent_i["env_id"]] = dat
+                        # infos[policy_name][name].append(dat)
         with env_profiler:
             data.pool.send(actions)
 
-        data.reward_buffer.append(r.cpu().sum().numpy())
+        # data.reward_buffer.append(r.cpu().sum().numpy())
         # Probably should normalize the rewards before trying to take the variance...
-        reward_var = np.var(data.reward_buffer)
-        if data.wandb is not None:
-            data.wandb.log(
-                {
-                    "reward/reward_var": reward_var,
-                    "reward/reward_buffer_len": len(data.reward_buffer),
-                }
-            )
-        if (
-            data.taught_cut
-            and len(data.reward_buffer) == data.reward_buffer.maxlen
-            and reward_var < 2.5e-3
-        ):
-            data.reward_buffer.clear()
+        # reward_var = np.var(data.reward_buffer)
+        # if data.wandb is not None:
+        #     data.wandb.log(
+        #         {
+        #             "reward/reward_var": reward_var,
+        #             "reward/reward_buffer_len": len(data.reward_buffer),
+        #         },
+        #         step=data.global_step
+
+        #     )
+        # if (
+        #     data.taught_cut
+        #     and len(data.reward_buffer) == data.reward_buffer.maxlen
+        #     and reward_var < 2.5e-3
+        # ):
+        #     data.reward_buffer.clear()
         # reset lr update if the reward starts stalling
-        data.lr_update = 1.0
+        # data.lr_update = 1.0
 
     eval_profiler.stop()
 
-    data.global_step += padded_steps_collected
+    # data.global_step += padded_steps_collected
+    try:
+        new_step = np.mean(data.infos["learner"]["stats/step"])
+        if new_step > data.global_step:
+            data.global_step = new_step
+            data.log = True
+    except KeyError:
+        print('KeyError clean_pufferl data.infos["learner"]["stats/step"]')
+        pass
 
     data.reward = float(torch.mean(data.rewards))
     data.SPS = int(padded_steps_collected / eval_profiler.elapsed)
 
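Aside on the disabled plateau check above: as the `# Probably should normalize the rewards...` comment notes, a raw-variance threshold like `reward_var < 2.5e-3` depends on the reward scale. A minimal scale-free sketch, assuming the same `deque`-based `data.reward_buffer`; the helper name and threshold are hypothetical, not part of this commit:

import numpy as np
from collections import deque

def reward_plateaued(reward_buffer: deque, rel_std: float = 0.05) -> bool:
    """True once the buffer is full and rewards have flatlined."""
    if len(reward_buffer) < reward_buffer.maxlen:
        return False
    rewards = np.asarray(reward_buffer, dtype=np.float64)
    scale = np.abs(rewards.mean())
    if scale < 1e-8:  # near-zero rewards everywhere: call that stalled
        return True
    # Coefficient of variation: std relative to mean magnitude, unitless.
    return rewards.std() / scale < rel_std

# Usage mirroring the commented-out logic:
# if data.taught_cut and reward_plateaued(data.reward_buffer):
#     data.reward_buffer.clear()
#     data.lr_update = 1.0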
@@ -412,49 +435,49 @@ def evaluate(data):
     perf.eval_pytorch_memory = eval_profiler.end_torch_mem
     perf.misc_time = misc_profiler.elapsed # BET ADDED 25
 
+
     data.stats = {}
-    # data.max_stats = {} # BET ADDED 26
+    data.max_stats = {} # BET ADDED 26
     # BET ADDED 0.7 Original logic:
-    infos = infos['learner']
-
-    try:
-        if 'pokemon_exploration_map' in infos:
-            for idx, pmap in zip(infos['learner']['env_id'], infos['pokemon_exploration_map']):
-                if not hasattr(data, 'pokemon'):
-                    import pokemon_red_eval
-                    data.map_updater = pokemon_red_eval.map_updater()
-                    data.map_buffer = np.zeros((data.config.num_envs, *pmap.shape))
-                data.map_buffer[idx] = pmap
-            pokemon_map = np.sum(data.map_buffer, axis=0)
-            rendered = data.map_updater(pokemon_map)
-            import cv2
-            # cv2.imwrite('c_counts_map.png', rendered)
-            # cv2.wait(1)
-            data.stats['Media/exploration_map'] = data.wandb.Image(rendered)
-    except:
-        pass
-
-    try:
-        if "stats/step" in infos:
-            data.global_step = np.mean(infos["stats/step"])
-        if 'pokemon_exploration_map' in infos:
-            overlay = make_pokemon_red_overlay(np.stack(infos['pokemon_exploration_map'], axis=0))
+    # infos = infos['learner']
+    for k, v in data.infos["learner"].items():
+
+        # try:
+        #     if 'pokemon_exploration_map' in infos:
+        #         for idx, pmap in zip(infos['learner']['env_id'], infos['pokemon_exploration_map']):
+        #             if not hasattr(data, 'pokemon'):
+        #                 import pokemon_red_eval
+        #                 data.map_updater = pokemon_red_eval.map_updater()
+        #                 data.map_buffer = np.zeros((data.config.num_envs, *pmap.shape))
+        #             data.map_buffer[idx] = pmap
+        #         pokemon_map = np.sum(data.map_buffer, axis=0)
+        #         rendered = data.map_updater(pokemon_map)
+        #         # import cv2
+        #         # cv2.imwrite('c_counts_map.png', rendered)
+        #         # cv2.wait(1)
+        #         data.stats['Media/exploration_map'] = data.wandb.Image(rendered)
+        # except:
+        #     pass
+
+        if "stats/step" in data.infos["learner"]:
+            data.global_step = np.mean(data.infos["learner"]["stats/step"])
+        if 'pokemon_exploration_map' in k:
+            overlay = make_pokemon_red_overlay(np.stack(v, axis=0))
+            # overlay = make_pokemon_red_overlay(np.stack(data['pokemon_exploration_map'], axis=0))
             if data.wandb is not None:
                 data.stats['Media/exploration_map'] = data.wandb.Image(overlay)
         try:
-            data.stats['stats'] = np.mean(infos)
-            # data.max_stats['stats'] = np.max(infos)
-            # if data.max_stats["got_hm01"] > 0:
-            #     data.taught_cut = True
+            data.stats[k] = np.mean(v)
+            data.max_stats[k] = np.max(v)
+            if data.max_stats["got_hm01"] > 0:
+                data.taught_cut = True
         except:
             pass
-    except:
-        pass
 
+        continue
     if config.verbose:
         print_dashboard(data.stats, data.init_performance, data.performance)
 
-    return data.stats, infos
+    return data.stats, data.infos
 
 @pufferlib.utils.profile
 def train(data):
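For readers skimming the diff: the change in `evaluate` above replaces unbounded `infos[policy_name][name].append(dat)` with one fixed slot per environment, indexed by `agent_i["env_id"]`, so the later `np.mean`/`np.max` aggregate over exactly `config.num_envs` current values instead of an ever-growing list. A self-contained sketch of the pattern; the helper and the numbers are illustrative only:

import numpy as np

def store_latest(infos: dict, policy: str, key: str, value, env_id: int, num_envs: int):
    """Keep only the most recent value of `key` for each environment."""
    slots = infos.setdefault(policy, {})
    if key not in slots:
        # num_envs references to one shared zero array; safe because slots
        # are replaced wholesale below, never mutated in place.
        slots[key] = [np.zeros_like(value)] * num_envs
    slots[key][env_id] = value

infos = {}
store_latest(infos, "learner", "stats/step", np.float32(128.0), env_id=3, num_envs=8)
print(np.mean(infos["learner"]["stats/step"]))  # 16.0: one env reported, seven zeros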
36 changes: 18 additions & 18 deletions config.yaml
@@ -2,14 +2,14 @@ train:
   seed: 1
   torch_deterministic: True
   device: cuda
-  total_timesteps: 800_000_000
+  total_timesteps: 800_000_000 # superseded by pokemon_red package
   learning_rate: 0.0003
   num_steps: 128
   anneal_lr: True
-  gamma: 0.999
+  gamma: 0.998 # superseded by pokemon_red package
   gae_lambda: 0.95
   num_minibatches: 4
-  update_epochs: 2 # 3
+  update_epochs: 2 # 3 # superseded by pokemon_red package
   norm_adv: True
   clip_coef: 0.1
   clip_vloss: True
@@ -18,19 +18,19 @@
   max_grad_norm: 0.5
   target_kl: ~
 
-  num_envs: 128 # 48
-  envs_per_worker: 4
-  envs_per_batch: 48 # must be <= num_envs
-  env_pool: True
-  verbose: True
+  num_envs: 128 # 48 # superseded by pokemon_red package
+  envs_per_worker: 4 # superseded by pokemon_red package
+  envs_per_batch: 48 # must be <= num_envs # superseded by pokemon_red package
+  env_pool: True # superseded by pokemon_red package
+  verbose: True # superseded by pokemon_red package
   data_dir: experiments
   checkpoint_interval: 40960 # 2048 * 10 * 2
   pool_kernel: [0]
-  batch_size: 32768 # 128 (?)
-  batch_rows: 128
+  batch_size: 32768 # 128 (?) # superseded by pokemon_red package
+  batch_rows: 128 # superseded by pokemon_red package
   bptt_horizon: 16
   vf_clip_coef: 0.1
-  compile: True
+  compile: True # superseded by pokemon_red package
   compile_mode: reduce-overhead
 
 sweep:
@@ -62,20 +62,20 @@ pokemon_red:
   package: pokemon_red
   train:
     total_timesteps: 800_000_000
-    num_envs: 128
-    envs_per_worker: 4
+    num_envs: 256
+    envs_per_worker: 8
     envs_per_batch: 48
-    update_epochs: 2 # 3
+    update_epochs: 10 # 3
     gamma: 0.998
-    batch_size: 32768
-    batch_rows: 128
+    batch_size: 32768 # 65280 # 32768
+    batch_rows: 256
     compile: True
 
   # Boey-specific env parameters; loaded by environment.py
   save_final_state: True
   print_rewards: True
   headless: True
-  init_state: /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir/start_from_state_dir/has_pokedex_nballs_noanim.state
+  init_state: /home/bet_adsorption_xinpw8/pokegym/pokegym/save_state_dir/has_pokedex_nballs_noanim.state # /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir/start_from_state_dir/has_pokedex_nballs_noanim.state
   action_freq: 24
   max_steps: 30720000 # Updated to match ep_length
   early_stop: True
@@ -91,7 +91,7 @@ pokemon_red:
   swap_button: True
   restricted_start_menu: True # False
   level_reward_badge_scale: 1.0
-  save_state_dir: /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir
+  save_state_dir: /home/bet_adsorption_xinpw8/pokegym/pokegym/save_state_dir # /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir
   special_exploration_scale: 1.0
   enable_item_manager: True
   enable_stage_manager: True
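The recurring `# superseded by pokemon_red package` comments mean the base `train:` values become placeholders once a package section is selected. A sketch of that precedence, assuming per-package `train:` keys are dict-merged over the base block (hypothetical helper; the real merge happens in demo.py's config handling):

import yaml

def resolve_train_config(path: str, package: str) -> dict:
    with open(path) as f:
        config = yaml.safe_load(f)
    train = dict(config['train'])                    # base defaults
    train.update(config[package].get('train', {}))   # package keys supersede
    return train

cfg = resolve_train_config('config.yaml', 'pokemon_red')
print(cfg['num_envs'], cfg['update_epochs'])  # 256 10: the package values, not 128 and 2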
2 changes: 1 addition & 1 deletion demo.py
@@ -15,7 +15,7 @@
 
 
 def load_from_config(env):
-    with open('config.yaml') as f:
+    with open('config_test.yaml') as f:
         config = yaml.safe_load(f)
 
     assert env in config, f'"{env}" not found in config.yaml. Uncommon environments that are part of larger packages may not have their own config. Specify these manually using the parent package, e.g. --config atari --env MontezumasRevengeNoFrameskip-v4.'
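One caveat with pointing `load_from_config` at `config_test.yaml`: if that file is not committed, a fresh clone now fails with `FileNotFoundError`. A hedged variant, not in this commit, that falls back to the tracked config:

from pathlib import Path
import yaml

def load_from_config(env, path='config_test.yaml'):
    if not Path(path).exists():  # fall back to the committed config
        path = 'config.yaml'
    with open(path) as f:
        config = yaml.safe_load(f)
    assert env in config, f'"{env}" not found in {path}.'
    return config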
4 changes: 2 additions & 2 deletions pufferlib/environments/pokemon_red/environment.py
@@ -15,7 +15,7 @@
 
 import functools
 import pufferlib.emulation
-from pokegym import Environment
+from pokegym.pokegym import Environment
 from stream_wrapper import StreamWrapper
 
 def env_creator(name="pokemon_red"):
@@ -24,7 +24,7 @@ def env_creator(name="pokemon_red"):
 def make(name, **kwargs,):
     """Pokemon Red"""
     env = Environment(kwargs)
-    env = StreamWrapper(env, stream_metadata={"user": " BET \n===PUFFERLIB===\n====BOEY====\n BET"})
+    env = StreamWrapper(env, stream_metadata={"user": " PUFFERBOX|BET|PUFFERBOX \nPUFFERBOX|BET|PUFFERBOX \n====BOEY====\nPUFFERBOX|BET|PUFFERBOX "})
     # Looks like the following will optionally create the object for you
     # Or use the one you pass it. I'll just construct it here.
     return pufferlib.emulation.GymnasiumPufferEnv(
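A quick smoke test for the new wrapper chain, assuming `env_creator` returns `make` with `name` pre-bound via `functools.partial` (the usual pufferlib registry pattern) and that `GymnasiumPufferEnv` follows the standard Gymnasium reset/step API; hypothetical, not part of the commit:

env = env_creator("pokemon_red")()  # equivalent to make("pokemon_red")
obs, info = env.reset()
for _ in range(8):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    if terminated or truncated:
        obs, info = env.reset()
env.close()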
2 changes: 1 addition & 1 deletion run.sh
@@ -1,2 +1,2 @@
 #!/bin/bash
-python demo.py --backend clean_pufferl --config pokemon_red --no-render --vectorization multiprocessing --mode train --track
+python demo.py --backend clean_pufferl --config pokemon_red --no-render --vectorization multiprocessing --mode train --track