From 2b7090b13f9f44f8a4793bba6a92e8e93b990ce8 Mon Sep 17 00:00:00 2001 From: xinpw8 Date: Sat, 2 Mar 2024 06:35:51 +0000 Subject: [PATCH] now pufferbox-compliant --- clean_pufferl.py | 137 +++++++------ config.yaml | 36 ++-- demo.py | 2 +- .../environments/pokemon_red/environment.py | 4 +- run.sh | 2 +- setup.py | 185 +++++++++++------- 6 files changed, 214 insertions(+), 152 deletions(-) diff --git a/clean_pufferl.py b/clean_pufferl.py index 6e388a46..66c7a4a3 100755 --- a/clean_pufferl.py +++ b/clean_pufferl.py @@ -22,8 +22,12 @@ import pufferlib.policy_pool from collections import deque -from pokegym.global_map import GLOBAL_MAP_SHAPE -from pokegym.eval import make_pokemon_red_overlay +import sys +sys.path.append('/home/bet_adsorption_xinpw8') + +from pokegym.pokegym.global_map import GLOBAL_MAP_SHAPE +# from pokegym.pokegym.global_map import GLOBAL_MAP_SHAPE +from pokegym.pokegym.eval import make_pokemon_red_overlay from pathlib import Path @pufferlib.dataclass @@ -57,6 +61,7 @@ class Losses: clipfrac = 0 explained_variance = 0 + @pufferlib.dataclass class Charts: global_step = 0 @@ -86,7 +91,7 @@ def create( if exp_name is None: exp_name = str(uuid.uuid4())[:8] # Base directory path - required_resources_dir = Path('/home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym') + required_resources_dir = Path('/home/bet_adsorption_xinpw8/pokegym/pokegym') # Path('/home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym') # Path for the required_resources directory required_resources_path = required_resources_dir / "required_resources" required_resources_path.mkdir(parents=True, exist_ok=True) @@ -292,7 +297,7 @@ def evaluate(data): **{f'performance/{k}': v for k, v in data.performance.items()}, **{f'stats/{k}': v for k, v in data.stats.items()}, - # **{f"max_stats/{k}": v for k, v in data.max_stats.items()}, # BET ADDED 1 + **{f"max_stats/{k}": v for k, v in data.max_stats.items()}, # BET ADDED 1 **{f'skillrank/{policy}': elo for policy, elo in 
data.policy_pool.ranker.ratings.items()}, }) @@ -305,7 +310,7 @@ def evaluate(data): misc_profiler = pufferlib.utils.Profiler() # BET ADDED 2 ptr = step = padded_steps_collected = agent_steps_collected = 0 - infos = defaultdict(lambda: defaultdict(list)) + # infos = defaultdict(lambda: defaultdict(list)) while True: step += 1 if ptr == config.batch_size + 1: @@ -370,32 +375,50 @@ def evaluate(data): for policy_name, policy_i in i.items(): for agent_i in policy_i: for name, dat in unroll_nested_dict(agent_i): - infos[policy_name][name].append(dat) + if policy_name not in data.infos: + data.infos[policy_name] = {} + if name not in data.infos[policy_name]: + data.infos[policy_name][name] = [ + np.zeros_like(dat) + ] * config.num_envs + data.infos[policy_name][name][agent_i["env_id"]] = dat + # infos[policy_name][name].append(dat) with env_profiler: data.pool.send(actions) - data.reward_buffer.append(r.cpu().sum().numpy()) + # data.reward_buffer.append(r.cpu().sum().numpy()) # Probably should normalize the rewards before trying to take the variance... 
- reward_var = np.var(data.reward_buffer) - if data.wandb is not None: - data.wandb.log( - { - "reward/reward_var": reward_var, - "reward/reward_buffer_len": len(data.reward_buffer), - } - ) - if ( - data.taught_cut - and len(data.reward_buffer) == data.reward_buffer.maxlen - and reward_var < 2.5e-3 - ): - data.reward_buffer.clear() + # reward_var = np.var(data.reward_buffer) + # if data.wandb is not None: + # data.wandb.log( + # { + # "reward/reward_var": reward_var, + # "reward/reward_buffer_len": len(data.reward_buffer), + # }, + # step=data.global_step + + # ) + # if ( + # data.taught_cut + # and len(data.reward_buffer) == data.reward_buffer.maxlen + # and reward_var < 2.5e-3 + # ): + # data.reward_buffer.clear() # reset lr update if the reward starts stalling - data.lr_update = 1.0 + # data.lr_update = 1.0 eval_profiler.stop() - data.global_step += padded_steps_collected + # data.global_step += padded_steps_collected + try: + new_step = np.mean(data.infos["learner"]["stats/step"]) + if new_step > data.global_step: + data.global_step = new_step + data.log = True + except KeyError: + print(f'KeyError clean_pufferl data.infos["learner"]["stats/step"]') + pass + data.reward = float(torch.mean(data.rewards)) data.SPS = int(padded_steps_collected / eval_profiler.elapsed) @@ -412,49 +435,49 @@ def evaluate(data): perf.eval_pytorch_memory = eval_profiler.end_torch_mem perf.misc_time = misc_profiler.elapsed # BET ADDED 25 + data.stats = {} - # data.max_stats = {} # BET ADDED 26 + data.max_stats = {} # BET ADDED 26 # BET ADDED 0.7 Original logic: - infos = infos['learner'] - - try: - if 'pokemon_exploration_map' in infos: - for idx, pmap in zip(infos['learner']['env_id'], infos['pokemon_exploration_map']): - if not hasattr(data, 'pokemon'): - import pokemon_red_eval - data.map_updater = pokemon_red_eval.map_updater() - data.map_buffer = np.zeros((data.config.num_envs, *pmap.shape)) - data.map_buffer[idx] = pmap - pokemon_map = np.sum(data.map_buffer, axis=0) - rendered 
= data.map_updater(pokemon_map) - import cv2 - # cv2.imwrite('c_counts_map.png', rendered) - # cv2.wait(1) - data.stats['Media/exploration_map'] = data.wandb.Image(rendered) - except: - pass - - try: - if "stats/step" in infos: - data.global_step = np.mean(infos["stats/step"]) - if 'pokemon_exploration_map' in infos: - overlay = make_pokemon_red_overlay(np.stack(infos['pokemon_exploration_map'], axis=0)) + # infos = infos['learner'] + for k, v in data.infos["learner"].items(): + + # try: + # if 'pokemon_exploration_map' in infos: + # for idx, pmap in zip(infos['learner']['env_id'], infos['pokemon_exploration_map']): + # if not hasattr(data, 'pokemon'): + # import pokemon_red_eval + # data.map_updater = pokemon_red_eval.map_updater() + # data.map_buffer = np.zeros((data.config.num_envs, *pmap.shape)) + # data.map_buffer[idx] = pmap + # pokemon_map = np.sum(data.map_buffer, axis=0) + # rendered = data.map_updater(pokemon_map) + # # import cv2 + # # cv2.imwrite('c_counts_map.png', rendered) + # # cv2.wait(1) + # data.stats['Media/exploration_map'] = data.wandb.Image(rendered) + # except: + # pass + + + if "stats/step" in data.infos: + data.global_step = np.mean(data["stats/step"]) + if 'pokemon_exploration_map' in k: + overlay = make_pokemon_red_overlay(np.stack(v, axis=0)) + # overlay = make_pokemon_red_overlay(np.stack(data['pokemon_exploration_map'], axis=0)) if data.wandb is not None: data.stats['Media/exploration_map'] = data.wandb.Image(overlay) try: - data.stats['stats'] = np.mean(infos) - # data.max_stats['stats'] = np.max(infos) - # if data.max_stats["got_hm01"] > 0: - # data.taught_cut = True + data.stats[k] = np.mean(v) + data.max_stats[k] = np.max(v) + if data.max_stats["got_hm01"] > 0: + data.taught_cut = True except: - pass - except: - pass - + continue if config.verbose: print_dashboard(data.stats, data.init_performance, data.performance) - return data.stats, infos + return data.stats, data.infos @pufferlib.utils.profile def train(data): diff --git 
a/config.yaml b/config.yaml index 3c878f49..6c8c77d1 100755 --- a/config.yaml +++ b/config.yaml @@ -2,14 +2,14 @@ train: seed: 1 torch_deterministic: True device: cuda - total_timesteps: 800_000_000 + total_timesteps: 800_000_000 # superseded by pokemon_red package learning_rate: 0.0003 num_steps: 128 anneal_lr: True - gamma: 0.999 + gamma: 0.998 # superseded by pokemon_red package gae_lambda: 0.95 num_minibatches: 4 - update_epochs: 2 # 3 + update_epochs: 2 # 3 # superseded by pokemon_red package norm_adv: True clip_coef: 0.1 clip_vloss: True @@ -18,19 +18,19 @@ train: max_grad_norm: 0.5 target_kl: ~ - num_envs: 128 # 48 - envs_per_worker: 4 - envs_per_batch: 48 # must be <= num_envs - env_pool: True - verbose: True + num_envs: 128 # 48 # superseded by pokemon_red package + envs_per_worker: 4 # superseded by pokemon_red package + envs_per_batch: 48 # must be <= num_envs # superseded by pokemon_red package + env_pool: True # superseded by pokemon_red package + verbose: True # superseded by pokemon_red package data_dir: experiments checkpoint_interval: 40960 # 2048 * 10 * 2 pool_kernel: [0] - batch_size: 32768 # 128 (?) - batch_rows: 128 + batch_size: 32768 # 128 (?) 
# superseded by pokemon_red package + batch_rows: 128 # superseded by pokemon_red package bptt_horizon: 16 vf_clip_coef: 0.1 - compile: True + compile: True # superseded by pokemon_red package compile_mode: reduce-overhead sweep: @@ -62,20 +62,20 @@ pokemon_red: package: pokemon_red train: total_timesteps: 800_000_000 - num_envs: 128 - envs_per_worker: 4 + num_envs: 256 + envs_per_worker: 8 envs_per_batch: 48 - update_epochs: 2 # 3 + update_epochs: 10 # 3 gamma: 0.998 - batch_size: 32768 - batch_rows: 128 + batch_size: 32768 # 65280 # 32768 + batch_rows: 256 compile: True # Boey-specific env parameters; loaded by environment.py save_final_state: True print_rewards: True headless: True - init_state: /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir/start_from_state_dir/has_pokedex_nballs_noanim.state + init_state: /home/bet_adsorption_xinpw8/pokegym/pokegym/save_state_dir/has_pokedex_nballs_noanim.state # /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir/start_from_state_dir/has_pokedex_nballs_noanim.state action_freq: 24 max_steps: 30720000 # Updated to match ep_length early_stop: True @@ -91,7 +91,7 @@ pokemon_red: swap_button: True restricted_start_menu: True # False level_reward_badge_scale: 1.0 - save_state_dir: /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir + save_state_dir: /home/bet_adsorption_xinpw8/pokegym/pokegym/save_state_dir # /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir special_exploration_scale: 1.0 enable_item_manager: True enable_stage_manager: True diff --git a/demo.py b/demo.py index 61919400..37567f28 100755 --- a/demo.py +++ b/demo.py @@ -15,7 +15,7 @@ def load_from_config(env): - with open('config.yaml') as f: + with open('config_test.yaml') as f: config = yaml.safe_load(f) assert env in config, f'"{env}" not found in config.yaml. Uncommon environments that are part of larger packages may not have their own config. 
Specify these manually using the parent package, e.g. --config atari --env MontezumasRevengeNoFrameskip-v4.' diff --git a/pufferlib/environments/pokemon_red/environment.py b/pufferlib/environments/pokemon_red/environment.py index 1e1afed6..4d652c69 100755 --- a/pufferlib/environments/pokemon_red/environment.py +++ b/pufferlib/environments/pokemon_red/environment.py @@ -15,7 +15,7 @@ import functools import pufferlib.emulation -from pokegym import Environment +from pokegym.pokegym import Environment from stream_wrapper import StreamWrapper def env_creator(name="pokemon_red"): @@ -24,7 +24,7 @@ def env_creator(name="pokemon_red"): def make(name, **kwargs,): """Pokemon Red""" env = Environment(kwargs) - env = StreamWrapper(env, stream_metadata={"user": " BET \n===PUFFERLIB===\n====BOEY====\n BET"}) + env = StreamWrapper(env, stream_metadata={"user": " PUFFERBOX|BET|PUFFERBOX \nPUFFERBOX|BET|PUFFERBOX \n====BOEY====\nPUFFERBOX|BET|PUFFERBOX "}) # Looks like the following will optionally create the object for you # Or use the one you pass it. I'll just construct it here. 
return pufferlib.emulation.GymnasiumPufferEnv( diff --git a/run.sh b/run.sh index d9cbed26..c4bfd485 100755 --- a/run.sh +++ b/run.sh @@ -1,2 +1,2 @@ #!/bin/bash -python demo.py --backend clean_pufferl --config pokemon_red --no-render --vectorization multiprocessing --mode train --track +python demo.py --backend clean_pufferl --config pokemon_red --no-render --vectorization multiprocessing --mode train --track \ No newline at end of file diff --git a/setup.py b/setup.py index b73ab02f..73b07099 100755 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from Cython.Build import cythonize from itertools import chain -VERSION = '0.6.1' +VERSION = '0.7.2' # Default Gym/Gymnasium/PettingZoo versions # Gym: @@ -15,7 +15,7 @@ # - 0.18-0.21 require setuptools<=65.5.0 GYMNASIUM_VERSION = '0.29.1' -GYM_VERSION = '0.21' +GYM_VERSION = '0.23' PETTINGZOO_VERSION = '1.24.1' SHIMMY = 'shimmy[gym-v21]' @@ -28,61 +28,125 @@ 'furo==2023.3.27', ] +cleanrl = [ + 'tensorboard==2.11.2', + 'torch', + 'wandb==0.13.7', + 'psutil==5.9.5', + 'tyro', +] -# These are the environments that PufferLib has made -# compatible with the latest version of Gym/Gymnasium/PettingZoo -# They are included in PufferTank as a default heavy install -# We force updated versions of Gym/Gymnasium/PettingZoo here to -# ensure that users do not have issues with conflicting versions -# when switching to incompatible environments -compatible_environments = { +rllib = [ + 'ray[all]==2.0.0', + 'setproctitle==1.1.10', + 'service-identity==21.1.0', + 'pydantic==1.9', +] + +environments = { + 'avalon': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', + 'avalon-rl==1.0.0', + ], 'atari': [ - 'gymnasium[atari,accept-rom-license]', + f'gym=={GYM_VERSION}', + f'gymnasium[atari,accept-rom-license]=={GYMNASIUM_VERSION}', 'stable_baselines3==2.1.0', ], 'box2d': [ + f'gym=={GYM_VERSION}', + f'gymnasium[box2d]=={GYMNASIUM_VERSION}', 'swig==4.1.1', - 'gymnasium[box2d]', ], 'bsuite': [ + f'gym=={GYM_VERSION}', + 
f'gymnasium=={GYMNASIUM_VERSION}', 'bsuite==0.3.5', ], 'butterfly': [ - 'pettingzoo[butterfly]', + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', + f'pettingzoo[butterfly]=={PETTINGZOO_VERSION}', ], 'classic_control': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', ], 'crafter': [ - 'crafter==1.8.2', + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', + 'crafter==1.8.3', ], 'dm_control': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', 'dm_control==1.0.11', ], 'dm_lab': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', 'gym_deepmindlab==0.1.2', 'dm_env==1.6', ], 'griddly': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', 'imageio==2.23.0', - 'griddly==1.4.2', + 'griddly==1.6.7', + ], + 'magent': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', + 'pettingzoo==1.19.0', + 'magent==0.2.4', + # The Magent2 package is broken for now + #'magent2==0.3.2', ], 'microrts': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', 'ffmpeg==1.4', 'gym_microrts==0.3.2', ], + 'minerl': [ + 'gym==0.17.0', + f'gymnasium=={GYMNASIUM_VERSION}', + 'minerl==0.4.4', + # Compatiblity warning with urllib3 and chardet + 'requests==2.31.0', + ], 'minigrid': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', 'minigrid==2.3.1', ], 'minihack': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', 'minihack==0.1.5', ], 'nethack': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', 'nle==0.9.0', ], 'nmmo': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', + f'pettingzoo=={PETTINGZOO_VERSION}', 'nmmo>=2.0', ], + 'open_spiel': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', + 'open_spiel==1.3', + 'pettingzoo==1.19.0', + SHIMMY, + ], 'pokemon_red': [ + f'gym=={GYM_VERSION}', + f'gymnasium=={GYMNASIUM_VERSION}', 'einops==0.6.1', 'matplotlib', 'scikit-image==0.21.0', @@ -92,49 +156,11 @@ 'pandas==2.0.2', ], 'procgen': [ - 
'procgen==0.10.7', - ], -} - -for env, packages in compatible_environments.items(): - compatible_environments[env] = [ - f'gymnasium=={GYMNASIUM_VERSION}', f'gym=={GYM_VERSION}', - f'pettingzoo=={PETTINGZOO_VERSION}', - SHIMMY, - *packages, - ] - -# These environments require specific old versions of -# Gym/Gymnasium/PettingZoo to work. -incompatible_environments = { - 'avalon': [ - 'avalon-rl==1.0.0', - f'gymnasium=={GYMNASIUM_VERSION}', - f'pettingzoo=={PETTINGZOO_VERSION}', - ], - 'magent': [ - 'magent==0.2.4', - 'pettingzoo==1.19.0', f'gymnasium=={GYMNASIUM_VERSION}', - f'gym=={GYM_VERSION}', - # The Magent2 package is broken for now - #'magent2==0.3.2', - ], - 'minerl': [ - 'gym==0.17.0', - 'minerl==0.4.4', - # Compatiblity warning with urllib3 and chardet - 'requests==2.31.0', - f'gymnasium=={GYMNASIUM_VERSION}', - f'pettingzoo=={PETTINGZOO_VERSION}', - ], - 'open_spiel': [ - 'open_spiel==1.3', - 'pettingzoo==1.19.0', - f'gymnasium=={GYMNASIUM_VERSION}', - f'gym=={GYM_VERSION}', - SHIMMY, + # Danijar mirrored for Python 3.11 and 3.12 support + 'procgen-mirror==0.10.7', + #'procgen==0.10.7', ], #'smac': [ # 'git+https://github.com/oxwhirl/smac.git', @@ -144,19 +170,31 @@ #] } -rllib = [ - 'ray[all]==2.0.0', - 'setproctitle==1.1.10', - 'service-identity==21.1.0', - 'pydantic==1.9', -] -cleanrl = [ - 'tensorboard==2.11.2', - 'torch', - 'wandb==0.13.7', - 'psutil==5.9.5', -] +# These are the environments that PufferLib has made +# compatible with the latest version of Gym/Gymnasium/PettingZoo +# They are included in PufferTank as a default heavy install +# We force updated versions of Gym/Gymnasium/PettingZoo here to +# ensure that users do not have issues with conflicting versions +# when switching to incompatible environments +common = cleanrl + [environments[env] for env in [ + 'atari', + #'box2d', + 'bsuite', + #'butterfly', + 'classic_control', + 'crafter', + 'dm_control', + 'dm_lab', + 'griddly', + 'microrts', + 'minigrid', + 'minihack', + 'nethack', + 
'nmmo', + 'pokemon_red', + 'procgen', +]] setup( name="pufferlib", @@ -175,23 +213,24 @@ 'docs': docs, 'rllib': rllib, 'cleanrl': cleanrl, - 'compatible-environments': compatible_environments, - **compatible_environments, - **incompatible_environments, + 'common': common, + **environments, }, ext_modules = cythonize("pufferlib/extensions.pyx"), python_requires=">=3.8", license="MIT", author="Joseph Suarez", - author_email="jsuarez@mit.edu", + author_email="jsuarez@puffer.ai", url="https://github.com/PufferAI/PufferLib", - keywords=["Puffer", "AI", "RL"], + keywords=["Puffer", "AI", "RL", "Reinforcement Learning"], classifiers=[ "Intended Audience :: Science/Research", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", ], ) @@ -202,4 +241,4 @@ #curl -L -o smac.zip https://blzdistsc2-a.akamaihd.net/Linux/SC2.4.10.zip #unzip -P iagreetotheeula smac.zip #curl -L -o maps.zip https://github.com/oxwhirl/smac/releases/download/v0.1-beta1/SMAC_Maps.zip -#unzip maps.zip && mv SMAC_Maps/ StarCraftII/Maps/ +#unzip maps.zip && mv SMAC_Maps/ StarCraftII/Maps/ \ No newline at end of file