From fd6c27905344631314747396a0a132e48da88848 Mon Sep 17 00:00:00 2001 From: xinpw8 Date: Tue, 5 Mar 2024 03:24:28 +0000 Subject: [PATCH] pufferbox5 --- clean_pufferl.py | 15 ++- config.yaml | 6 +- config_jsuarez.yaml | 121 ++++++++++++++++++ demo.py | 12 +- .../environments/pokemon_red/environment.py | 2 +- run.sh | 2 +- stream_wrapper.py | 5 +- 7 files changed, 144 insertions(+), 19 deletions(-) create mode 100755 config_jsuarez.yaml diff --git a/clean_pufferl.py b/clean_pufferl.py index 66c7a4a3..d697930b 100755 --- a/clean_pufferl.py +++ b/clean_pufferl.py @@ -91,7 +91,7 @@ def create( if exp_name is None: exp_name = str(uuid.uuid4())[:8] # Base directory path - required_resources_dir = Path('/home/bet_adsorption_xinpw8/pokegym/pokegym') # Path('/home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym') + required_resources_dir = Path('/bet_adsorption_xinpw8/PufferLib/pokegym/pokegym') # Path('/home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym') # Path for the required_resources directory required_resources_path = required_resources_dir / "required_resources" required_resources_path.mkdir(parents=True, exist_ok=True) @@ -209,7 +209,7 @@ def create( torch.zeros(shape, device=device), torch.zeros(shape, device=device), ) - obs=torch.zeros(config.batch_size + 1, *obs_shape) + obs=torch.zeros(config.batch_size + 1, *obs_shape, pin_memory=True) # added , pin_memory=True) actions=torch.zeros(config.batch_size + 1, *atn_shape, dtype=int) logprobs=torch.zeros(config.batch_size + 1) rewards=torch.zeros(config.batch_size + 1) @@ -522,7 +522,7 @@ def train(data): delta + config.gamma * config.gae_lambda * nextnonterminal * lastgaelam ) - data.b_obs = b_obs = torch.Tensor(data.obs_ary[b_idxs]) + data.b_obs = b_obs = data.obs[b_idxs].to(data.device, non_blocking=True) # torch.Tensor(data.obs_ary[b_idxs]) b_actions = torch.Tensor(data.actions_ary[b_idxs]).to(data.device, non_blocking=True) b_logprobs = 
torch.Tensor(data.logprobs_ary[b_idxs]).to(data.device, non_blocking=True) b_dones = torch.Tensor(data.dones_ary[b_idxs]).to(data.device, non_blocking=True) @@ -537,13 +537,16 @@ def train(data): train_time = time.time() pg_losses, entropy_losses, v_losses, clipfracs, old_kls, kls = [], [], [], [], [], [] - mb_obs_buffer = torch.zeros_like(b_obs[0], pin_memory=(data.device == "cuda")) + # COMMENTED OUT BET + # mb_obs_buffer = torch.zeros_like(b_obs[0], pin_memory=(data.device == "cuda")) for epoch in range(config.update_epochs): lstm_state = None for mb in range(num_minibatches): - mb_obs_buffer.copy_(b_obs[mb], non_blocking=True) - mb_obs = mb_obs_buffer.to(data.device, non_blocking=True) + mb_obs = b_obs[mb] + # COMMENTED OUT BET + # mb_obs_buffer.copy_(b_obs[mb], non_blocking=True) + # mb_obs = mb_obs_buffer.to(data.device, non_blocking=True) mb_actions = b_actions[mb].contiguous() mb_values = b_values[mb].reshape(-1) diff --git a/config.yaml b/config.yaml index 8c7d6516..0f718ad3 100755 --- a/config.yaml +++ b/config.yaml @@ -75,7 +75,7 @@ pokemon_red: save_final_state: True print_rewards: True headless: True - init_state: /home/bet_adsorption_xinpw8/pokegym/pokegym/save_state_dir/has_pokedex_nballs_noanim.state # /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir/start_from_state_dir/has_pokedex_nballs_noanim.state + init_state: /bet_adsorption_xinpw8/PufferLib/pokegym/pokegym/save_state_dir/has_pokedex_nballs_noanim.state # /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir/start_from_state_dir/has_pokedex_nballs_noanim.state action_freq: 24 max_steps: 3072000 # 30720000 # Updated to match ep_length early_stop: True @@ -91,10 +91,10 @@ pokemon_red: swap_button: True restricted_start_menu: True # False level_reward_badge_scale: 1.0 - save_state_dir: /home/bet_adsorption_xinpw8/pokegym/pokegym/save_state_dir # /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir + 
save_state_dir: /bet_adsorption_xinpw8/PufferLib/pokegym/pokegym/save_state_dir # /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir special_exploration_scale: 1.0 enable_item_manager: True # True - enable_stage_manager: False # True + enable_stage_manager: True # True enable_item_purchaser: True # True auto_skip_anim: True auto_skip_anim_frames: 8 diff --git a/config_jsuarez.yaml b/config_jsuarez.yaml new file mode 100755 index 00000000..3cb1120a --- /dev/null +++ b/config_jsuarez.yaml @@ -0,0 +1,121 @@ +train: + seed: 1 + torch_deterministic: True + device: cuda + total_timesteps: 800_000_000 # superseded by pokemon_red package + learning_rate: 0.0003 + num_steps: 128 # 128 + anneal_lr: False # True + gamma: 0.999 # gamma annealing: first 10m steps 0.999, then 0.9996; might have to screw with gamma and steps to make it work + gae_lambda: 0.95 + # num_minibatches: 4 # 4 + update_epochs: 3 # 2 # 3 # superseded by pokemon_red package + norm_adv: True + clip_coef: 0.1 + clip_vloss: True + ent_coef: 0.01 + vf_coef: 0.5 + max_grad_norm: 0.5 + target_kl: ~ + + num_envs: 48 # 128 # 48 # 512 num_envs, 12 envs/worker # superseded by pokemon_red package + envs_per_worker: 1 # or 2 - time it, see which is faster # 8 # 4 # superseded by pokemon_red package + envs_per_batch: 48 # must be <= num_envs # superseded by pokemon_red package + env_pool: True # superseded by pokemon_red package + verbose: True # superseded by pokemon_red package + data_dir: experiments + checkpoint_interval: 500 # 40960 # 2048 * 10 * 2 + pool_kernel: [0] + batch_size: 32768 # 48 # no async to avoid messing with things # 32768 # 128 (?) 
# superseded by pokemon_red package + batch_rows: 128 # between 128 and 1024 - empirically # 1024 # 256 # 128 # superseded by pokemon_red package + bptt_horizon: 32 # 16 + vf_clip_coef: 0.1 + compile: True # superseded by pokemon_red package + compile_mode: reduce-overhead + +sweep: + method: random + name: sweep + metric: + goal: maximize + name: episodic_return + # Nested parameters name required by WandB API + parameters: + train: + parameters: + learning_rate: { + 'distribution': 'log_uniform_values', + 'min': 1e-4, + 'max': 1e-1, + } + batch_size: { + 'values': [128, 256, 512, 1024, 2048], + } + batch_rows: { + 'values': [16, 32, 64, 128, 256], + } + bptt_horizon: { + 'values': [4, 8, 16, 32], + } + +pokemon_red: + package: pokemon_red + train: + total_timesteps: 800_000_000 + num_envs: 48 # 256 + envs_per_worker: 1 + envs_per_batch: 48 # 48 # must be divisible by envs_per_worker + update_epochs: 3 # 10 # 3 + gamma: 0.9996 + batch_size: 32768 # 65536 # 32768 + batch_rows: 128 # 256 + compile: True + + # Boey-specific env parameters; loaded by environment.py + save_final_state: True + print_rewards: True + headless: True + init_state: /bet_adsorption_xinpw8/PufferLib/pokegym/pokegym/save_state_dir/has_pokedex_nballs_noanim.state # /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir/start_from_state_dir/has_pokedex_nballs_noanim.state + action_freq: 24 + max_steps: 30720000 # 30720000 # Updated to match ep_length + early_stop: True + early_stopping_min_reward: 2.0 + save_video: False + fast_video: True + explore_weight: 1.5 + use_screen_explore: False + sim_frame_dist: 2000000.0 # 2000000.0 + reward_scale: 4 + extra_buttons: False + noop_button: True + swap_button: True + restricted_start_menu: True # False + level_reward_badge_scale: 1.0 + save_state_dir: /bet_adsorption_xinpw8/PufferLib/pokegym/pokegym/save_state_dir # /home/daa/puffer0.5.2_iron/obs_space_experiments/pokegym/pokegym/save_state_dir + special_exploration_scale: 1.0 + 
enable_item_manager: True # True + enable_stage_manager: True # True + enable_item_purchaser: True # True + auto_skip_anim: True + auto_skip_anim_frames: 8 + total_envs: 48 # 48 # Updated to match num_cpu + gb_path: PokemonRed.gb + debug: False + level_manager_eval_mode: False + sess_id: generate # Updated dynamically, placeholder for dynamic generation + use_wandb_logging: False + cpu_multiplier: 0.25 + save_freq: 500 # 40960 # 2048 * 10 * 2 + n_steps: 163840 # Calculated as int(5120 // cpu_multiplier) * 1 + num_cpu: 48 # number of processes, 1 env per process # 8 # Calculated as int(32 * cpu_multiplier) + env: + name: pokemon_red +pokemon-red: + package: pokemon_red +pokemonred: + package: pokemon_red +pokemon: + package: pokemon_red +pokegym: + package: pokemon_red + \ No newline at end of file diff --git a/demo.py b/demo.py index 01e403f2..a23d783c 100755 --- a/demo.py +++ b/demo.py @@ -59,12 +59,12 @@ def make_policy(env, env_module, args): policy = pufferlib.frameworks.cleanrl.Policy(policy) # BET ADDED 1 - mode = "default" - if args.train.device == "cuda": - mode = "reduce-overhead" - policy = policy.to(args.train.device, non_blocking=True) - policy.get_value = torch.compile(policy.get_value, mode=mode) - policy.get_action_and_value = torch.compile(policy.get_action_and_value, mode=mode) + # mode = "default" + # if args.train.device == "cuda": + # mode = "reduce-overhead" + # policy = policy.to(args.train.device, non_blocking=True) + # policy.get_value = torch.compile(policy.get_value, mode=mode) + # policy.get_action_and_value = torch.compile(policy.get_action_and_value, mode=mode) return policy.to(args.train.device) diff --git a/pufferlib/environments/pokemon_red/environment.py b/pufferlib/environments/pokemon_red/environment.py index e5c54eeb..6051a5bc 100755 --- a/pufferlib/environments/pokemon_red/environment.py +++ b/pufferlib/environments/pokemon_red/environment.py @@ -24,7 +24,7 @@ def env_creator(name="pokemon_red"): def make(name, **kwargs,): 
"""Pokemon Red""" env = Environment(kwargs) - env = StreamWrapper(env, stream_metadata={"user": " PUFFERBOX4|BET|PUFFERBOX4 \nPUFFERBOX4|BET|PUFFERBOX4 \n====BOEY====\nPUFFERBOX4|BET|PUFFERBOX4 "}) + env = StreamWrapper(env, stream_metadata={"user": "PUFFERBOX5|BET|\n=BOEY=\n"}) # Looks like the following will optionally create the object for you # Or use the one you pass it. I'll just construct it here. return pufferlib.emulation.GymnasiumPufferEnv( diff --git a/run.sh b/run.sh index c4bfd485..c8df9819 100755 --- a/run.sh +++ b/run.sh @@ -1,2 +1,2 @@ #!/bin/bash -python demo.py --backend clean_pufferl --config pokemon_red --no-render --vectorization multiprocessing --mode train --track \ No newline at end of file +python demo.py --backend clean_pufferl --config pokemon_red --no-render --vectorization multiprocessing --mode train --track # --exp-name test4 # --wandb-entity xinpw8 \ No newline at end of file diff --git a/stream_wrapper.py b/stream_wrapper.py index 63e4294a..ba2788df 100755 --- a/stream_wrapper.py +++ b/stream_wrapper.py @@ -32,17 +32,18 @@ def colors_generator(step=1): class StreamWrapper(gym.Wrapper): def __init__(self, env, stream_metadata={}): super().__init__(env) - self.color_generator = color_generator(step=5) # step=1 + self.color_generator = color_generator(step=2) # step=1 # self.ws_address = "wss://poke-ws-test-ulsjzjzwpa-ue.a.run.app/broadcast" self.ws_address = "wss://transdimensional.xyz/broadcast" self.stream_metadata = stream_metadata + self.stream_metadata = {**stream_metadata, "env_id": env.env_id,} # env ids listed self.loop = asyncio.new_event_loop() asyncio.set_event_loop(self.loop) self.websocket = None self.loop.run_until_complete( self.establish_wc_connection() ) - self.upload_interval = 250 + self.upload_interval = 125 self.steam_step_counter = 0 self.coord_list = [] self.start_time = time.time()