diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/MANIFEST.in b/MANIFEST.in old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/clean_pufferl.py b/clean_pufferl.py old mode 100644 new mode 100755 diff --git a/cleanrl_ppo_atari.py b/cleanrl_ppo_atari.py old mode 100644 new mode 100755 diff --git a/config.yaml b/config.yaml old mode 100644 new mode 100755 index 851c53e7..9221a0b7 --- a/config.yaml +++ b/config.yaml @@ -3,7 +3,7 @@ train: torch_deterministic: True device: cuda total_timesteps: 10_000_000 - learning_rate: 2.5e-4 + learning_rate: 0.0004 num_steps: 128 anneal_lr: True gamma: 0.99 @@ -22,7 +22,7 @@ train: envs_per_worker: 1 envs_per_batch: ~ env_pool: True - verbose: True + verbose: False data_dir: experiments checkpoint_interval: 200 cpu_offload: True @@ -664,13 +664,13 @@ pokemon_red: package: pokemon_red train: total_timesteps: 100_000_000 - num_envs: 4 + num_envs: 64 envs_per_worker: 1 - envpool_batch_size: 4 - update_epochs: 3 + envpool_batch_size: 32 + update_epochs: 10 gamma: 0.998 - batch_size: 1024 - batch_rows: 16 + batch_size: 32768 + batch_rows: 64 env: name: pokemon_red pokemon-red: diff --git a/demo.py b/demo.py old mode 100644 new mode 100755 index 8d104667..e4738fe7 --- a/demo.py +++ b/demo.py @@ -39,12 +39,16 @@ def load_from_config(env): return pkg, pufferlib.namespace(**combined_config) def make_policy(env, env_module, args): + policy = env_module.Policy(env, **args.policy) + if args.force_recurrence or env_module.Recurrent is not None: policy = env_module.Recurrent(env, policy, **args.recurrent) policy = pufferlib.frameworks.cleanrl.RecurrentPolicy(policy) + else: policy = pufferlib.frameworks.cleanrl.Policy(policy) + return policy.to(args.train.device) @@ -153,7 +157,7 @@ def train(args, env_module, make_env): parser.add_argument('--no-render', action='store_true', help='Disable render during evaluate') parser.add_argument('--exp-name', type=str, default=None, help="Resume from experiment") parser.add_argument('--vectorization', type=str, default='serial', help='Vectorization method (serial, multiprocessing, ray)') - parser.add_argument('--wandb-entity', type=str, default='jsuarez', help='WandB entity') + parser.add_argument('--wandb-entity', type=str, default='xinpw8', help='WandB entity') parser.add_argument('--wandb-project', type=str, default='pufferlib', help='WandB project') parser.add_argument('--wandb-group', type=str, default='debug', help='WandB group') parser.add_argument('--track', action='store_true', help='Track on WandB') diff --git a/pokemon_red_eval.py b/pokemon_red_eval.py old mode 100644 new mode 100755 diff --git a/pufferlib/__init__.py b/pufferlib/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/emulation.py b/pufferlib/emulation.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/__init__.py b/pufferlib/environments/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/atari/__init__.py b/pufferlib/environments/atari/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/atari/environment.py b/pufferlib/environments/atari/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/atari/torch.py b/pufferlib/environments/atari/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/bsuite/__init__.py b/pufferlib/environments/bsuite/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/bsuite/environment.py b/pufferlib/environments/bsuite/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/bsuite/squared.py b/pufferlib/environments/bsuite/squared.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/bsuite/torch.py b/pufferlib/environments/bsuite/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/butterfly/__init__.py b/pufferlib/environments/butterfly/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/butterfly/environment.py b/pufferlib/environments/butterfly/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/butterfly/torch.py b/pufferlib/environments/butterfly/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/classic_control/__init__.py b/pufferlib/environments/classic_control/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/classic_control/environment.py b/pufferlib/environments/classic_control/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/classic_control/torch.py b/pufferlib/environments/classic_control/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/crafter/__init__.py b/pufferlib/environments/crafter/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/crafter/environment.py b/pufferlib/environments/crafter/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/crafter/torch.py b/pufferlib/environments/crafter/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/dm_control/__init__.py b/pufferlib/environments/dm_control/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/dm_control/environment.py b/pufferlib/environments/dm_control/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/dm_control/torch.py b/pufferlib/environments/dm_control/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/dm_lab/__init__.py b/pufferlib/environments/dm_lab/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/dm_lab/environment.py b/pufferlib/environments/dm_lab/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/dm_lab/torch.py b/pufferlib/environments/dm_lab/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/griddly/__init__.py b/pufferlib/environments/griddly/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/griddly/environment.py b/pufferlib/environments/griddly/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/griddly/torch.py b/pufferlib/environments/griddly/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/links_awaken/__init__.py b/pufferlib/environments/links_awaken/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/links_awaken/environment.py b/pufferlib/environments/links_awaken/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/links_awaken/torch.py b/pufferlib/environments/links_awaken/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/magent/__init__.py b/pufferlib/environments/magent/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/magent/environment.py b/pufferlib/environments/magent/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/magent/torch.py b/pufferlib/environments/magent/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/microrts/__init__.py b/pufferlib/environments/microrts/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/microrts/environment.py b/pufferlib/environments/microrts/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/microrts/torch.py b/pufferlib/environments/microrts/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/minerl/__init__.py b/pufferlib/environments/minerl/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/minerl/environment.py b/pufferlib/environments/minerl/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/minerl/torch.py b/pufferlib/environments/minerl/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/minigrid/__init__.py b/pufferlib/environments/minigrid/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/minigrid/environment.py b/pufferlib/environments/minigrid/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/minigrid/torch.py b/pufferlib/environments/minigrid/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/minihack/__init__.py b/pufferlib/environments/minihack/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/minihack/environment.py b/pufferlib/environments/minihack/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/minihack/torch.py b/pufferlib/environments/minihack/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/nethack/__init__.py b/pufferlib/environments/nethack/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/nethack/environment.py b/pufferlib/environments/nethack/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/nethack/torch.py b/pufferlib/environments/nethack/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/nmmo/__init__.py b/pufferlib/environments/nmmo/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/nmmo/environment.py b/pufferlib/environments/nmmo/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/nmmo/torch.py b/pufferlib/environments/nmmo/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/ocean/README.md b/pufferlib/environments/ocean/README.md old mode 100644 new mode 100755 diff --git a/pufferlib/environments/ocean/__init__.py b/pufferlib/environments/ocean/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/ocean/bandit.py b/pufferlib/environments/ocean/bandit.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/ocean/environment.py b/pufferlib/environments/ocean/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/ocean/memory.py b/pufferlib/environments/ocean/memory.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/ocean/password.py b/pufferlib/environments/ocean/password.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/ocean/squared.py b/pufferlib/environments/ocean/squared.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/ocean/stochastic.py b/pufferlib/environments/ocean/stochastic.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/ocean/torch.py b/pufferlib/environments/ocean/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/open_spiel/__init__.py b/pufferlib/environments/open_spiel/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/open_spiel/environment.py b/pufferlib/environments/open_spiel/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/open_spiel/gymnasium_environment.py b/pufferlib/environments/open_spiel/gymnasium_environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/open_spiel/pettingzoo_environment.py b/pufferlib/environments/open_spiel/pettingzoo_environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/open_spiel/torch.py b/pufferlib/environments/open_spiel/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/open_spiel/utils.py b/pufferlib/environments/open_spiel/utils.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/pokemon_red/__init__.py b/pufferlib/environments/pokemon_red/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/pokemon_red/environment.py b/pufferlib/environments/pokemon_red/environment.py old mode 100644 new mode 100755 index 299f5993..7e6bd922 --- a/pufferlib/environments/pokemon_red/environment.py +++ b/pufferlib/environments/pokemon_red/environment.py @@ -13,6 +13,6 @@ def env_creator(name='pokemon_red'): def make(name, headless: bool = True, state_path=None): '''Pokemon Red''' - env = Environment(headless=headless, state_path=state_path) + env = Environment() return pufferlib.emulation.GymnasiumPufferEnv(env=env, postprocessor_cls=pufferlib.emulation.BasicPostprocessor) diff --git a/pufferlib/environments/pokemon_red/torch.py b/pufferlib/environments/pokemon_red/torch.py old mode 100644 new mode 100755 index e773d519..fd056ebf --- a/pufferlib/environments/pokemon_red/torch.py +++ b/pufferlib/environments/pokemon_red/torch.py @@ -1,19 +1,208 @@ +from torch.nn import functional as F +from pdb import set_trace as T import pufferlib.models +from typing import Callable, Dict, List, Optional, Tuple, Type, Union +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor, is_image_space, get_flattened_obs_dim, NatureCNN, TensorDict, gym +from gymnasium import spaces +import torch as th +from torch import nn class Recurrent(pufferlib.models.RecurrentWrapper): def __init__(self, env, policy, input_size=512, hidden_size=512, num_layers=1): super().__init__(env, policy, input_size, hidden_size, num_layers) -class Policy(pufferlib.models.Convolutional): - def __init__(self, env, input_size=512, hidden_size=512, output_size=512, - framestack=3, flat_size=64*5*6): - super().__init__( - env=env, - input_size=input_size, - hidden_size=hidden_size, - output_size=output_size, - framestack=framestack, - flat_size=flat_size, - channels_last=True, +# class Policy(pufferlib.models.Convolutional): +# def __init__(self, env, input_size=512, hidden_size=512, output_size=512, +# framestack=3, flat_size=64*5*6): +# super().__init__( +# env=env, +# input_size=input_size, +# hidden_size=hidden_size, +# output_size=output_size, +# framestack=framestack, +# flat_size=flat_size, +# channels_last=True, +# ) + + +class Policy(pufferlib.models.Policy): + """ + Combined features extractor for Dict observation spaces. + Builds a features extractor for each key of the space. Input from each space + is fed through a separate submodule (CNN or MLP, depending on input shape), + the output features are concatenated and fed through additional MLP network ("combined"). + + :param observation_space: + :param cnn_output_dim: Number of features to output from each CNN submodule(s). Defaults to + 256 to avoid exploding network sizes. + :param normalized_image: Whether to assume that the image is already normalized + or not (this disables dtype and bounds checks): when True, it only checks that + the space is a Box and has 3 dimensions. + Otherwise, it checks that it has expected dtype (uint8) and bounds (values in [0, 255]). + """ + + def __init__( + self, + env, + cnn_output_dim: int = 256, + normalized_image: bool = False, + ) -> None: + # TODO we do not know features-dim here before going over all the items, so put something there. This is dirty! + super().__init__(env) + + self.flat_observation_space = env.flat_observation_space + self.flat_observation_structure = env.flat_observation_structure + # observation_space.spaces.items() + + # image (3, 36, 40) + self.image_cnn = NatureCNN(env.structured_observation_space['image'], features_dim=cnn_output_dim, normalized_image=normalized_image) + + # poke_move_ids (12, 4) -> (12, 4, 8) + self.poke_move_ids_embedding = nn.Embedding(167, 8, padding_idx=0) + # concat with poke_move_pps (12, 4, 2) + # input (12, 4, 10) for fc relu + self.move_fc_relu = nn.Sequential( + nn.Linear(10, 8), + nn.ReLU(), + nn.Linear(8, 8), + nn.ReLU(), + ) + # max pool + self.move_max_pool = nn.AdaptiveMaxPool2d(output_size=(1, 16)) + # output (12, 1, 16), sqeeze(-2) -> (12, 16) + + # poke_type_ids (12, 2) -> (12, 2, 8) + self.poke_type_ids_embedding = nn.Embedding(17, 8, padding_idx=0) + # (12, 2, 8) -> (12, 8) by sum(dim=-2) + + # poke_ids (12, ) -> (12, 8) + self.poke_ids_embedding = nn.Embedding(192, 16, padding_idx=0) + + # pokemon fc relu + self.poke_fc_relu = nn.Sequential( + nn.Linear(63, 32), + nn.ReLU(), + nn.Linear(32, 32), + nn.ReLU(), + ) + + # pokemon party head + self.poke_party_head = nn.Sequential( + nn.Linear(32, 32), + nn.ReLU(), + nn.Linear(32, 32), + ) + # get the first 6 pokemon and do max pool + self.poke_party_head_max_pool = nn.AdaptiveMaxPool2d(output_size=(1, 32)) + + # pokemon opp head + self.poke_opp_head = nn.Sequential( + nn.Linear(32, 32), + nn.ReLU(), + nn.Linear(32, 32), + ) + # get the last 6 pokemon and do max pool + self.poke_opp_head_max_pool = nn.AdaptiveMaxPool2d(output_size=(1, 32)) + + # item_ids embedding + self.item_ids_embedding = nn.Embedding(256, 16, padding_idx=0) # (20, 16) + # item_ids fc relu + self.item_ids_fc_relu = nn.Sequential( + nn.Linear(17, 16), + nn.ReLU(), + nn.Linear(16, 16), + nn.ReLU(), ) + # item_ids max pool + self.item_ids_max_pool = nn.AdaptiveMaxPool2d(output_size=(1, 16)) + + # event_ids embedding + self.event_ids_embedding = nn.Embedding(2570, 16, padding_idx=0) # (20, ) + # event_ids fc relu + self.event_ids_fc_relu = nn.Sequential( + nn.Linear(17, 16), + nn.ReLU(), + nn.Linear(16, 16), + nn.ReLU(), + ) + # event_ids max pool + self.event_ids_max_pool = nn.AdaptiveMaxPool2d(output_size=(1, 16)) + + + self._features_dim = 406 + + self.fc1 = nn.Linear(406,512) + self.fc2 = nn.Linear(512,512) + self.action = nn.Linear(512, self.action_space.n) + self.value_head = nn.Linear(512,1) + + + + def encode_observations(self, observations: TensorDict) -> th.Tensor: + observations = pufferlib.emulation.unpack_batched_obs(observations, + self.flat_observation_space, self.flat_observation_structure) + + img = self.image_cnn(observations['image']) # (256, ) + + # Pokemon + # Moves + embedded_poke_move_ids = self.poke_move_ids_embedding(observations['poke_move_ids'].to(th.int)) + poke_move_pps = observations['poke_move_pps'] + poke_moves = th.cat([embedded_poke_move_ids, poke_move_pps], dim=-1) + poke_moves = self.move_fc_relu(poke_moves) + poke_moves = self.move_max_pool(poke_moves).squeeze(-2) # (12, 16) + # Types + embedded_poke_type_ids = self.poke_type_ids_embedding(observations['poke_type_ids'].to(th.int)) + poke_types = th.sum(embedded_poke_type_ids, dim=-2) # (12, 8) + # Pokemon ID + embedded_poke_ids = self.poke_ids_embedding(observations['poke_ids'].to(th.int)) + poke_ids = embedded_poke_ids # (12, 8) + # Pokemon stats (12, 23) + poke_stats = observations['poke_all'] + # All pokemon features + pokemon_concat = th.cat([poke_moves, poke_types, poke_ids, poke_stats], dim=-1) # (12, 63) + pokemon_features = self.poke_fc_relu(pokemon_concat) # (12, 32) + + # Pokemon party head + party_pokemon_features = pokemon_features[..., :6, :] # (6, 32), ... for batch dim + poke_party_head = self.poke_party_head(party_pokemon_features) # (6, 32) + poke_party_head = self.poke_party_head_max_pool(poke_party_head).squeeze(-2) # (6, 32) -> (32, ) + + # Pokemon opp head + opp_pokemon_features = pokemon_features[..., 6:, :] # (6, 32), ... for batch dim + poke_opp_head = self.poke_opp_head(opp_pokemon_features) # (6, 32) + poke_opp_head = self.poke_opp_head_max_pool(poke_opp_head).squeeze(-2) # (6, 32) -> (32, ) + + # Items + embedded_item_ids = self.item_ids_embedding(observations['item_ids'].to(th.int)) # (20, 16) + # item_quantity + item_quantity = observations['item_quantity'] # (20, 1) + item_concat = th.cat([embedded_item_ids, item_quantity], dim=-1) # (20, 17) + item_features = self.item_ids_fc_relu(item_concat) # (20, 16) + item_features = self.item_ids_max_pool(item_features).squeeze(-2) # (20, 16) -> (16, ) + + # Events + embedded_event_ids = self.event_ids_embedding(observations['event_ids'].to(th.int)) + # event_step_since + event_step_since = observations['event_step_since'] # (20, 1) + event_concat = th.cat([embedded_event_ids, event_step_since], dim=-1) # (20, 17) + event_features = self.event_ids_fc_relu(event_concat) + event_features = self.event_ids_max_pool(event_features).squeeze(-2) # (20, 16) -> (16, ) + + # Map_IDs + + + # Raw vector + vector = observations['vector'] # (54, ) + + # Concat all features + all_features = th.cat([img, poke_party_head, poke_opp_head, item_features, event_features, vector], dim=-1) # (406, ) + + hidden = self.fc2(F.relu(self.fc1(all_features))) + return hidden, None + + def decode_actions(self, hidden, lookup): + action = self.action(hidden) + value = self.value_head(hidden) + return action, value \ No newline at end of file diff --git a/pufferlib/environments/procgen/__init__.py b/pufferlib/environments/procgen/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/procgen/environment.py b/pufferlib/environments/procgen/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/procgen/torch.py b/pufferlib/environments/procgen/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/smac/__init__.py b/pufferlib/environments/smac/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/smac/environment.py b/pufferlib/environments/smac/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/smac/torch.py b/pufferlib/environments/smac/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/stable_retro/__init__.py b/pufferlib/environments/stable_retro/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/stable_retro/environment.py b/pufferlib/environments/stable_retro/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/stable_retro/torch.py b/pufferlib/environments/stable_retro/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/test/__init__.py b/pufferlib/environments/test/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/test/environment.py b/pufferlib/environments/test/environment.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/test/mock_environments.py b/pufferlib/environments/test/mock_environments.py old mode 100644 new mode 100755 diff --git a/pufferlib/environments/test/torch.py b/pufferlib/environments/test/torch.py old mode 100644 new mode 100755 diff --git a/pufferlib/evaluation.py b/pufferlib/evaluation.py old mode 100644 new mode 100755 diff --git a/pufferlib/exceptions.py b/pufferlib/exceptions.py old mode 100644 new mode 100755 diff --git a/pufferlib/extensions.c b/pufferlib/extensions.c old mode 100644 new mode 100755 index 497a4234..5df84d33 --- a/pufferlib/extensions.c +++ b/pufferlib/extensions.c @@ -1,4 +1,4 @@ -/* Generated by Cython 3.0.6 */ +/* Generated by Cython 3.0.8 */ /* BEGIN: Cython Metadata { @@ -36,10 +36,10 @@ END: Cython Metadata */ #else #define __PYX_EXTRA_ABI_MODULE_NAME "" #endif -#define CYTHON_ABI "3_0_6" __PYX_EXTRA_ABI_MODULE_NAME +#define CYTHON_ABI "3_0_8" __PYX_EXTRA_ABI_MODULE_NAME #define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI #define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "." -#define CYTHON_HEX_VERSION 0x030006F0 +#define CYTHON_HEX_VERSION 0x030008F0 #define CYTHON_FUTURE_DIVISION 1 #include #ifndef offsetof @@ -581,14 +581,14 @@ END: Cython Metadata */ PyObject *exception_table = NULL; PyObject *types_module=NULL, *code_type=NULL, *result=NULL; #if __PYX_LIMITED_VERSION_HEX < 0x030B0000 - PyObject *version_info; // borrowed + PyObject *version_info; PyObject *py_minor_version = NULL; #endif long minor_version = 0; PyObject *type, *value, *traceback; PyErr_Fetch(&type, &value, &traceback); #if __PYX_LIMITED_VERSION_HEX >= 0x030B0000 - minor_version = 11; // we don't yet need to distinguish between versions > 11 + minor_version = 11; #else if (!(version_info = PySys_GetObject("version_info"))) goto end; if (!(py_minor_version = PySequence_GetItem(version_info, 1))) goto end; @@ -646,7 +646,7 @@ END: Cython Metadata */ PyObject *fv, PyObject *cell, PyObject* fn, PyObject *name, int fline, PyObject *lnos) { PyCodeObject *result; - PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0); // we don't have access to __pyx_empty_bytes here + PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0); if (!empty_bytes) return NULL; result = #if PY_VERSION_HEX >= 0x030C0000 @@ -1342,7 +1342,7 @@ static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); #endif typedef Py_ssize_t __Pyx_compact_pylong; typedef size_t __Pyx_compact_upylong; - #else // Py < 3.12 + #else #define __Pyx_PyLong_IsNeg(x) (Py_SIZE(x) < 0) #define __Pyx_PyLong_IsNonNeg(x) (Py_SIZE(x) >= 0) #define __Pyx_PyLong_IsZero(x) (Py_SIZE(x) == 0) @@ -1719,8 +1719,8 @@ static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int #define __Pyx_Arg_NewRef_VARARGS(arg) __Pyx_NewRef(arg) #define __Pyx_Arg_XDECREF_VARARGS(arg) Py_XDECREF(arg) #else - #define __Pyx_Arg_NewRef_VARARGS(arg) arg // no-op - #define __Pyx_Arg_XDECREF_VARARGS(arg) // no-op - arg is borrowed + #define __Pyx_Arg_NewRef_VARARGS(arg) arg + #define __Pyx_Arg_XDECREF_VARARGS(arg) #endif #define __Pyx_NumKwargs_VARARGS(kwds) PyDict_Size(kwds) #define __Pyx_KwValues_VARARGS(args, nargs) NULL @@ -1736,8 +1736,9 @@ static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int #else #define __Pyx_KwargsAsDict_FASTCALL(kw, kwvalues) _PyStack_AsDict(kwvalues, kw) #endif - #define __Pyx_Arg_NewRef_FASTCALL(arg) arg // no-op, __Pyx_Arg_FASTCALL is direct and this needs - #define __Pyx_Arg_XDECREF_FASTCALL(arg) // no-op - arg was returned from array + #define __Pyx_Arg_NewRef_FASTCALL(arg) arg /* no-op, __Pyx_Arg_FASTCALL is direct and this needs + to have the same reference counting */ + #define __Pyx_Arg_XDECREF_FASTCALL(arg) #else #define __Pyx_Arg_FASTCALL __Pyx_Arg_VARARGS #define __Pyx_NumKwargs_FASTCALL __Pyx_NumKwargs_VARARGS @@ -1951,7 +1952,7 @@ typedef struct { #endif void *defaults; int defaults_pyobjects; - size_t defaults_size; // used by FusedFunction for copying defaults + size_t defaults_size; int flags; PyObject *defaults_tuple; PyObject *defaults_kwdict; @@ -4123,7 +4124,7 @@ static CYTHON_SMALL_CODE int __pyx_pymod_exec_extensions(PyObject *__pyx_pyinit_ __pyx_t_1 = PyModule_Create(&__pyx_moduledef); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) { int add_module_result = PyState_AddModule(__pyx_t_1, &__pyx_moduledef); - __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to extensions pseudovariable */ + __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to "extensions" pseudovariable */ if (unlikely((add_module_result < 0))) __PYX_ERR(0, 1, __pyx_L1_error) pystate_addmodule_run = 1; } @@ -4536,9 +4537,10 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg /* UnpackUnboundCMethod */ static PyObject *__Pyx_SelflessCall(PyObject *method, PyObject *args, PyObject *kwargs) { + PyObject *result; PyObject *selfless_args = PyTuple_GetSlice(args, 1, PyTuple_Size(args)); if (unlikely(!selfless_args)) return NULL; - PyObject *result = PyObject_Call(method, selfless_args, kwargs); + result = PyObject_Call(method, selfless_args, kwargs); Py_DECREF(selfless_args); return result; } @@ -4637,9 +4639,10 @@ static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { /* IterFinish */ static CYTHON_INLINE int __Pyx_IterFinish(void) { + PyObject* exc_type; __Pyx_PyThreadState_declare __Pyx_PyThreadState_assign - PyObject* exc_type = __Pyx_PyErr_CurrentExceptionType(); + exc_type = __Pyx_PyErr_CurrentExceptionType(); if (unlikely(exc_type)) { if (unlikely(!__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) return -1; @@ -4858,11 +4861,11 @@ static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyO { int eq = __Pyx_PyUnicode_Equals(s, PyTuple_GET_ITEM(kwnames, i), Py_EQ); if (unlikely(eq != 0)) { - if (unlikely(eq < 0)) return NULL; // error + if (unlikely(eq < 0)) return NULL; return kwvalues[i]; } } - return NULL; // not found (no exception set) + return NULL; } #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues) { @@ -4949,7 +4952,7 @@ static int __Pyx_ParseOptionalKeywords( if (*name) { values[name-argnames] = value; #if CYTHON_AVOID_BORROWED_REFS - Py_INCREF(value); // transfer ownership of value to values + Py_INCREF(value); Py_DECREF(key); #endif key = NULL; @@ -4968,7 +4971,7 @@ static int __Pyx_ParseOptionalKeywords( && _PyString_Eq(**name, key)) { values[name-argnames] = value; #if CYTHON_AVOID_BORROWED_REFS - value = NULL; // ownership transferred to values + value = NULL; #endif break; } @@ -5000,7 +5003,7 @@ static int __Pyx_ParseOptionalKeywords( if (cmp == 0) { values[name-argnames] = value; #if CYTHON_AVOID_BORROWED_REFS - value = NULL; // ownership transferred to values + value = NULL; #endif break; } @@ -6843,7 +6846,7 @@ static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( #else py_code = PyCode_NewEmpty(filename, funcname, py_line); #endif - Py_XDECREF(py_funcname); // XDECREF since it's only set on Py3 if cline + Py_XDECREF(py_funcname); return py_code; bad: Py_XDECREF(py_funcname); diff --git a/pufferlib/extensions.pyx b/pufferlib/extensions.pyx old mode 100644 new mode 100755 diff --git a/pufferlib/frameworks/__init__.py b/pufferlib/frameworks/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/frameworks/cleanrl.py b/pufferlib/frameworks/cleanrl.py old mode 100644 new mode 100755 diff --git a/pufferlib/frameworks/rllib.py b/pufferlib/frameworks/rllib.py old mode 100644 new mode 100755 diff --git a/pufferlib/models.py b/pufferlib/models.py old mode 100644 new mode 100755 diff --git a/pufferlib/namespace.py b/pufferlib/namespace.py old mode 100644 new mode 100755 diff --git a/pufferlib/policy_pool.py b/pufferlib/policy_pool.py old mode 100644 new mode 100755 diff --git a/pufferlib/policy_ranker.py b/pufferlib/policy_ranker.py old mode 100644 new mode 100755 diff --git a/pufferlib/policy_store.py b/pufferlib/policy_store.py old mode 100644 new mode 100755 diff --git a/pufferlib/pytorch.py b/pufferlib/pytorch.py old mode 100644 new mode 100755 diff --git a/pufferlib/rating.py b/pufferlib/rating.py old mode 100644 new mode 100755 diff --git a/pufferlib/spaces.py b/pufferlib/spaces.py old mode 100644 new mode 100755 diff --git a/pufferlib/utils.py b/pufferlib/utils.py old mode 100644 new mode 100755 diff --git a/pufferlib/vectorization/__init__.py b/pufferlib/vectorization/__init__.py old mode 100644 new mode 100755 diff --git a/pufferlib/vectorization/gym_multi_env.py b/pufferlib/vectorization/gym_multi_env.py old mode 100644 new mode 100755 diff --git a/pufferlib/vectorization/multi_env.py b/pufferlib/vectorization/multi_env.py old mode 100644 new mode 100755 diff --git a/pufferlib/vectorization/multiprocessing_vec_env.py b/pufferlib/vectorization/multiprocessing_vec_env.py old mode 100644 new mode 100755 diff --git a/pufferlib/vectorization/pettingzoo_multi_env.py b/pufferlib/vectorization/pettingzoo_multi_env.py old mode 100644 new mode 100755 diff --git a/pufferlib/vectorization/ray_vec_env.py b/pufferlib/vectorization/ray_vec_env.py old mode 100644 new mode 100755 diff --git a/pufferlib/vectorization/serial_vec_env.py b/pufferlib/vectorization/serial_vec_env.py old mode 100644 new mode 100755 diff --git a/pufferlib/vectorization/vec_env.py b/pufferlib/vectorization/vec_env.py old mode 100644 new mode 100755 diff --git a/pufferlib/version.py b/pufferlib/version.py old mode 100644 new mode 100755 diff --git a/pufferlib/wrappers.py b/pufferlib/wrappers.py old mode 100644 new mode 100755 diff --git a/pyproject.toml b/pyproject.toml old mode 100644 new mode 100755 diff --git a/rllib_ppo.py b/rllib_ppo.py old mode 100644 new mode 100755 diff --git a/sb3_demo.py b/sb3_demo.py old mode 100644 new mode 100755 diff --git a/scripts/run_baselines.sh b/scripts/run_baselines.sh old mode 100644 new mode 100755 diff --git a/scripts/run_bsuite.py b/scripts/run_bsuite.py old mode 100644 new mode 100755 diff --git a/scripts/run_procgen_small.sh b/scripts/run_procgen_small.sh old mode 100644 new mode 100755 diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 diff --git a/test_pokemon_red.py b/test_pokemon_red.py old mode 100644 new mode 100755 diff --git a/tests/__init__.py b/tests/__init__.py old mode 100644 new mode 100755 diff --git a/tests/mock_environments.py b/tests/mock_environments.py old mode 100644 new mode 100755 diff --git a/tests/pool/envpool_results.npy b/tests/pool/envpool_results.npy old mode 100644 new mode 100755 diff --git a/tests/pool/plot_packing.py b/tests/pool/plot_packing.py old mode 100644 new mode 100755 diff --git a/tests/pool/test_basic_multprocessing.py b/tests/pool/test_basic_multprocessing.py old mode 100644 new mode 100755 diff --git a/tests/pool/test_envpool.py b/tests/pool/test_envpool.py old mode 100644 new mode 100755 diff --git a/tests/pool/test_multiprocessing.py b/tests/pool/test_multiprocessing.py old mode 100644 new mode 100755 diff --git a/tests/test_atari_reset.py b/tests/test_atari_reset.py old mode 100644 new mode 100755 diff --git a/tests/test_cleanrl_utils.py b/tests/test_cleanrl_utils.py old mode 100644 new mode 100755 diff --git a/tests/test_docs_nmmo.py b/tests/test_docs_nmmo.py old mode 100644 new mode 100755 diff --git a/tests/test_emulation.py b/tests/test_emulation.py old mode 100644 new mode 100755 diff --git a/tests/test_environments.py b/tests/test_environments.py old mode 100644 new mode 100755 diff --git a/tests/test_error_handling.py b/tests/test_error_handling.py old mode 100644 new mode 100755 diff --git a/tests/test_extensions.py b/tests/test_extensions.py old mode 100644 new mode 100755 diff --git a/tests/test_flatten.py b/tests/test_flatten.py old mode 100644 new mode 100755 diff --git a/tests/test_import_performance.py b/tests/test_import_performance.py old mode 100644 new mode 100755 diff --git a/tests/test_namespace.py b/tests/test_namespace.py old mode 100644 new mode 100755 diff --git a/tests/test_policy_pool.py b/tests/test_policy_pool.py old mode 100644 new mode 100755 diff --git a/tests/test_policy_ranker.py b/tests/test_policy_ranker.py old mode 100644 new mode 100755 diff --git a/tests/test_policy_store.py b/tests/test_policy_store.py old mode 100644 new mode 100755 diff --git a/tests/test_registry.sh b/tests/test_registry.sh old mode 100644 new mode 100755 diff --git a/tests/test_render.py b/tests/test_render.py old mode 100644 new mode 100755 diff --git a/tests/test_serialize.py b/tests/test_serialize.py old mode 100644 new mode 100755 diff --git a/tests/test_teams.py b/tests/test_teams.py old mode 100644 new mode 100755 diff --git a/tests/test_tournament.py b/tests/test_tournament.py old mode 100644 new mode 100755 diff --git a/tests/test_utils.py b/tests/test_utils.py old mode 100644 new mode 100755 diff --git a/tests/test_vectorization.py b/tests/test_vectorization.py old mode 100644 new mode 100755