diff --git a/CHANGELOG.md b/CHANGELOG.md
index 91171159b..772128d56 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-## Release 2.2.0a11 (WIP)
+## Release 2.2.0a12 (WIP)
 
 ### Breaking Changes
 - Removed `gym` dependency, the package is still required for some pretrained agents.
@@ -10,6 +10,8 @@
 - Added `ppo_lstm` to hyperparams_opt.py (@technocrat13)
 
 ### Bug fixes
+- Upgraded to `pybullet_envs_gymnasium>=0.4.0`
+- Removed old hacks (for instance limiting offpolicy algorithms to one env at test time)
 
 ### Documentation
 
diff --git a/requirements.txt b/requirements.txt
index 698aabf41..71fbdd8c4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ stable-baselines3[extra_no_roms,tests,docs]>=2.2.0a11,<3.0
 sb3-contrib>=2.2.0a11,<3.0
 box2d-py==2.3.8
 pybullet
-pybullet_envs_gymnasium
+pybullet_envs_gymnasium>=0.4.0
 # minigrid
 # scikit-optimize
 optuna~=3.0
diff --git a/rl_zoo3/enjoy.py b/rl_zoo3/enjoy.py
index 71cdd8467..4cb717a7d 100644
--- a/rl_zoo3/enjoy.py
+++ b/rl_zoo3/enjoy.py
@@ -126,9 +126,6 @@ def enjoy() -> None:  # noqa: C901
     # Off-policy algorithm only support one env for now
     off_policy_algos = ["qrdqn", "dqn", "ddpg", "sac", "her", "td3", "tqc"]
 
-    if algo in off_policy_algos:
-        args.n_envs = 1
-
     set_random_seed(args.seed)
 
     if args.num_threads > 0:
diff --git a/rl_zoo3/utils.py b/rl_zoo3/utils.py
index 575f27974..7ecd92669 100644
--- a/rl_zoo3/utils.py
+++ b/rl_zoo3/utils.py
@@ -211,9 +211,6 @@ def create_test_env(
     :param env_kwargs: Optional keyword argument to pass to the env constructor
     :return:
     """
-    # Avoid circular import
-    from rl_zoo3.exp_manager import ExperimentManager
-
     # Create the environment and wrap it if necessary
     assert hyperparams is not None
     env_wrapper = get_wrapper_class(hyperparams)
@@ -224,12 +221,8 @@ def create_test_env(
         del hyperparams["env_wrapper"]
 
     vec_env_kwargs: Dict[str, Any] = {}
-    vec_env_cls = DummyVecEnv
-    if n_envs > 1 or (ExperimentManager.is_bullet(env_id) and should_render):
-        # HACK: force SubprocVecEnv for Bullet env
-        # as Pybullet envs does not follow gym.render() interface
-        vec_env_cls = SubprocVecEnv  # type: ignore[assignment]
-        # start_method = 'spawn' for thread safe
+    # Avoid potential shared memory issue
+    vec_env_cls = SubprocVecEnv if n_envs > 1 else DummyVecEnv
 
     # Fix for gym 0.26, to keep old behavior
     env_kwargs = env_kwargs or {}
@@ -252,7 +245,7 @@ def make_env(**kwargs) -> gym.Env:
         seed=seed,
         wrapper_class=env_wrapper,
         env_kwargs=env_kwargs,
-        vec_env_cls=vec_env_cls,
+        vec_env_cls=vec_env_cls,  # type: ignore[arg-type]
         vec_env_kwargs=vec_env_kwargs,
     )
 
diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt
index 13ce6d730..5740e0cc4 100644
--- a/rl_zoo3/version.txt
+++ b/rl_zoo3/version.txt
@@ -1 +1 @@
-2.2.0a11
+2.2.0a12