DLR-RM · araffin · Sep 28, 2023 · Sep 26, 2023 · Sep 27, 2023 · Sep 27, 2023
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -3,7 +3,7 @@
 Changelog
 ==========
 
-Release 2.2.0a5 (WIP)
+Release 2.2.0a6 (WIP)
 --------------------------
 
 Breaking Changes:
@@ -49,6 +49,9 @@ Others:
 - Fixed ``stable_baselines3/common/vec_env/vec_video_recorder.py`` type hints
 - Fixed ``stable_baselines3/common/save_util.py`` type hints
 - Updated docker images to  Ubuntu Jammy using micromamba 1.5
+- Fixed ``stable_baselines3/common/buffers.py`` type hints
+- Fixed ``stable_baselines3/her/her_replay_buffer.py`` type hints
+- Buffers do not call an additional ``.copy()`` when storing new transitions
 
 Documentation:
 ^^^^^^^^^^^^^^

diff --git a/pyproject.toml b/pyproject.toml
@@ -27,19 +27,27 @@ line-length = 127
 [tool.pytype]
 inputs = ["stable_baselines3"]
 disable = ["pyi-error"]
+# Checked with mypy
+exclude = [
+  "stable_baselines3/common/buffers.py",
+  "stable_baselines3/common/base_class.py",
+  "stable_baselines3/common/callbacks.py",
+  "stable_baselines3/common/on_policy_algorithm.py",
+  "stable_baselines3/common/vec_env/stacked_observations.py",
+  "stable_baselines3/common/vec_env/subproc_vec_env.py",
+  "stable_baselines3/common/vec_env/patch_gym.py"
+]
 
 [tool.mypy]
 ignore_missing_imports = true
 follow_imports = "silent"
 show_error_codes = true
 exclude = """(?x)(
-    stable_baselines3/common/buffers.py$
-    | stable_baselines3/common/distributions.py$
+    stable_baselines3/common/distributions.py$
     | stable_baselines3/common/off_policy_algorithm.py$
     | stable_baselines3/common/policies.py$
     | stable_baselines3/common/vec_env/__init__.py$
     | stable_baselines3/common/vec_env/vec_normalize.py$
-    | stable_baselines3/her/her_replay_buffer.py$
     | tests/test_logger.py$
     | tests/test_train_eval_mode.py$
   )"""

diff --git a/stable_baselines3/common/base_class.py b/stable_baselines3/common/base_class.py
@@ -420,9 +420,7 @@ def _setup_learn(
         # Avoid resetting the environment when calling ``.learn()`` consecutive times
         if reset_num_timesteps or self._last_obs is None:
             assert self.env is not None
-            # pytype: disable=annotation-type-mismatch
             self._last_obs = self.env.reset()  # type: ignore[assignment]
-            # pytype: enable=annotation-type-mismatch
             self._last_episode_starts = np.ones((self.env.num_envs,), dtype=bool)
             # Retrieve unnormalized observation for saving into the buffer
             if self._vec_normalize_env is not None:
@@ -707,7 +705,7 @@ def load(  # noqa: C901
 
         # Gym -> Gymnasium space conversion
         for key in {"observation_space", "action_space"}:
-            data[key] = _convert_space(data[key])  # pytype: disable=unsupported-operands
+            data[key] = _convert_space(data[key])
 
         if env is not None:
             # Wrap first if needed
@@ -726,14 +724,12 @@ def load(  # noqa: C901
             if "env" in data:
                 env = data["env"]
 
-        # pytype: disable=not-instantiable,wrong-keyword-args
         model = cls(
             policy=data["policy_class"],
             env=env,
             device=device,
             _init_setup_model=False,  # type: ignore[call-arg]
         )
-        # pytype: enable=not-instantiable,wrong-keyword-args
 
         # load parameters
         model.__dict__.update(data)
@@ -776,7 +772,7 @@ def load(  # noqa: C901
         # Sample gSDE exploration matrix, so it uses the right device
         # see issue #44
         if model.use_sde:
-            model.policy.reset_noise()  # type: ignore[operator]  # pytype: disable=attribute-error
+            model.policy.reset_noise()  # type: ignore[operator]
         return model
 
     def get_parameters(self) -> Dict[str, Dict]: