From a653aec10d8cb4b82443f818fdde123f06c95f75 Mon Sep 17 00:00:00 2001
From: Francesco Capuano <74058581+fracapuano@users.noreply.github.com>
Date: Wed, 10 Jan 2024 14:46:40 +0100
Subject: [PATCH] Docs: Env attributes should be modified using env setters 
 (#1789)

* add: paragraph on how to modify vec envs attributes via setters (solves
DLR-RM#1573)

* Update vec env doc

* Update callback doc and SB3 version

* Fix indentation

---------

Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
---
 docs/guide/callbacks.rst      | 15 ++++---
 docs/guide/vec_envs.rst       | 84 +++++++++++++++++++++++++++++++++++
 docs/misc/changelog.rst       | 35 ++++++++++++++-
 stable_baselines3/version.txt |  2 +-
 4 files changed, 127 insertions(+), 9 deletions(-)

diff --git a/docs/guide/callbacks.rst b/docs/guide/callbacks.rst
index 239966a6f..472f42114 100644
--- a/docs/guide/callbacks.rst
+++ b/docs/guide/callbacks.rst
@@ -29,24 +29,25 @@ You can find two examples of custom callbacks in the documentation: one for savi
 
         :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
         """
-        def __init__(self, verbose=0):
+        def __init__(self, verbose: int = 0):
             super().__init__(verbose)
             # Those variables will be accessible in the callback
             # (they are defined in the base class)
             # The RL model
             # self.model = None  # type: BaseAlgorithm
             # An alias for self.model.get_env(), the environment used for training
-            # self.training_env = None  # type: Union[gym.Env, VecEnv, None]
+            # self.training_env # type: VecEnv
             # Number of time the callback was called
             # self.n_calls = 0  # type: int
+            # num_timesteps = n_envs * n times env.step() was called
             # self.num_timesteps = 0  # type: int
             # local and global variables
-            # self.locals = None  # type: Dict[str, Any]
-            # self.globals = None  # type: Dict[str, Any]
+            # self.locals = {}  # type: Dict[str, Any]
+            # self.globals = {}  # type: Dict[str, Any]
             # The logger object, used to report things in the terminal
-            # self.logger = None  # stable_baselines3.common.logger
-            # # Sometimes, for event callback, it is useful
-            # # to have access to the parent object
+            # self.logger # type: stable_baselines3.common.logger.Logger
+            # Sometimes, for event callback, it is useful
+            # to have access to the parent object
             # self.parent = None  # type: Optional[BaseCallback]
 
         def _on_training_start(self) -> None:
diff --git a/docs/guide/vec_envs.rst b/docs/guide/vec_envs.rst
index 792fedecb..c04001c7c 100644
--- a/docs/guide/vec_envs.rst
+++ b/docs/guide/vec_envs.rst
@@ -96,6 +96,90 @@ SB3 VecEnv API is actually close to Gym 0.21 API but differs to Gym 0.26+ API:
   ``vec_env.env_method("method_name", args1, args2, kwargs1=kwargs1)`` and ``vec_env.set_attr("attribute_name", new_value)``.
 
 
+Modifying Vectorized Environments Attributes
+--------------------------------------------
+
+If you plan to `modify the attributes of an environment <https://github.com/DLR-RM/stable-baselines3/issues/1573>`_ while it is used (e.g., modifying an attribute specifying the task carried out for a portion of training when doing multi-task learning, or
+a parameter of the environment dynamics), you must expose a setter method.
+In fact, directly accessing the environment attribute in the callback can lead to unexpected behavior because environments can be wrapped (using gym or VecEnv wrappers, the ``Monitor`` wrapper being one example).
+
+Consider the following example for a custom env:
+
+.. code-block:: python
+
+	import gymnasium as gym
+	from gymnasium import spaces
+
+	from stable_baselines3.common.env_util import make_vec_env
+
+
+	class MyMultiTaskEnv(gym.Env):
+
+	  def __init__(self):
+	      super().__init__()
+	      """
+	      A state and action space for robotic locomotion.
+	      The multi-task twist is that the policy would need to adapt to different terrains, each with its own
+	      friction coefficient, mu.
+	      The friction coefficient is the only parameter that changes between tasks.
+	      mu is a scalar between 0 and 1, and during training a callback is used to update mu.
+	      """
+	      ...
+
+	  def step(self, action):
+	    # Do something, depending on the action and current value of mu the next state is computed
+	    return self._get_obs(), reward, done, truncated, info
+
+	  def set_mu(self, new_mu: float) -> None:
+	      # Note: this value should be used only at the next reset
+	      self.mu = new_mu
+
+	# Example of wrapped env
+	# env is of type <TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>>
+	env = gym.make("CartPole-v1")
+	# To access the base env, without wrapper, you should use `.unwrapped`
+	# or env.get_wrapper_attr("gravity") to include wrappers
+	env.unwrapped.gravity
+	# SB3 uses VecEnv for training, where `env.unwrapped.x = new_value` cannot be used to set an attribute
+	# therefore, you should expose a setter like `set_mu` to properly set an attribute
+	vec_env = make_vec_env(MyMultiTaskEnv)
+	# Print current mu value
+	# Note: you should use vec_env.env_method("get_wrapper_attr", "mu") in Gymnasium v1.0
+	print(vec_env.env_method("get_wrapper_attr", "mu"))
+	# Change `mu` attribute via the setter
+	vec_env.env_method("set_mu", 0.1)
+
+
+In this example ``env.mu`` cannot be accessed/changed directly because it is wrapped in a ``VecEnv`` and because it could be wrapped with other wrappers (see `GH#1573 <https://github.com/DLR-RM/stable-baselines3/issues/1573>`_ for a longer explanation).
+Instead, the callback should use the ``set_mu`` method via the ``env_method`` method for Vectorized Environments.
+
+.. code-block:: python
+
+	from itertools import cycle
+
+	class ChangeMuCallback(BaseCallback):
+	  """
+	  This callback changes the value of mu during training looping
+	  through a list of values until training is aborted.
+	  The environment is implemented so that the impact of changing
+	  the value of mu mid-episode is visible only after the episode is over
+	  and the reset method has been called.
+	  """
+	  def __init__(self):
+	    super().__init__()
+	    # An iterator that cycles through the different values of the friction coefficient
+	    self.mus = cycle([0.1, 0.2, 0.5, 0.13, 0.9])
+
+	  def _on_step(self):
+	    # Note: in practice, you should not change this value at every step
+	    # but rather depending on some events/metrics like agent performance/episode termination
+	    # both accessible via the `self.logger` or `self.locals` variables
+	    self.training_env.env_method("set_mu", next(self.mus))
+
+This callback can then be used to safely modify environment attributes during training since
+it calls the environment setter method.
+
+
 Vectorized Environments Wrappers
 --------------------------------
 
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index 647a0e89e..cbfe41f9d 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -3,6 +3,39 @@
 Changelog
 ==========
 
+
+Release 2.3.0a0 (WIP)
+--------------------------
+
+Breaking Changes:
+^^^^^^^^^^^^^^^^^
+
+New Features:
+^^^^^^^^^^^^^
+
+Bug Fixes:
+^^^^^^^^^^
+
+`SB3-Contrib`_
+^^^^^^^^^^^^^^
+
+`RL Zoo`_
+^^^^^^^^^
+
+`SBX`_ (SB3 + Jax)
+^^^^^^^^^^^^^^^^^^
+
+Deprecations:
+^^^^^^^^^^^^^
+
+Others:
+^^^^^^^
+
+Documentation:
+^^^^^^^^^^^^^^
+- Added a paragraph on modifying vectorized environment parameters via setters (@fracapuano)
+- Updated callback code example
+
 Release 2.2.1 (2023-11-17)
 --------------------------
 **Support for options at reset, bug fixes and better error messages**
@@ -1490,7 +1523,7 @@ And all the contributors:
 @flodorner @KuKuXia @NeoExtended @PartiallyTyped @mmcenta @richardwu @kinalmehta @rolandgvc @tkelestemur @mloo3
 @tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio
 @diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray
-@tfederico @hn2 @LucasAlegre @AptX395 @zampanteymedio @JadenTravnik @decodyng @ardabbour @lorenz-h @mschweizer @lorepieri8 @vwxyzjn
+@tfederico @hn2 @LucasAlegre @AptX395 @zampanteymedio @fracapuano @JadenTravnik @decodyng @ardabbour @lorenz-h @mschweizer @lorepieri8 @vwxyzjn
 @ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx @juancroldan
 @benblack769 @bstee615 @c-rizz @skandermoalla @MihaiAnca13 @davidblom603 @ayeright @cyprienc
 @wkirgsn @AechPro @CUN-bjy @batu @IljaAvadiev @timokau @kachayev @cleversonahum
diff --git a/stable_baselines3/version.txt b/stable_baselines3/version.txt
index c043eea77..00b35529e 100644
--- a/stable_baselines3/version.txt
+++ b/stable_baselines3/version.txt
@@ -1 +1 @@
-2.2.1
+2.3.0a0