From a653aec10d8cb4b82443f818fdde123f06c95f75 Mon Sep 17 00:00:00 2001 From: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Date: Wed, 10 Jan 2024 14:46:40 +0100 Subject: [PATCH] Docs: Env attributes should be modified using env setters (#1789) * add: paragraph on how to modify vec envs attributes via setters (solves DLR-RM#1573) * Update vec env doc * Update callback doc and SB3 version * Fix indentation --------- Co-authored-by: Antonin Raffin --- docs/guide/callbacks.rst | 15 ++++--- docs/guide/vec_envs.rst | 84 +++++++++++++++++++++++++++++++++++ docs/misc/changelog.rst | 35 ++++++++++++++- stable_baselines3/version.txt | 2 +- 4 files changed, 127 insertions(+), 9 deletions(-) diff --git a/docs/guide/callbacks.rst b/docs/guide/callbacks.rst index 239966a6f..472f42114 100644 --- a/docs/guide/callbacks.rst +++ b/docs/guide/callbacks.rst @@ -29,24 +29,25 @@ You can find two examples of custom callbacks in the documentation: one for savi :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages """ - def __init__(self, verbose=0): + def __init__(self, verbose: int = 0): super().__init__(verbose) # Those variables will be accessible in the callback # (they are defined in the base class) # The RL model # self.model = None # type: BaseAlgorithm # An alias for self.model.get_env(), the environment used for training - # self.training_env = None # type: Union[gym.Env, VecEnv, None] + # self.training_env # type: VecEnv # Number of time the callback was called # self.n_calls = 0 # type: int + # num_timesteps = n_envs * n times env.step() was called # self.num_timesteps = 0 # type: int # local and global variables - # self.locals = None # type: Dict[str, Any] - # self.globals = None # type: Dict[str, Any] + # self.locals = {} # type: Dict[str, Any] + # self.globals = {} # type: Dict[str, Any] # The logger object, used to report things in the terminal - # self.logger = None # stable_baselines3.common.logger - # # Sometimes, 
for event callback, it is useful - # # to have access to the parent object + # self.logger # type: stable_baselines3.common.logger.Logger + # Sometimes, for event callback, it is useful + # to have access to the parent object # self.parent = None # type: Optional[BaseCallback] def _on_training_start(self) -> None: diff --git a/docs/guide/vec_envs.rst b/docs/guide/vec_envs.rst index 792fedecb..c04001c7c 100644 --- a/docs/guide/vec_envs.rst +++ b/docs/guide/vec_envs.rst @@ -96,6 +96,90 @@ SB3 VecEnv API is actually close to Gym 0.21 API but differs to Gym 0.26+ API: ``vec_env.env_method("method_name", args1, args2, kwargs1=kwargs1)`` and ``vec_env.set_attr("attribute_name", new_value)``. +Modifying Vectorized Environments Attributes +-------------------------------------------- + +If you plan to `modify the attributes of an environment <https://github.com/DLR-RM/stable-baselines3/issues/1573>`_ while it is used (e.g., modifying an attribute specifying the task carried out for a portion of training when doing multi-task learning, or +a parameter of the environment dynamics), you must expose a setter method. +In fact, directly accessing the environment attribute in the callback can lead to unexpected behavior because environments can be wrapped (using gym or VecEnv wrappers, the ``Monitor`` wrapper being one example). + +Consider the following example for a custom env: + +.. code-block:: python + + import gymnasium as gym + from gymnasium import spaces + + from stable_baselines3.common.env_util import make_vec_env + + + class MyMultiTaskEnv(gym.Env): + + def __init__(self): + super().__init__() + """ + A state and action space for robotic locomotion. + The multi-task twist is that the policy would need to adapt to different terrains, each with its own + friction coefficient, mu. + The friction coefficient is the only parameter that changes between tasks. + mu is a scalar between 0 and 1, and during training a callback is used to update mu. + """ + ... 
+ + def step(self, action): + # Do something, depending on the action and current value of mu the next state is computed + return self._get_obs(), reward, done, truncated, info + + def set_mu(self, new_mu: float) -> None: + # Note: this value should be used only at the next reset + self.mu = new_mu + + # Example of wrapped env + # env is of type <TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>> + env = gym.make("CartPole-v1") + # To access the base env, without wrapper, you should use `.unwrapped` + # or env.get_wrapper_attr("gravity") to include wrappers + env.unwrapped.gravity + # SB3 uses VecEnv for training, where `env.unwrapped.x = new_value` cannot be used to set an attribute + # therefore, you should expose a setter like `set_mu` to properly set an attribute + vec_env = make_vec_env(MyMultiTaskEnv) + # Print current mu value + # Note: you should use vec_env.env_method("get_wrapper_attr", "mu") in Gymnasium v1.0 + print(vec_env.env_method("get_wrapper_attr", "mu")) + # Change `mu` attribute via the setter + vec_env.env_method("set_mu", 0.1) + + +In this example ``env.mu`` cannot be accessed/changed directly because it is wrapped in a ``VecEnv`` and because it could be wrapped with other wrappers (see `GH#1573 <https://github.com/DLR-RM/stable-baselines3/issues/1573>`_ for a longer explanation). +Instead, the callback should use the ``set_mu`` method via the ``env_method`` method for Vectorized Environments. + +.. code-block:: python + + from itertools import cycle + + class ChangeMuCallback(BaseCallback): + """ + This callback changes the value of mu during training looping + through a list of values until training is aborted. + The environment is implemented so that the impact of changing + the value of mu mid-episode is visible only after the episode is over + and the reset method has been called. 
+ """ + def __init__(self): + super().__init__() + # An iterator that contains the different values of the friction coefficient + self.mus = cycle([0.1, 0.2, 0.5, 0.13, 0.9]) + + def _on_step(self): + # Note: in practice, you should not change this value at every step + # but rather depending on some events/metrics like agent performance/episode termination + # both accessible via the `self.logger` or `self.locals` variables + self.training_env.env_method("set_mu", next(self.mus)) + +This callback can then be used to safely modify environment attributes during training since +it calls the environment setter method. + + Vectorized Environments Wrappers -------------------------------- diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 647a0e89e..cbfe41f9d 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -3,6 +3,39 @@ Changelog ========== + +Release 2.3.0a0 (WIP) +-------------------------- + +Breaking Changes: +^^^^^^^^^^^^^^^^^ + +New Features: +^^^^^^^^^^^^^ + +Bug Fixes: +^^^^^^^^^^ + +`SB3-Contrib`_ +^^^^^^^^^^^^^^ + +`RL Zoo`_ +^^^^^^^^^ + +`SBX`_ (SB3 + Jax) +^^^^^^^^^^^^^^^^^^ + +Deprecations: +^^^^^^^^^^^^^ + +Others: +^^^^^^^ + +Documentation: +^^^^^^^^^^^^^^ +- Added a paragraph on modifying vectorized environment parameters via setters (@fracapuano) +- Updated callback code example + Release 2.2.1 (2023-11-17) -------------------------- **Support for options at reset, bug fixes and better error messages** @@ -1490,7 +1523,7 @@ And all the contributors: @flodorner @KuKuXia @NeoExtended @PartiallyTyped @mmcenta @richardwu @kinalmehta @rolandgvc @tkelestemur @mloo3 @tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio @diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray -@tfederico @hn2 @LucasAlegre @AptX395 @zampanteymedio @JadenTravnik @decodyng @ardabbour @lorenz-h @mschweizer @lorepieri8 @vwxyzjn +@tfederico @hn2 @LucasAlegre @AptX395 
@zampanteymedio @fracapuano @JadenTravnik @decodyng @ardabbour @lorenz-h @mschweizer @lorepieri8 @vwxyzjn @ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx @juancroldan @benblack769 @bstee615 @c-rizz @skandermoalla @MihaiAnca13 @davidblom603 @ayeright @cyprienc @wkirgsn @AechPro @CUN-bjy @batu @IljaAvadiev @timokau @kachayev @cleversonahum diff --git a/stable_baselines3/version.txt b/stable_baselines3/version.txt index c043eea77..00b35529e 100644 --- a/stable_baselines3/version.txt +++ b/stable_baselines3/version.txt @@ -1 +1 @@ -2.2.1 +2.3.0a0