(v3.2.3) - Sinergym IncrementalWrapper (#391)

* Added ContinuousIncrementalWrapper class * Merging with main * IncrementalWrapper refinement * Added documentation for new incremental wrapper * Apply spelling fix * Added tests * Updated Sinergym version from 3.2.2 to 3.2.3 * pytype fix
ugr-sail · Feb 23, 2024 · 3203826 · 3203826
1 parent bbffab5
commit 3203826
Show file tree

Hide file tree

Showing 13 changed files with 243 additions and 11 deletions.
diff --git a/docs/source/_static/discrete_incremental_wrapper.png b/docs/source/_static/discrete_incremental_wrapper.png
diff --git a/docs/source/_static/incremental_wrapper.png b/docs/source/_static/incremental_wrapper.png
diff --git a/docs/source/pages/environments.rst b/docs/source/pages/environments.rst
@@ -461,7 +461,7 @@ To add new buildings for use with *Sinergym*, follow these steps:
 
    The challenge lies in knowing the names but not the possible *Keys* (EnergyPlus does not initially provide this information). 
    These names can be used to define the environment (see step 3). If the *Key* is incorrect, *Sinergym* will notify of the 
-   error and provide a file called **data_available.txt** in the aoutput, since it has already connected with the EnergyPlus API. This file will 
+   error and provide a file called **data_available.txt** in the output, since it has already connected with the EnergyPlus API. This file will 
    contain all the **controllable schedulers** for the actions and all the **observable variables**, this time with their respective *Keys*, 
    enabling the correct definition of the environment.
 

diff --git a/docs/source/pages/modules/sinergym.utils.wrappers.IncrementalWrapper.rst b/docs/source/pages/modules/sinergym.utils.wrappers.IncrementalWrapper.rst
@@ -0,0 +1,46 @@
+sinergym.utils.wrappers.IncrementalWrapper
+==========================================
+
+.. currentmodule:: sinergym.utils.wrappers
+
+.. autoclass:: IncrementalWrapper
+   :members:                                                           
+   :undoc-members:               
+
+
+   .. automethod:: __init__
+
+
+   .. rubric:: Methods
+
+   .. autosummary::
+
+      ~IncrementalWrapper.__init__
+      ~IncrementalWrapper.action
+      ~IncrementalWrapper.class_name
+      ~IncrementalWrapper.close
+      ~IncrementalWrapper.get_wrapper_attr
+      ~IncrementalWrapper.render
+      ~IncrementalWrapper.reset
+      ~IncrementalWrapper.step
+      ~IncrementalWrapper.wrapper_spec
+
+
+
+
+
+   .. rubric:: Attributes
+
+   .. autosummary::
+
+      ~IncrementalWrapper.action_space
+      ~IncrementalWrapper.logger
+      ~IncrementalWrapper.metadata
+      ~IncrementalWrapper.np_random
+      ~IncrementalWrapper.observation_space
+      ~IncrementalWrapper.render_mode
+      ~IncrementalWrapper.reward_range
+      ~IncrementalWrapper.spec
+      ~IncrementalWrapper.unwrapped
+
+
diff --git a/docs/source/pages/modules/sinergym.utils.wrappers.rst b/docs/source/pages/modules/sinergym.utils.wrappers.rst
@@ -22,6 +22,7 @@
       DatetimeWrapper
       DiscreteIncrementalWrapper
       DiscretizeEnv
+      IncrementalWrapper
       LoggerWrapper
       MultiObjectiveReward
       MultiObsWrapper

diff --git a/docs/source/pages/wrappers.rst b/docs/source/pages/wrappers.rst
@@ -72,19 +72,43 @@ output doesn't match with original environment action space, an error will be ra
   :alt: Discretize wrapper graph.
   :align: center
 
+***************************
+IncrementalWrapper
+***************************
+
+A wrapper that transforms some of the continuous environment variables into actions indicating an increase/decrease of their current value,
+rather than setting the value directly. The variables to transform are specified with a dictionary argument whose keys are
+the variable names and whose values are tuples with two numbers called **delta** and **step**.
+These two numbers define the set of possible increments/decrements for each selected variable.
+
+- **delta**: Maximum increment or decrement that can be applied to the variable in a single action.
+
+- **step**: Spacing between consecutive possible increments, from **-delta** to **delta**.
+
+The following figure illustrates its operation. Each action value is rounded to the nearest possible increment, added to the
+current value of the variable and clipped to the original action space:
+
+.. image:: /_static/incremental_wrapper.png
+  :scale: 50 %
+  :alt: Incremental wrapper graph.
+  :align: center
+
 ***************************
 DiscreteIncrementalWrapper
 ***************************
 
 A wrapper for an incremental setpoint action space environment. This wrapper
 will update an environment, converting it in a *discrete* environment with an action mapping function and action space 
-depending on the **step** and **delta** specified. The action will be sum with **current setpoint** values instead of overwrite the latest action. 
+depending on the **delta** and **step** specified. The action will be added to the **current setpoint** values instead of overwriting the latest action. 
 Then, the action is the current setpoint values with the increase instead of the discrete value action whose purpose is to define 
-the increment/decrement itself. 
+the increment/decrement itself.
 
-.. image:: /_static/incremental_wrapper.png
+.. warning:: This wrapper fully changes the action space from continuous to discrete, meaning that increments/decrements 
+             apply to all variables. In essence, selecting variables individually as in IncrementalWrapper is not possible.
+
+.. image:: /_static/discrete_incremental_wrapper.png
   :scale: 50 %
-  :alt: Incremental wrapper graph.
+  :alt: Discrete incremental wrapper graph.
   :align: center
 
 ***********************

diff --git a/docs/source/spelling_wordlist.txt b/docs/source/spelling_wordlist.txt
@@ -10,6 +10,7 @@ async
 atari
 attr
 auth
+AUTOBALANCE
 avconv
 backend
 backpropagate
@@ -24,10 +25,12 @@ cartpole
 cfg
 Cloudpickle
 cnn
+Cº
 coef
 colab
 Conda
 conda
+conf
 Config
 config
 Contrib
@@ -56,6 +59,11 @@ deserialized
 dicts
 dir
 DiscreteIncrementalWrapper
+discretization
+Discretization
+discretize
+DiscretizeEnv
+discretizes
 Dockerfile
 docstring
 doesn
@@ -111,6 +119,7 @@ hyperparameter
 hyperparameters
 idd
 idf
+IncrementalWrapper
 init
 initialise
 initialize
@@ -122,6 +131,7 @@ json
 kfac
 Khee
 kwargs
+Lieber
 lifecycle
 LinearReward
 Lisboa
@@ -140,6 +150,7 @@ Mlflow
 mlflow
 mlp
 ModelJSON
+modularizing
 mon
 mujoco
 MultiObjectiveReward
@@ -154,6 +165,8 @@ ndarrays
 neglogp
 nn
 normalisation
+NormalizeAction
+NormalizedLinearReward
 NormalizeObservation
 np
 npz
@@ -198,6 +211,7 @@ RBCDatacenter
 repo
 reproducibility
 rescale
+resistive
 rl
 rollout
 rollouts
@@ -215,6 +229,7 @@ softmax
 src
 stdout
 stepsize
+stochasticity
 str
 subprocess
 subprocesses

diff --git a/examples/drl.ipynb b/examples/drl.ipynb
@@ -441,7 +441,7 @@
             "source": [
                 "Now, is time to train the model with the callbacks defined earlier. This may take a few minutes, depending on your computer.\n",
                 "\n",
-                ":warning: The warning messages that appear in `model.learn()` output is due to Stable Baselines 3 is not adapted to new standar to get environment attributes yet."
+                ":warning: The warning messages that appear in the `model.learn()` output are due to Stable Baselines 3 not being adapted yet to the new standard for getting environment attributes."
             ]
         },
         {

diff --git a/examples/wrappers_examples.ipynb b/examples/wrappers_examples.ipynb
@@ -425,7 +425,7 @@
             "cell_type": "markdown",
             "metadata": {},
             "source": [
-                "This is an example about how to normalize a previous continuos environment action space. If we don't define the range values, it will be constructed with the range `[-1,1]` by default:"
+                "This is an example about how to normalize a previous continuous environment action space. If we don't define the range values, it will be constructed with the range `[-1,1]` by default:"
             ]
         },
         {
@@ -538,7 +538,7 @@
             "cell_type": "markdown",
             "metadata": {},
             "source": [
-                "This is an example about how to discretize a previous continuos environment. We will require the **new discrete action space** and an **action mapping function** whose output matches with the original unwrapped environment action space:"
+                "This is an example about how to discretize a previous continuous environment. We will require the **new discrete action space** and an **action mapping function** whose output matches with the original unwrapped environment action space:"
             ]
         },
         {

diff --git a/sinergym/utils/wrappers.py b/sinergym/utils/wrappers.py
@@ -510,6 +510,119 @@ def observation(self, obs: np.ndarray) -> np.ndarray:
         return new_obs
 
 
+class IncrementalWrapper(gym.ActionWrapper):
+    """Action wrapper that turns selected continuous action variables into increments of their current value."""
+
+    logger = Logger().getLogger(name='WRAPPER IncrementalWrapper',
+                                level=LOG_WRAPPERS_LEVEL)
+
+    def __init__(
+        self,
+        env: gym.Env,
+        incremental_variables_definition: Dict[str, Tuple[float, float]],
+        initial_values: List[float],
+    ):
+        """Validate the arguments and build the incremental action space.
+
+        Args:
+            env (gym.Env): Original Sinergym environment. It must be continuous.
+            incremental_variables_definition (Dict[str, Tuple[float, float]]): Maps each incremental variable name to a (delta, step) tuple.
+                Allowed increments are 0 and the multiples of step up to delta, in both directions.
+            initial_values (List[float]): Starting value of each incremental variable, in the same order as the dictionary entries.
+        """
+
+        super().__init__(env)
+
+        # Running value of each incremental variable, updated by action()
+        self.current_values = initial_values  # NOTE(review): same list object as the argument, so the caller's list is mutated too
+
+        # Validate arguments; failures are logged and re-raised (NOTE(review): asserts are skipped under python -O)
+        try:
+            assert not self.env.get_wrapper_attr('is_discrete')
+        except AssertionError as err:
+            self.logger.error(
+                'Env wrapped by this wrapper must be continuous.')
+            raise err
+        try:
+            assert all([variable in self.env.get_wrapper_attr('action_variables')
+                       for variable in list(incremental_variables_definition.keys())])
+        except AssertionError as err:
+            self.logger.error(
+                'Some of the incremental variables specified does not exist as action variable in environment.')
+            raise err
+        try:
+            assert len(initial_values) == len(
+                incremental_variables_definition)
+        except AssertionError as err:
+            self.logger.error(
+                'Number of incremental variables does not match with initial values')
+            raise err
+
+        # Allowed increments of each incremental variable, keyed by its index in the action vector
+        self.values_definition = {}
+        # Copies of the wrapped action space bounds; incremental entries are overwritten below
+        action_space_low = deepcopy(self.env.action_space.low)
+        action_space_high = deepcopy(self.env.action_space.high)
+        # Compute the allowed increments and the new bounds for each
+        # incremental variable
+        for variable, (delta_temp,
+                       step_temp) in incremental_variables_definition.items():
+
+            # Positive increments: step, 2*step, ... The step/10 margin makes the exclusive stop include delta.
+            values = np.arange(
+                step_temp,
+                delta_temp +
+                step_temp /
+                10,
+                step_temp)
+            values = [v for v in [*-np.flip(values), 0, *values]]  # negatives, zero, positives (ascending)
+
+            # Position of this variable in the environment's action vector
+            index = self.env.get_wrapper_attr(
+                'action_variables').index(variable)
+
+            self.values_definition[index] = values
+            action_space_low[index] = min(values)
+            action_space_high[index] = max(values)
+
+        # Same shape as the wrapped space, with increment ranges as bounds of the incremental variables
+        self.action_space = gym.spaces.Box(
+            low=action_space_low,
+            high=action_space_high,
+            shape=self.env.action_space.shape,
+            dtype=np.float32)
+
+        self.logger.info(
+            'New incremental continuous action space: {}'.format(
+                self.action_space))
+        self.logger.info(
+            'Incremental variables configuration (variable: delta, step): {}'.format(
+                incremental_variables_definition))
+        self.logger.info('Wrapper initialized')
+
+    def action(self, action):
+        """Round each incremental entry to its nearest allowed increment, add it to the current value, clip it and return the action as a list."""
+        action_ = deepcopy(action)
+
+        # Only incremental variables are modified; other entries pass through unchanged
+        for i, (index, values) in enumerate(self.values_definition.items()):
+            # Raw increment requested for this variable
+            increment_value = action[index]
+            # Snap it to the nearest allowed increment
+            increment_value = min(
+                values, key=lambda x: abs(
+                    x - increment_value))
+            # Accumulate it on the stored current value
+            self.current_values[i] += increment_value
+            # Keep the result within the wrapped environment's bounds
+            self.current_values[i] = max(self.env.action_space.low[index], min(
+                self.current_values[i], self.env.action_space.high[index]))
+
+            action_[index] = self.current_values[i]
+
+        return list(action_)
+
+
 class DiscreteIncrementalWrapper(gym.ActionWrapper):
     """A wrapper for an incremental setpoint discrete action space environment.
     WARNING: A environment with only temperature setpoints control must be used

diff --git a/sinergym/version.txt b/sinergym/version.txt
@@ -1 +1 @@
-3.2.2
+3.2.3
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -439,6 +439,18 @@ def env_wrapper_previousobs(env_5zone):
 
 @pytest.fixture(scope='function')
 def env_wrapper_incremental(env_5zone):
+    return IncrementalWrapper(
+        env=env_5zone,
+        incremental_variables_definition={
+            'Heating_Setpoint_RL': (2.0, 0.5),  # (delta, step)
+            'Cooling_Setpoint_RL': (1.0, 0.25)  # (delta, step)
+        },
+        initial_values=[21.0, 25.0],  # one value per variable above, in the same order
+    )
+
+
+@pytest.fixture(scope='function')
+def env_discrete_wrapper_incremental(env_5zone):
     return DiscreteIncrementalWrapper(
         env=env_5zone,
         initial_values=[21.0, 25.0],