diff --git a/docs/source/_static/discrete_incremental_wrapper.png b/docs/source/_static/discrete_incremental_wrapper.png new file mode 100644 index 000000000..9c4cff4ae Binary files /dev/null and b/docs/source/_static/discrete_incremental_wrapper.png differ diff --git a/docs/source/_static/incremental_wrapper.png b/docs/source/_static/incremental_wrapper.png index 9c4cff4ae..546c773da 100644 Binary files a/docs/source/_static/incremental_wrapper.png and b/docs/source/_static/incremental_wrapper.png differ diff --git a/docs/source/pages/environments.rst b/docs/source/pages/environments.rst index 18d058617..4161e1963 100644 --- a/docs/source/pages/environments.rst +++ b/docs/source/pages/environments.rst @@ -461,7 +461,7 @@ To add new buildings for use with *Sinergym*, follow these steps: The challenge lies in knowing the names but not the possible *Keys* (EnergyPlus does not initially provide this information). These names can be used to define the environment (see step 3). If the *Key* is incorrect, *Sinergym* will notify of the - error and provide a file called **data_available.txt** in the aoutput, since it has already connected with the EnergyPlus API. This file will + error and provide a file called **data_available.txt** in the output, since it has already connected with the EnergyPlus API. This file will contain all the **controllable schedulers** for the actions and all the **observable variables**, this time with their respective *Keys*, enabling the correct definition of the environment. diff --git a/docs/source/pages/modules/sinergym.utils.wrappers.IncrementalWrapper.rst b/docs/source/pages/modules/sinergym.utils.wrappers.IncrementalWrapper.rst new file mode 100644 index 000000000..bc18989a3 --- /dev/null +++ b/docs/source/pages/modules/sinergym.utils.wrappers.IncrementalWrapper.rst @@ -0,0 +1,46 @@ +sinergym.utils.wrappers.IncrementalWrapper +========================================== + +.. currentmodule:: sinergym.utils.wrappers + +.. 
autoclass:: IncrementalWrapper + :members: + :undoc-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~IncrementalWrapper.__init__ + ~IncrementalWrapper.action + ~IncrementalWrapper.class_name + ~IncrementalWrapper.close + ~IncrementalWrapper.get_wrapper_attr + ~IncrementalWrapper.render + ~IncrementalWrapper.reset + ~IncrementalWrapper.step + ~IncrementalWrapper.wrapper_spec + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~IncrementalWrapper.action_space + ~IncrementalWrapper.logger + ~IncrementalWrapper.metadata + ~IncrementalWrapper.np_random + ~IncrementalWrapper.observation_space + ~IncrementalWrapper.render_mode + ~IncrementalWrapper.reward_range + ~IncrementalWrapper.spec + ~IncrementalWrapper.unwrapped + + \ No newline at end of file diff --git a/docs/source/pages/modules/sinergym.utils.wrappers.rst b/docs/source/pages/modules/sinergym.utils.wrappers.rst index 8d4e5cb90..15d1709e2 100644 --- a/docs/source/pages/modules/sinergym.utils.wrappers.rst +++ b/docs/source/pages/modules/sinergym.utils.wrappers.rst @@ -22,6 +22,7 @@ DatetimeWrapper DiscreteIncrementalWrapper DiscretizeEnv + IncrementalWrapper LoggerWrapper MultiObjectiveReward MultiObsWrapper diff --git a/docs/source/pages/wrappers.rst b/docs/source/pages/wrappers.rst index 6307730b3..ae0e91bb7 100644 --- a/docs/source/pages/wrappers.rst +++ b/docs/source/pages/wrappers.rst @@ -72,19 +72,43 @@ output doesn't match with original environment action space, an error will be ra :alt: Discretize wrapper graph. :align: center +*************************** +IncrementalWrapper +*************************** + +A wrapper to transform some of the continuous environment variables into actions indicating an increase/decrease in their current value, +rather than directly setting the value. 
To compute the possible increments/decrements for each variable, a dictionary is specified as +an argument, indicating the name of each variable to be transformed as the key, and the value being a tuple of values called **delta** and +**step**. This yields a set of possible increments for each desired variable. + +- **delta**: Maximum range of increments and decrements. + +- **step**: Interval of intermediate values within the ranges. + +The following figure illustrates its operation: each action value is rounded to the nearest increment value and then added to the +current real value of the simulation. + +.. image:: /_static/incremental_wrapper.png + :scale: 50 % + :alt: Incremental wrapper graph. + :align: center + *************************** DiscreteIncrementalWrapper *************************** A wrapper for an incremental setpoint action space environment. This wrapper will update an environment, converting it in a *discrete* environment with an action mapping function and action space -depending on the **step** and **delta** specified. The action will be sum with **current setpoint** values instead of overwrite the latest action. +depending on the **delta** and **step** specified. The action will be summed with the **current setpoint** values instead of overwriting the latest action. Then, the action is the current setpoint values with the increase instead of the discrete value action whose purpose is to define -the increment/decrement itself. +the increment/decrement itself. -.. image:: /_static/incremental_wrapper.png +.. warning:: This wrapper fully changes the action space from continuous to discrete, meaning that increments/decrements + apply to all variables. In essence, selecting variables individually as in IncrementalWrapper is not possible. + +.. image:: /_static/discrete_incremental_wrapper.png :scale: 50 % - :alt: Incremental wrapper graph. + :alt: Discrete incremental wrapper graph. 
:align: center *********************** diff --git a/docs/source/spelling_wordlist.txt b/docs/source/spelling_wordlist.txt index f66df4ed4..a59449fb0 100644 --- a/docs/source/spelling_wordlist.txt +++ b/docs/source/spelling_wordlist.txt @@ -10,6 +10,7 @@ async atari attr auth +AUTOBALANCE avconv backend backpropagate @@ -24,10 +25,12 @@ cartpole cfg Cloudpickle cnn +Cº coef colab Conda conda +conf Config config Contrib @@ -56,6 +59,11 @@ deserialized dicts dir DiscreteIncrementalWrapper +discretization +Discretization +discretize +DiscretizeEnv +discretizes Dockerfile docstring doesn @@ -111,6 +119,7 @@ hyperparameter hyperparameters idd idf +IncrementalWrapper init initialise initialize @@ -122,6 +131,7 @@ json kfac Khee kwargs +Lieber lifecycle LinearReward Lisboa @@ -140,6 +150,7 @@ Mlflow mlflow mlp ModelJSON +modularizing mon mujoco MultiObjectiveReward @@ -154,6 +165,8 @@ ndarrays neglogp nn normalisation +NormalizeAction +NormalizedLinearReward NormalizeObservation np npz @@ -198,6 +211,7 @@ RBCDatacenter repo reproducibility rescale +resistive rl rollout rollouts @@ -215,6 +229,7 @@ softmax src stdout stepsize +stochasticity str subprocess subprocesses diff --git a/examples/drl.ipynb b/examples/drl.ipynb index 66f162848..dc68ec0b1 100644 --- a/examples/drl.ipynb +++ b/examples/drl.ipynb @@ -441,7 +441,7 @@ "source": [ "Now, is time to train the model with the callbacks defined earlier. This may take a few minutes, depending on your computer.\n", "\n", - ":warning: The warning messages that appear in `model.learn()` output is due to Stable Baselines 3 is not adapted to new standar to get environment attributes yet." + ":warning: The warning messages that appear in the `model.learn()` output are due to Stable Baselines 3 not yet being adapted to the new standard for getting environment attributes." 
] }, { diff --git a/examples/wrappers_examples.ipynb b/examples/wrappers_examples.ipynb index 01a6c2b6d..a58689864 100644 --- a/examples/wrappers_examples.ipynb +++ b/examples/wrappers_examples.ipynb @@ -425,7 +425,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This is an example about how to normalize a previous continuos environment action space. If we don't define the range values, it will be constructed with the range `[-1,1]` by default:" + "This is an example about how to normalize a previous continuous environment action space. If we don't define the range values, it will be constructed with the range `[-1,1]` by default:" ] }, { @@ -538,7 +538,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This is an example about how to discretize a previous continuos environment. We will require the **new discrete action space** and an **action mapping function** whose output matches with the original unwrapped environment action space:" + "This is an example about how to discretize a previous continuous environment. We will require the **new discrete action space** and an **action mapping function** whose output matches with the original unwrapped environment action space:" ] }, { diff --git a/sinergym/utils/wrappers.py b/sinergym/utils/wrappers.py index d8184b439..61999ac07 100644 --- a/sinergym/utils/wrappers.py +++ b/sinergym/utils/wrappers.py @@ -510,6 +510,119 @@ def observation(self, obs: np.ndarray) -> np.ndarray: return new_obs +class IncrementalWrapper(gym.ActionWrapper): + """A wrapper for an incremental values of desired action variables""" + + logger = Logger().getLogger(name='WRAPPER IncrementalWrapper', + level=LOG_WRAPPERS_LEVEL) + + def __init__( + self, + env: gym.Env, + incremental_variables_definition: Dict[str, Tuple[float, float]], + initial_values: List[float], + ): + """ + Args: + env (gym.Env): Original Sinergym environment. 
+ incremental_variables_definition (Dict[str, Tuple[float, float]]): Dictionary defining incremental variables. + Key: variable name, Value: Tuple with delta and step values. + Delta: maximum range, Step: intermediate value jumps. + initial_values (List[float]): Initial values for incremental variables. Length of this list and dictionary must match. + """ + + super().__init__(env) + + # Params + self.current_values = initial_values + + # Check environment is valid + try: + assert not self.env.get_wrapper_attr('is_discrete') + except AssertionError as err: + self.logger.error( + 'Env wrapped by this wrapper must be continuous.') + raise err + try: + assert all([variable in self.env.get_wrapper_attr('action_variables') + for variable in list(incremental_variables_definition.keys())]) + except AssertionError as err: + self.logger.error( + 'Some of the incremental variables specified does not exist as action variable in environment.') + raise err + try: + assert len(initial_values) == len( + incremental_variables_definition) + except AssertionError as err: + self.logger.error( + 'Number of incremental variables does not match with initial values') + raise err + + # All posible incremental variations + self.values_definition = {} + # Original action space variables + action_space_low = deepcopy(self.env.action_space.low) + action_space_high = deepcopy(self.env.action_space.high) + # Calculating incremental variations and action space for each + # incremental variable + for variable, (delta_temp, + step_temp) in incremental_variables_definition.items(): + + # Possible incrementations for each incremental variable. 
+ values = np.arange( + step_temp, + delta_temp + + step_temp / + 10, + step_temp) + values = [v for v in [*-np.flip(values), 0, *values]] + + # Index of the action variable + index = self.env.get_wrapper_attr( + 'action_variables').index(variable) + + self.values_definition[index] = values + action_space_low[index] = min(values) + action_space_high[index] = max(values) + + # New action space definition + self.action_space = gym.spaces.Box( + low=action_space_low, + high=action_space_high, + shape=self.env.action_space.shape, + dtype=np.float32) + + self.logger.info( + 'New incremental continuous action space: {}'.format( + self.action_space)) + self.logger.info( + 'Incremental variables configuration (variable: delta, step): {}'.format( + incremental_variables_definition)) + self.logger.info('Wrapper initialized') + + def action(self, action): + """Takes the continuous action and apply increment/decrement before to send to the next environment layer.""" + action_ = deepcopy(action) + + # Update current values with incremental values where required + for i, (index, values) in enumerate(self.values_definition.items()): + # Get increment value + increment_value = action[index] + # Round increment value to nearest value + increment_value = min( + values, key=lambda x: abs( + x - increment_value)) + # Update current_values + self.current_values[i] += increment_value + # Clip the value with original action space + self.current_values[i] = max(self.env.action_space.low[index], min( + self.current_values[i], self.env.action_space.high[index])) + + action_[index] = self.current_values[i] + + return list(action_) + + class DiscreteIncrementalWrapper(gym.ActionWrapper): """A wrapper for an incremental setpoint discrete action space environment. 
WARNING: A environment with only temperature setpoints control must be used diff --git a/sinergym/version.txt b/sinergym/version.txt index acf9bf09d..06eda28ac 100644 --- a/sinergym/version.txt +++ b/sinergym/version.txt @@ -1 +1 @@ -3.2.2 \ No newline at end of file +3.2.3 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index c3bbe3c13..334753647 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -439,6 +439,18 @@ def env_wrapper_previousobs(env_5zone): @pytest.fixture(scope='function') def env_wrapper_incremental(env_5zone): + return IncrementalWrapper( + env=env_5zone, + incremental_variables_definition={ + 'Heating_Setpoint_RL': (2.0, 0.5), + 'Cooling_Setpoint_RL': (1.0, 0.25) + }, + initial_values=[21.0, 25.0], + ) + + +@pytest.fixture(scope='function') +def env_discrete_wrapper_incremental(env_5zone): return DiscreteIncrementalWrapper( env=env_5zone, initial_values=[21.0, 25.0], diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py index 9edaff457..fac07faa5 100644 --- a/tests/test_wrapper.py +++ b/tests/test_wrapper.py @@ -111,11 +111,32 @@ def test_previous_observation_wrapper(env_name, request): original_obs1, obs2[-len(env.previous_variables):]) +def test_incremental_wrapper(env_wrapper_incremental): + + # Check initial values are initialized + assert hasattr(env_wrapper_incremental, 'values_definition') + assert len(env_wrapper_incremental.get_wrapper_attr('current_values')) == 2 + + old_values = env_wrapper_incremental.get_wrapper_attr( + 'current_values').copy() + # Check if action selected is applied correctly + env_wrapper_incremental.reset() + action = [-0.42, 0.3] + rounded_action = [-0.5, 0.25] + _, _, _, _, info = env_wrapper_incremental.step(action) + assert env_wrapper_incremental.get_wrapper_attr( + 'current_values') == [old_values[i] + rounded_action[i] for i in range(len(old_values))] + for i, (index, values) in enumerate( + env_wrapper_incremental.get_wrapper_attr('values_definition').items()): + 
assert env_wrapper_incremental.get_wrapper_attr( + 'current_values')[i] == info['action'][index] + + @pytest.mark.parametrize('env_name', - [('env_wrapper_incremental'), + [('env_discrete_wrapper_incremental'), ('env_all_wrappers'), ]) -def test_incremental_wrapper(env_name, request): +def test_discrete_incremental_wrapper(env_name, request): env = request.getfixturevalue(env_name) # Check initial setpoints values is initialized