
Commit

(v3.0.2) Reward function Patch (#356)
* Rewards update: added support for rewards over several power variables and improved the reward terms (separated terms, the values used, and absolute comfort and energy)

* Updated the info keys for reward terms across the rest of the Sinergym project

* Added abs_energy to the Sinergym CSVLogger

* Fixed test error reports

* Updated rewards documentation

* Updated Sinergym version from 3.0.1 to 3.0.2
AlejandroCN7 authored Aug 30, 2023
1 parent 6ca30e3 commit d26cfc3
Showing 10 changed files with 128 additions and 95 deletions.
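At a glance, the patch renames the reward constructor kwargs ('temperature_variable' -> 'temperature_variables', 'energy_variable' -> 'energy_variables', both now accepting a single name or a list) and the reward-term keys in the info dict ('reward_energy' -> 'energy_term', 'reward_comfort' -> 'comfort_term', 'total_energy' -> 'abs_energy'). A minimal sketch of stepping an environment under the new names — the environment ID is taken from the notebook diff below, and the printed keys are inferred from this commit's changes:

import gymnasium as gym

import sinergym  # noqa: F401  (importing sinergym registers the Eplus-* environment IDs)

env = gym.make('Eplus-5zone-hot-continuous-v1')
obs, info = env.reset()
# Sample a random action and read the renamed reward terms from info.
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
print(info['energy_term'], info['comfort_term'], info['abs_energy'])
env.close()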
9 changes: 5 additions & 4 deletions docs/source/pages/rewards.rst
@@ -53,10 +53,10 @@ different name in different buildings.

The main parameters considered in a reward function are the following:

-- **temperature_variable**: This field can be an *str* (only a unique zone temperature)
+- **temperature_variables**: This field can be an *str* (only a unique zone temperature)
or a *list* (with several zone temperatures).

-- **energy_variable**: Name of the observation variable where energy consumption is
+- **energy_variables**: Name of the observation variables where energy consumption is
reflected.

- **range_comfort_winter**: Temperature comfort range for cold season. Depends on
@@ -80,8 +80,9 @@ The main parameters considered in a reward function are the following:
By default, all environments use ``LinearReward`` with default parameters. If you want to change this, see
an example in :ref:`Adding a new reward`.

-.. note:: By default, reward class will return the reward value and the terms used in its calculation.
-   This terms will be added to info dict in environment automatically.
+.. note:: By default, the reward class will return the reward value and the terms used in its
+   calculation. These terms depend on the specific reward function used, and they will be
+   added to the environment's info dict automatically.

.. warning:: When specifying a different reward with `gym.make` than the
default environment ID, it is very important to set the `reward_kwargs`
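Concretely, the note above means each reward call yields a scalar plus a dict of terms that the environment merges into info. A hypothetical shape of that dict under this patch (the numeric values are made-up placeholders):

# Hypothetical output of one reward call after this patch:
reward = -0.31
terms = {
    'energy_term': -0.025,   # weighted energy penalty
    'comfort_term': -0.285,  # weighted comfort penalty
    'abs_energy': 500.0,     # absolute energy value used
    'abs_comfort': 1.2,      # absolute comfort violation used
}
# The environment then exposes these as info['energy_term'], info['comfort_term'], etc.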
4 changes: 2 additions & 2 deletions examples/change_environment.ipynb
@@ -125,8 +125,8 @@
"from sinergym.utils.rewards import ExpReward\n",
"\n",
"env = gym.make('Eplus-5zone-hot-continuous-v1', reward=ExpReward, reward_kwargs={\n",
" 'temperature_variable': 'air_temperature',\n",
" 'energy_variable': 'HVAC_electricity_demand_rate',\n",
" 'temperature_variables': 'air_temperature',\n",
" 'energy_variables': 'HVAC_electricity_demand_rate',\n",
" 'range_comfort_winter': (20.0, 23.5),\n",
" 'range_comfort_summer': (23.0, 26.0),\n",
" 'energy_weight': 0.1})"
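Because the plural kwargs accept lists as well, the same pattern scales to multi-zone buildings. A sketch reusing the datacenter variables registered in sinergym/__init__.py below (the environment ID here is illustrative, not confirmed by this diff):

import gymnasium as gym
import sinergym  # noqa: F401
from sinergym.utils.rewards import LinearReward

env = gym.make(
    'Eplus-datacenter-hot-continuous-v1',  # illustrative ID
    reward=LinearReward,
    reward_kwargs={
        # One comfort range shared by both monitored zones:
        'temperature_variables': ['west_zone_temperature',
                                  'east_zone_temperature'],
        'energy_variables': 'HVAC_electricity_demand_rate',
        'range_comfort_winter': (18, 27),
        'range_comfort_summer': (18, 27)})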
28 changes: 14 additions & 14 deletions sinergym/__init__.py
@@ -26,8 +26,8 @@
'actuators': DEFAULT_5ZONE_ACTUATORS,
'reward': LinearReward,
'reward_kwargs': {
-'temperature_variable': 'air_temperature',
-'energy_variable': 'HVAC_electricity_demand_rate',
+'temperature_variables': 'air_temperature',
+'energy_variables': 'HVAC_electricity_demand_rate',
'range_comfort_winter': (
20.0,
23.5),
@@ -65,8 +65,8 @@
reg_kwargs['meters'] = DEFAULT_5ZONE_METERS
reg_kwargs['reward'] = LinearReward
reg_kwargs['reward_kwargs'] = {
-'temperature_variable': 'air_temperature',
-'energy_variable': 'HVAC_electricity_demand_rate',
+'temperature_variables': 'air_temperature',
+'energy_variables': 'HVAC_electricity_demand_rate',
'range_comfort_winter': (
20.0,
23.5),
@@ -84,10 +84,10 @@
reg_kwargs['meters'] = DEFAULT_DATACENTER_METERS
reg_kwargs['reward'] = LinearReward
reg_kwargs['reward_kwargs'] = {
-'temperature_variable': [
+'temperature_variables': [
'west_zone_temperature',
'east_zone_temperature'],
-'energy_variable': 'HVAC_electricity_demand_rate',
+'energy_variables': 'HVAC_electricity_demand_rate',
'range_comfort_winter': (
18,
27),
@@ -105,11 +105,11 @@
reg_kwargs['meters'] = DEFAULT_WAREHOUSE_METERS
reg_kwargs['reward'] = LinearReward
reg_kwargs['reward_kwargs'] = {
-'temperature_variable': [
+'temperature_variables': [
'office_temperature',
'fstorage_temperature',
'bstorage_temperature'],
-'energy_variable': 'HVAC_electricity_demand_rate',
+'energy_variables': 'HVAC_electricity_demand_rate',
'range_comfort_winter': (
18,
27),
@@ -127,7 +127,7 @@
reg_kwargs['meters'] = DEFAULT_OFFICE_METERS
reg_kwargs['reward'] = LinearReward
reg_kwargs['reward_kwargs'] = {
-'temperature_variable': [
+'temperature_variables': [
'zone1_temperature',
'zone2_temperature',
'zone3_temperature',
@@ -147,7 +147,7 @@
'zone17_temperature',
'zone18_temperature'
],
-'energy_variable': 'HVAC_electricity_demand_rate',
+'energy_variables': 'HVAC_electricity_demand_rate',
'range_comfort_winter': (18, 27),
'range_comfort_summer': (18, 27)
}
@@ -162,7 +162,7 @@
reg_kwargs['meters'] = DEFAULT_OFFICEGRID_METERS
reg_kwargs['reward'] = LinearReward
reg_kwargs['reward_kwargs'] = {
-'temperature_variable': [
+'temperature_variables': [
'zone1_temperature',
'zone2_temperature',
'zone3_temperature',
@@ -182,7 +182,7 @@
'zone17_temperature',
'zone18_temperature',
'zone19_temperature'],
-'energy_variable': 'HVAC_electricity_demand_rate',
+'energy_variables': 'HVAC_electricity_demand_rate',
'range_comfort_winter': (
20.0,
23.5),
@@ -200,13 +200,13 @@
reg_kwargs['meters'] = DEFAULT_SHOP_METERS
reg_kwargs['reward'] = LinearReward
reg_kwargs['reward_kwargs'] = {
-'temperature_variable': [
+'temperature_variables': [
'zone1_temperature',
'zone2_temperature',
'zone3_temperature',
'zone4_temperature',
'zone5_temperature'],
-'energy_variable': 'HVAC_electricity_demand_rate',
+'energy_variables': 'HVAC_electricity_demand_rate',
'range_comfort_winter': (
20.0,
23.5),
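All of these registrations now rely on the reward handling lists of temperature (and, after this patch, energy) variables. A minimal sketch of one plausible aggregation — an illustrative assumption, not Sinergym's actual LinearReward implementation:

def linear_reward(temps, energies, comfort_range, w=0.5,
                  lambda_e=1e-4, lambda_t=1.0):
    # Sum energy across all monitored power variables.
    abs_energy = sum(energies)
    # Accumulate comfort violations (degrees outside the range) per zone.
    low, high = comfort_range
    abs_comfort = sum(max(low - t, 0.0) + max(t - high, 0.0) for t in temps)
    energy_term = -w * lambda_e * abs_energy
    comfort_term = -(1.0 - w) * lambda_t * abs_comfort
    return energy_term + comfort_term, {'energy_term': energy_term,
                                        'comfort_term': comfort_term,
                                        'abs_energy': abs_energy,
                                        'abs_comfort': abs_comfort}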
8 changes: 4 additions & 4 deletions sinergym/utils/callbacks.py
@@ -117,10 +117,10 @@ def _on_step(self) -> bool:
except KeyError:
print('Algorithm reward key in locals dict unknown')

-self.ep_powers.append(info['total_energy'])
-self.ep_term_comfort.append(info['reward_comfort'])
-self.ep_term_energy.append(info['reward_energy'])
-if (info['reward_comfort'] != 0):
+self.ep_powers.append(info['abs_energy'])
+self.ep_term_comfort.append(info['comfort_term'])
+self.ep_term_energy.append(info['energy_term'])
+if (info['comfort_term'] != 0):
self.num_comfort_violation += 1
self.ep_timesteps += 1

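The per-step values collected in _on_step are typically condensed into episode metrics when the episode ends. A sketch of that aggregation (the helper name and metric labels are hypothetical):

def episode_summary(ep_powers, ep_term_comfort, ep_term_energy,
                    num_comfort_violation, ep_timesteps):
    # Aggregate the per-step lists and counters kept by the callback.
    return {
        'mean_abs_energy': sum(ep_powers) / max(len(ep_powers), 1),
        'cumulative_comfort_penalty': sum(ep_term_comfort),
        'cumulative_energy_penalty': sum(ep_term_energy),
        'comfort_violation(%)':
            100.0 * num_comfort_violation / max(ep_timesteps, 1),
    }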
8 changes: 4 additions & 4 deletions sinergym/utils/evaluation.py
@@ -77,10 +77,10 @@ def evaluate_policy(
obs, state=state, deterministic=deterministic)
obs, reward, terminated, _, info = env.step(action)
episode_cumulative_reward += reward
-episode_cumulative_power += info['total_energy']
-episode_cumulative_power_penalty += info['reward_energy']
-episode_cumulative_comfort_penalty += info['reward_comfort']
-if info['reward_comfort'] != 0:
+episode_cumulative_power += info['abs_energy']
+episode_cumulative_power_penalty += info['energy_term']
+episode_cumulative_comfort_penalty += info['comfort_term']
+if info['comfort_term'] != 0:
episode_steps_comfort_violation += 1
if callback is not None:
callback(locals(), globals())
24 changes: 15 additions & 9 deletions sinergym/utils/logger.py
@@ -110,6 +110,7 @@ def __init__(
'comfort_penalties': [],
'abs_comfort': [],
'power_penalties': [],
+'abs_energy': [],
'total_timesteps': 0,
'total_time_elapsed': 0,
'comfort_violation_timesteps': 0
@@ -136,9 +136,10 @@ def _create_row_content(
info.get('timestep')] + list(obs) + list(action) + [
info.get('time_elapsed(hours)'),
info.get('reward'),
-info.get('reward_energy'),
-info.get('reward_comfort'),
+info.get('energy_term'),
+info.get('comfort_term'),
info.get('abs_comfort'),
+info.get('abs_energy'),
terminated]

def _store_step_information(
@@ -158,18 +160,21 @@ def _store_step_information(
# In reset, these keys are not yet available in info
if info.get('reward'):
self.episode_data['rewards'].append(info.get('reward'))
-if info.get('total_energy'):
-self.episode_data['powers'].append(info.get('total_energy'))
-if info.get('reward_comfort'):
+if info.get('abs_energy'):
+self.episode_data['powers'].append(info.get('abs_energy'))
+if info.get('comfort_term'):
self.episode_data['comfort_penalties'].append(
-info.get('reward_comfort'))
-if info.get('reward_energy') is not None:
+info.get('comfort_term'))
+if info.get('energy_term') is not None:
self.episode_data['power_penalties'].append(
-info.get('reward_energy'))
+info.get('energy_term'))
if info.get('abs_comfort') is not None:
self.episode_data['abs_comfort'].append(
info.get('abs_comfort'))
-if info.get('reward_comfort') != 0:
+if info.get('abs_energy') is not None:
+self.episode_data['abs_energy'].append(
+info.get('abs_energy'))
+if info.get('comfort_term') != 0:
self.episode_data['comfort_violation_timesteps'] += 1
self.episode_data['total_timesteps'] = info.get('timestep')
self.episode_data['total_time_elapsed'] = info.get('time_elapsed')
@@ -185,6 +190,7 @@ def _reset_logger(self) -> None:
'comfort_penalties': [],
'abs_comfort': [],
'power_penalties': [],
+'abs_energy': [],
'total_timesteps': 0,
'total_time_elapsed': 0,
'comfort_violation_timesteps': 0
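For reference, the CSV row written by _create_row_content now ends with the two absolute values and the termination flag. A sketch of the matching header (observation and action column names are hypothetical and environment-dependent):

obs_names = ['air_temperature', 'outdoor_temperature']   # hypothetical
action_names = ['heating_setpoint', 'cooling_setpoint']  # hypothetical
csv_header = (['timestep'] + obs_names + action_names +
              ['time_elapsed(hours)', 'reward', 'energy_term',
               'comfort_term', 'abs_comfort', 'abs_energy', 'terminated'])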
