Evaluation SB3 logging callback: Added some metrics and fixed names
AlejandroCN7 committed Sep 13, 2023
1 parent b892bf3 commit 4999200
Showing 2 changed files with 84 additions and 61 deletions.
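For context, here is a minimal sketch of how an evaluation callback like the one modified below is typically attached to a Stable Baselines 3 training run. The class name LoggerEvalCallback, the import path, the environment id and the constructor arguments are assumptions inferred from the diff and common SB3 usage, not a verbatim copy of the Sinergym API:

import gymnasium as gym
from stable_baselines3 import PPO
from sinergym.utils.callbacks import LoggerEvalCallback  # assumed class name and path

# Separate training and evaluation environments (assumed Sinergym env id)
train_env = gym.make('Eplus-5zone-hot-continuous-v1')
eval_env = gym.make('Eplus-5zone-hot-continuous-v1')

eval_callback = LoggerEvalCallback(
    eval_env,
    n_eval_episodes=2,         # episodes run per evaluation, as in evaluate_policy below
    eval_freq=10000,           # assumed: evaluate every N training timesteps
    log_path='./evaluation/',  # where log_metrics are written (see callbacks.py diff)
    deterministic=True,
)

model = PPO('MlpPolicy', train_env, verbose=1)
model.learn(total_timesteps=100000, callback=eval_callback)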
93 changes: 55 additions & 38 deletions sinergym/utils/callbacks.py
@@ -233,13 +233,16 @@ def __init__(
self.log_path = log_path
self.log_metrics = {
'timesteps': [],
- 'mean_rewards': [],
- 'cumulative_rewards': [],
- 'ep_lengths': [],
- 'ep_powers': [],
- 'ep_comfort_violations': [],
- 'episodes_comfort_penalties': [],
- 'episodes_power_penalties': [],
+ 'episodes_mean_reward': [],
+ 'episodes_cumulative_reward': [],
+ 'episodes_length': [],
+ 'episodes_cumulative_power': [],
+ 'episodes_mean_power': [],
+ 'episodes_comfort_violation': [],
+ 'episodes_cumulative_comfort_penalty': [],
+ 'episodes_mean_comfort_penalty': [],
+ 'episodes_cumulative_energy_penalty': [],
+ 'episodes_mean_energy_penalty': [],
}
self.evaluations_results = []
self.evaluations_timesteps = []
@@ -252,7 +255,7 @@ def __init__(
self.evaluations_power_consumption = []
self.evaluations_comfort_violation = []
self.evaluations_comfort_penalty = []
- self.evaluations_power_penalty = []
+ self.evaluations_energy_penalty = []
self.evaluation_metrics = {}

def _init_callback(self) -> None:
@@ -310,14 +313,16 @@ def _on_step(self) -> bool:
# We close training env before to start the evaluation
self.training_env.close()

- # episodes_rewards, episodes_lengths, episodes_powers, episodes_comfort_violations, episodes_comfort_penalties, episodes_power_penalties
+ # episodes_mean_reward, episodes_cumulative_reward, episodes_length,
+ # episodes_cumulative_power, episodes_mean_power, episodes_comfort_violation,
+ # episodes_cumulative_comfort_penalty, episodes_mean_comfort_penalty,
+ # episodes_cumulative_energy_penalty, episodes_mean_energy_penalty
episodes_data = evaluate_policy(
self.model,
self.eval_env,
n_eval_episodes=self.n_eval_episodes,
render=self.render,
deterministic=self.deterministic,
- warn=self.warn,
callback=self._log_success_callback,
)

@@ -327,20 +332,26 @@ def _on_step(self) -> bool:

if self.log_path is not None:
self.log_metrics['timesteps'].append(self.num_timesteps)
- self.log_metrics['cumulative_rewards'].append(
- episodes_data['episodes_cumulative_rewards'])
- self.log_metrics['mean_rewards'].append(
- episodes_data['episodes_mean_rewards'])
- self.log_metrics['ep_lengths'].append(
- episodes_data['episodes_lengths'])
- self.log_metrics['ep_powers'].append(
- episodes_data['episodes_powers'])
- self.log_metrics['ep_comfort_violations'].append(
- episodes_data['episodes_comfort_violations'])
- self.log_metrics['episodes_comfort_penalties'].append(
- episodes_data['episodes_comfort_penalties'])
- self.log_metrics['episodes_power_penalties'].append(
- episodes_data['episodes_power_penalties'])
+ self.log_metrics['episodes_cumulative_reward'].append(
+ episodes_data['episodes_cumulative_reward'])
+ self.log_metrics['episodes_mean_reward'].append(
+ episodes_data['episodes_mean_reward'])
+ self.log_metrics['episodes_length'].append(
+ episodes_data['episodes_length'])
+ self.log_metrics['episodes_cumulative_power'].append(
+ episodes_data['episodes_cumulative_power'])
+ self.log_metrics['episodes_mean_power'].append(
+ episodes_data['episodes_mean_power'])
+ self.log_metrics['episodes_comfort_violation'].append(
+ episodes_data['episodes_comfort_violation'])
+ self.log_metrics['episodes_cumulative_comfort_penalty'].append(
+ episodes_data['episodes_cumulative_comfort_penalty'])
+ self.log_metrics['episodes_mean_comfort_penalty'].append(
+ episodes_data['episodes_mean_comfort_penalty'])
+ self.log_metrics['episodes_cumulative_energy_penalty'].append(
+ episodes_data['episodes_cumulative_energy_penalty'])
+ self.log_metrics['episodes_mean_energy_penalty'].append(
+ episodes_data['episodes_mean_energy_penalty'])

kwargs = {}
# Save success log if present
@@ -356,29 +367,35 @@ def _on_step(self) -> bool:
)

mean_reward, std_reward = np.mean(
- episodes_data['episodes_mean_rewards']), np.std(
- episodes_data['episodes_mean_rewards'])
+ episodes_data['episodes_mean_reward']), np.std(
+ episodes_data['episodes_mean_reward'])
mean_cumulative_reward, std_cumulative_reward = np.mean(
- episodes_data['episodes_cumulative_rewards']), np.std(
- episodes_data['episodes_cumulative_rewards'])
+ episodes_data['episodes_cumulative_reward']), np.std(
+ episodes_data['episodes_cumulative_reward'])
mean_ep_length, std_ep_length = np.mean(
- episodes_data['episodes_lengths']), np.std(
- episodes_data['episodes_lengths'])
+ episodes_data['episodes_length']), np.std(
+ episodes_data['episodes_length'])
self.last_reward = mean_cumulative_reward

self.evaluation_metrics['mean_reward'] = mean_reward
self.evaluation_metrics['std_reward'] = std_reward
- self.evaluation_metrics['mean_cumulative_reward'] = mean_cumulative_reward
+ self.evaluation_metrics['cumulative_reward'] = mean_cumulative_reward
self.evaluation_metrics['std_cumulative_reward'] = std_cumulative_reward
- self.evaluation_metrics['mean_ep_length'] = mean_ep_length
+ self.evaluation_metrics['episode_length'] = mean_ep_length
+ self.evaluation_metrics['cumulative_power_consumption'] = np.mean(
+ episodes_data['episodes_cumulative_power'])
self.evaluation_metrics['mean_power_consumption'] = np.mean(
- episodes_data['episodes_powers'])
+ episodes_data['episodes_mean_power'])
self.evaluation_metrics['comfort_violation(%)'] = np.mean(
- episodes_data['episodes_comfort_violations'])
- self.evaluation_metrics['comfort_penalty'] = np.mean(
- episodes_data['episodes_comfort_penalties'])
- self.evaluation_metrics['power_penalty'] = np.mean(
- episodes_data['episodes_power_penalties'])
+ episodes_data['episodes_comfort_violation'])
+ self.evaluation_metrics['cumulative_comfort_penalty'] = np.mean(
+ episodes_data['episodes_cumulative_comfort_penalty'])
+ self.evaluation_metrics['mean_comfort_penalty'] = np.mean(
+ episodes_data['episodes_mean_comfort_penalty'])
+ self.evaluation_metrics['cumulative_energy_penalty'] = np.mean(
+ episodes_data['episodes_cumulative_energy_penalty'])
+ self.evaluation_metrics['mean_energy_penalty'] = np.mean(
+ episodes_data['episodes_mean_energy_penalty'])

if self.verbose >= 1:
print(
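To make the renamed summary keys concrete, this is roughly how the per-episode lists returned by evaluate_policy are reduced to the scalar values stored in evaluation_metrics. The snippet mirrors, rather than reproduces, the callback code above, and the numbers are illustrative only:

import numpy as np

# Illustrative per-episode data using the new key names (values are made up)
episodes_data = {
    'episodes_mean_reward': [-0.42, -0.39],
    'episodes_cumulative_reward': [-14716.8, -13665.6],
    'episodes_length': [35040, 35040],
}

# Each summary metric is the mean (and, for rewards, also the std) over episodes
mean_reward = np.mean(episodes_data['episodes_mean_reward'])
std_reward = np.std(episodes_data['episodes_mean_reward'])
cumulative_reward = np.mean(episodes_data['episodes_cumulative_reward'])
episode_length = np.mean(episodes_data['episodes_length'])

print(mean_reward, std_reward, cumulative_reward, episode_length)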
52 changes: 29 additions & 23 deletions sinergym/utils/evaluation.py
@@ -14,10 +14,7 @@ def evaluate_policy(
n_eval_episodes: int = 10,
deterministic: bool = True,
render: bool = False,
- callback: Optional[Callable[[Dict[str, Any], Dict[str, Any]], None]] = None,
- reward_threshold: Optional[float] = None,
- return_episode_rewards: bool = False,
- warn: bool = True,
+ callback: Optional[Callable[[Dict[str, Any], Dict[str, Any]], None]] = None
) -> Dict[str, list]:
"""
Runs policy for ``n_eval_episodes`` episodes and returns average reward and other Sinergym metrics.
@@ -51,13 +48,16 @@
(in number of steps).
"""
result = {
- 'episodes_cumulative_rewards': [],
- 'episodes_mean_rewards': [],
- 'episodes_lengths': [],
- 'episodes_powers': [],
- 'episodes_comfort_violations': [],
- 'episodes_comfort_penalties': [],
- 'episodes_power_penalties': []
+ 'episodes_cumulative_reward': [],
+ 'episodes_mean_reward': [],
+ 'episodes_length': [],
+ 'episodes_cumulative_power': [],
+ 'episodes_mean_power': [],
+ 'episodes_comfort_violation': [],
+ 'episodes_cumulative_comfort_penalty': [],
+ 'episodes_mean_comfort_penalty': [],
+ 'episodes_cumulative_energy_penalty': [],
+ 'episodes_mean_energy_penalty': []
}
episodes_executed = 0
while episodes_executed < n_eval_episodes:
@@ -68,7 +68,7 @@
episode_steps_comfort_violation = 0
episode_cumulative_power = 0.0
episode_cumulative_comfort_penalty = 0.0
- episode_cumulative_power_penalty = 0.0
+ episode_cumulative_energy_penalty = 0.0
# ---------------------------------------------------------------------------- #
# Running episode and accumulate values #
# ---------------------------------------------------------------------------- #
@@ -78,9 +78,9 @@
obs, reward, terminated, _, info = env.step(action)
episode_cumulative_reward += reward
episode_cumulative_power += info['abs_energy']
- episode_cumulative_power_penalty += info['energy_term']
+ episode_cumulative_energy_penalty += info['energy_term']
episode_cumulative_comfort_penalty += info['comfort_term']
- if info['comfort_term'] != 0:
+ if info['comfort_term'] < 0:
episode_steps_comfort_violation += 1
if callback is not None:
callback(locals(), globals())
@@ -91,19 +91,25 @@
# ---------------------------------------------------------------------------- #
# Storing accumulated values in result #
# ---------------------------------------------------------------------------- #
- result['episodes_cumulative_rewards'].append(episode_cumulative_reward)
- result['episodes_mean_rewards'].append(
+ result['episodes_cumulative_reward'].append(episode_cumulative_reward)
+ result['episodes_mean_reward'].append(
episode_cumulative_reward / episode_length)
- result['episodes_lengths'].append(episode_length)
- result['episodes_powers'].append(episode_cumulative_power)
+ result['episodes_length'].append(episode_length)
+ result['episodes_cumulative_power'].append(episode_cumulative_power)
+ result['episodes_mean_power'].append(
+ episode_cumulative_power / episode_length)
try:
- result['episodes_comfort_violations'].append(
+ result['episodes_comfort_violation'].append(
episode_steps_comfort_violation / episode_length * 100)
except ZeroDivisionError:
- result['episodes_comfort_violations'].append(np.nan)
- result['episodes_comfort_penalties'].append(
+ result['episodes_comfort_violation'].append(np.nan)
+ result['episodes_cumulative_comfort_penalty'].append(
episode_cumulative_comfort_penalty)
- result['episodes_power_penalties'].append(
- episode_cumulative_power_penalty)
+ result['episodes_mean_comfort_penalty'].append(
+ episode_cumulative_comfort_penalty / episode_length)
+ result['episodes_cumulative_energy_penalty'].append(
+ episode_cumulative_energy_penalty)
+ result['episodes_mean_energy_penalty'].append(
+ episode_cumulative_energy_penalty / episode_length)

return result
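As a usage sketch for the updated evaluate_policy, the snippet below runs a short evaluation and reads some of the renamed result keys. The environment id is an assumption and the PPO model is untrained, purely for illustration; only the result keys come from the diff above:

import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO
from sinergym.utils.evaluation import evaluate_policy

eval_env = gym.make('Eplus-5zone-hot-continuous-v1')  # assumed Sinergym env id
model = PPO('MlpPolicy', eval_env)                    # untrained model, illustration only

result = evaluate_policy(model, eval_env, n_eval_episodes=1, deterministic=True)

# Each entry is a list with one value per evaluated episode
print('mean reward per step:        ', np.mean(result['episodes_mean_reward']))
print('cumulative energy penalty:   ', np.mean(result['episodes_cumulative_energy_penalty']))
print('comfort violation (% steps): ', np.mean(result['episodes_comfort_violation']))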
