From 49992002acc3081f1cfa91c8c4394a9cb26f931e Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Wed, 13 Sep 2023 14:44:17 +0000 Subject: [PATCH] Evaluation SB3 logging callback: Added some metrics and fixed names --- sinergym/utils/callbacks.py | 93 +++++++++++++++++++++--------------- sinergym/utils/evaluation.py | 52 +++++++++++--------- 2 files changed, 84 insertions(+), 61 deletions(-) diff --git a/sinergym/utils/callbacks.py b/sinergym/utils/callbacks.py index bedf15d47b..e327524178 100644 --- a/sinergym/utils/callbacks.py +++ b/sinergym/utils/callbacks.py @@ -233,13 +233,16 @@ def __init__( self.log_path = log_path self.log_metrics = { 'timesteps': [], - 'mean_rewards': [], - 'cumulative_rewards': [], - 'ep_lengths': [], - 'ep_powers': [], - 'ep_comfort_violations': [], - 'episodes_comfort_penalties': [], - 'episodes_power_penalties': [], + 'episodes_mean_reward': [], + 'episodes_cumulative_reward': [], + 'episodes_length': [], + 'episodes_cumulative_power': [], + 'episodes_mean_power': [], + 'episodes_comfort_violation': [], + 'episodes_cumulative_comfort_penalty': [], + 'episodes_mean_comfort_penalty': [], + 'episodes_cumulative_energy_penalty': [], + 'episodes_mean_energy_penalty': [], } self.evaluations_results = [] self.evaluations_timesteps = [] @@ -252,7 +255,7 @@ def __init__( self.evaluations_power_consumption = [] self.evaluations_comfort_violation = [] self.evaluations_comfort_penalty = [] - self.evaluations_power_penalty = [] + self.evaluations_energy_penalty = [] self.evaluation_metrics = {} def _init_callback(self) -> None: @@ -310,14 +313,16 @@ def _on_step(self) -> bool: # We close training env before to start the evaluation self.training_env.close() - # episodes_rewards, episodes_lengths, episodes_powers, episodes_comfort_violations, episodes_comfort_penalties, episodes_power_penalties + # episodes_mean_reward, episodes_cumulative_reward, episodes_length, + # episodes_cumulative_power, episodes_mean_power, episodes_comfort_violation, + # episodes_cumulative_comfort_penalty, episodes_mean_comfort_penalty, + # episodes_cumulative_energy_penalty, episodes_mean_energy_penalty episodes_data = evaluate_policy( self.model, self.eval_env, n_eval_episodes=self.n_eval_episodes, render=self.render, deterministic=self.deterministic, - warn=self.warn, callback=self._log_success_callback, ) @@ -327,20 +332,26 @@ def _on_step(self) -> bool: if self.log_path is not None: self.log_metrics['timesteps'].append(self.num_timesteps) - self.log_metrics['cumulative_rewards'].append( - episodes_data['episodes_cumulative_rewards']) - self.log_metrics['mean_rewards'].append( - episodes_data['episodes_mean_rewards']) - self.log_metrics['ep_lengths'].append( - episodes_data['episodes_lengths']) - self.log_metrics['ep_powers'].append( - episodes_data['episodes_powers']) - self.log_metrics['ep_comfort_violations'].append( - episodes_data['episodes_comfort_violations']) - self.log_metrics['episodes_comfort_penalties'].append( - episodes_data['episodes_comfort_penalties']) - self.log_metrics['episodes_power_penalties'].append( - episodes_data['episodes_power_penalties']) + self.log_metrics['episodes_cumulative_reward'].append( + episodes_data['episodes_cumulative_reward']) + self.log_metrics['episodes_mean_reward'].append( + episodes_data['episodes_mean_reward']) + self.log_metrics['episodes_length'].append( + episodes_data['episodes_length']) + self.log_metrics['episodes_cumulative_power'].append( + episodes_data['episodes_cumulative_power']) + self.log_metrics['episodes_mean_power'].append( + 
episodes_data['episodes_mean_power']) + self.log_metrics['episodes_comfort_violation'].append( + episodes_data['episodes_comfort_violation']) + self.log_metrics['episodes_cumulative_comfort_penalty'].append( + episodes_data['episodes_cumulative_comfort_penalty']) + self.log_metrics['episodes_mean_comfort_penalty'].append( + episodes_data['episodes_mean_comfort_penalty']) + self.log_metrics['episodes_cumulative_energy_penalty'].append( + episodes_data['episodes_cumulative_energy_penalty']) + self.log_metrics['episodes_mean_energy_penalty'].append( + episodes_data['episodes_mean_energy_penalty']) kwargs = {} # Save success log if present @@ -356,29 +367,35 @@ def _on_step(self) -> bool: ) mean_reward, std_reward = np.mean( - episodes_data['episodes_mean_rewards']), np.std( - episodes_data['episodes_mean_rewards']) + episodes_data['episodes_mean_reward']), np.std( + episodes_data['episodes_mean_reward']) mean_cumulative_reward, std_cumulative_reward = np.mean( - episodes_data['episodes_cumulative_rewards']), np.std( - episodes_data['episodes_cumulative_rewards']) + episodes_data['episodes_cumulative_reward']), np.std( + episodes_data['episodes_cumulative_reward']) mean_ep_length, std_ep_length = np.mean( - episodes_data['episodes_lengths']), np.std( - episodes_data['episodes_lengths']) + episodes_data['episodes_length']), np.std( + episodes_data['episodes_length']) self.last_reward = mean_cumulative_reward self.evaluation_metrics['mean_reward'] = mean_reward self.evaluation_metrics['std_reward'] = std_reward - self.evaluation_metrics['mean_cumulative_reward'] = mean_cumulative_reward + self.evaluation_metrics['cumulative_reward'] = mean_cumulative_reward self.evaluation_metrics['std_cumulative_reward'] = std_cumulative_reward - self.evaluation_metrics['mean_ep_length'] = mean_ep_length + self.evaluation_metrics['episode_length'] = mean_ep_length + self.evaluation_metrics['cumulative_power_consumption'] = np.mean( + episodes_data['episodes_cumulative_power']) self.evaluation_metrics['mean_power_consumption'] = np.mean( - episodes_data['episodes_powers']) + episodes_data['episodes_mean_power']) self.evaluation_metrics['comfort_violation(%)'] = np.mean( - episodes_data['episodes_comfort_violations']) - self.evaluation_metrics['comfort_penalty'] = np.mean( - episodes_data['episodes_comfort_penalties']) - self.evaluation_metrics['power_penalty'] = np.mean( - episodes_data['episodes_power_penalties']) + episodes_data['episodes_comfort_violation']) + self.evaluation_metrics['cumulative_comfort_penalty'] = np.mean( + episodes_data['episodes_cumulative_comfort_penalty']) + self.evaluation_metrics['mean_comfort_penalty'] = np.mean( + episodes_data['episodes_mean_comfort_penalty']) + self.evaluation_metrics['cumulative_energy_penalty'] = np.mean( + episodes_data['episodes_cumulative_energy_penalty']) + self.evaluation_metrics['mean_energy_penalty'] = np.mean( + episodes_data['episodes_mean_energy_penalty']) if self.verbose >= 1: print( diff --git a/sinergym/utils/evaluation.py b/sinergym/utils/evaluation.py index a827ed0dcd..1e9378a4f8 100644 --- a/sinergym/utils/evaluation.py +++ b/sinergym/utils/evaluation.py @@ -14,10 +14,7 @@ def evaluate_policy( n_eval_episodes: int = 10, deterministic: bool = True, render: bool = False, - callback: Optional[Callable[[Dict[str, Any], Dict[str, Any]], None]] = None, - reward_threshold: Optional[float] = None, - return_episode_rewards: bool = False, - warn: bool = True, + callback: Optional[Callable[[Dict[str, Any], Dict[str, Any]], None]] = None ) -> Dict[str, 
list]: """ Runs policy for ``n_eval_episodes`` episodes and returns average reward and other Sinergym metrics. @@ -51,13 +48,16 @@ def evaluate_policy( (in number of steps). """ result = { - 'episodes_cumulative_rewards': [], - 'episodes_mean_rewards': [], - 'episodes_lengths': [], - 'episodes_powers': [], - 'episodes_comfort_violations': [], - 'episodes_comfort_penalties': [], - 'episodes_power_penalties': [] + 'episodes_cumulative_reward': [], + 'episodes_mean_reward': [], + 'episodes_length': [], + 'episodes_cumulative_power': [], + 'episodes_mean_power': [], + 'episodes_comfort_violation': [], + 'episodes_cumulative_comfort_penalty': [], + 'episodes_mean_comfort_penalty': [], + 'episodes_cumulative_energy_penalty': [], + 'episodes_mean_energy_penalty': [] } episodes_executed = 0 while episodes_executed < n_eval_episodes: @@ -68,7 +68,7 @@ def evaluate_policy( episode_steps_comfort_violation = 0 episode_cumulative_power = 0.0 episode_cumulative_comfort_penalty = 0.0 - episode_cumulative_power_penalty = 0.0 + episode_cumulative_energy_penalty = 0.0 # ---------------------------------------------------------------------------- # # Running episode and accumulate values # # ---------------------------------------------------------------------------- # @@ -78,9 +78,9 @@ def evaluate_policy( obs, reward, terminated, _, info = env.step(action) episode_cumulative_reward += reward episode_cumulative_power += info['abs_energy'] - episode_cumulative_power_penalty += info['energy_term'] + episode_cumulative_energy_penalty += info['energy_term'] episode_cumulative_comfort_penalty += info['comfort_term'] - if info['comfort_term'] != 0: + if info['comfort_term'] < 0: episode_steps_comfort_violation += 1 if callback is not None: callback(locals(), globals()) @@ -91,19 +91,25 @@ def evaluate_policy( # ---------------------------------------------------------------------------- # # Storing accumulated values in result # # ---------------------------------------------------------------------------- # - result['episodes_cumulative_rewards'].append(episode_cumulative_reward) - result['episodes_mean_rewards'].append( + result['episodes_cumulative_reward'].append(episode_cumulative_reward) + result['episodes_mean_reward'].append( episode_cumulative_reward / episode_length) - result['episodes_lengths'].append(episode_length) - result['episodes_powers'].append(episode_cumulative_power) + result['episodes_length'].append(episode_length) + result['episodes_cumulative_power'].append(episode_cumulative_power) + result['episodes_mean_power'].append( + episode_cumulative_power / episode_length) try: - result['episodes_comfort_violations'].append( + result['episodes_comfort_violation'].append( episode_steps_comfort_violation / episode_length * 100) except ZeroDivisionError: - result['episodes_comfort_violations'].append(np.nan) - result['episodes_comfort_penalties'].append( + result['episodes_comfort_violation'].append(np.nan) + result['episodes_cumulative_comfort_penalty'].append( episode_cumulative_comfort_penalty) - result['episodes_power_penalties'].append( - episode_cumulative_power_penalty) + result['episodes_mean_comfort_penalty'].append( + episode_cumulative_comfort_penalty / episode_length) + result['episodes_cumulative_energy_penalty'].append( + episode_cumulative_energy_penalty) + result['episodes_mean_energy_penalty'].append( + episode_cumulative_energy_penalty / episode_length) return result
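
Note for reviewers: below is a minimal usage sketch of the renamed evaluation interface, assuming a placeholder Sinergym environment id and an SB3 PPO model (neither is part of this patch). It only consumes the dictionary keys introduced above and mirrors the per-episode aggregation that LoggerEvalCallback performs before logging.

import numpy as np
import gymnasium as gym
from stable_baselines3 import PPO

import sinergym  # registers the Eplus-* environments
from sinergym.utils.evaluation import evaluate_policy

# Placeholder environment id and model: any Sinergym env / SB3 algorithm works here.
eval_env = gym.make('Eplus-5zone-hot-continuous-stochastic-v1')
model = PPO('MlpPolicy', eval_env)

# evaluate_policy now returns a dict of per-episode lists (see keys in the patch).
episodes_data = evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=3,
    deterministic=True)

# Aggregate the per-episode values the same way LoggerEvalCallback does.
summary = {
    'mean_reward': np.mean(episodes_data['episodes_mean_reward']),
    'cumulative_reward': np.mean(episodes_data['episodes_cumulative_reward']),
    'episode_length': np.mean(episodes_data['episodes_length']),
    'cumulative_power_consumption': np.mean(episodes_data['episodes_cumulative_power']),
    'mean_power_consumption': np.mean(episodes_data['episodes_mean_power']),
    'comfort_violation(%)': np.mean(episodes_data['episodes_comfort_violation']),
    'cumulative_comfort_penalty': np.mean(episodes_data['episodes_cumulative_comfort_penalty']),
    'mean_comfort_penalty': np.mean(episodes_data['episodes_mean_comfort_penalty']),
    'cumulative_energy_penalty': np.mean(episodes_data['episodes_cumulative_energy_penalty']),
    'mean_energy_penalty': np.mean(episodes_data['episodes_mean_energy_penalty']),
}
print(summary)

These summary keys correspond to the entries written into self.evaluation_metrics in the callback, so the same names should show up in the SB3 logger output (e.g. TensorBoard) after this change.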