From f9003a55d518dcac1a10f965055d937e82519c1f Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Fri, 4 Oct 2024 09:01:54 +0000 Subject: [PATCH 1/5] scripts/eval/load_agent.py: fix bug in example script (random agent loop instead of using loaded model) --- scripts/eval/load_agent.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/scripts/eval/load_agent.py b/scripts/eval/load_agent.py index 3e0d6f434..89da17531 100644 --- a/scripts/eval/load_agent.py +++ b/scripts/eval/load_agent.py @@ -91,7 +91,7 @@ # parse str parameters to sinergym Callable or Objects if it is # required if isinstance(value, str): - if '.' in value: + if '.' in value and '.txt' not in value: parameters[name] = eval(value) env = wrapper_class(env=env, ** parameters) @@ -152,25 +152,13 @@ for i in range(conf['episodes']): # Reset the environment to start a new episode obs, info = env.reset() - rewards = [] truncated = terminated = False - current_month = 0 while not (terminated or truncated): - # Random action control - a = env.action_space.sample() + # Use the agent to predict the next action + a, _ = model.predict(obs, deterministic=True) # Read observation and reward obs, reward, terminated, truncated, info = env.step(a) - rewards.append(reward) - # If this timestep is a new month start - if info['month'] != current_month: # display results every month - current_month = info['month'] - # Print information - logger.info('Reward: {}'.format(sum(rewards))) - logger.info('Info: {}'.format(info)) - # Final episode information print - logger.info( - 'Episode {} - Mean reward: {} - Cumulative Reward: {}'.format( - i, np.mean(rewards), sum(rewards))) + env.close() # ---------------------------------------------------------------------------- # From 594b4bea4f7d82cab927da2802507f1a69793fb7 Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Fri, 4 Oct 2024 09:02:54 +0000 Subject: [PATCH 2/5] Normalization calibration save: latest calibration saved in rootoutput 
folder and intermediate states in episode folders --- sinergym/utils/wrappers.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/sinergym/utils/wrappers.py b/sinergym/utils/wrappers.py index 8c7bc79ae..de91b6ffd 100644 --- a/sinergym/utils/wrappers.py +++ b/sinergym/utils/wrappers.py @@ -287,21 +287,25 @@ def step(self, action: Union[int, np.ndarray]) -> Tuple[ def reset(self, **kwargs): """Resets the environment and normalizes the observation.""" + + # Update normalization calibration if it is required + if self.get_wrapper_attr('episode') > 0: + self._save_normalization_calibration() + obs, info = self.env.reset(**kwargs) # Save original obs in class attribute self.unwrapped_observation = deepcopy(obs) - # Update normalization calibration if it is required - self._save_normalization_calibration() - return self.normalize(np.array([obs]))[0], info def close(self): - """Close the environment and save normalization calibration.""" - self.env.close() + """save normalization calibration and close the environment.""" # Update normalization calibration if it is required - self._save_normalization_calibration() + if self.get_wrapper_attr('episode') > 0: + self._save_normalization_calibration() + + self.env.close() # ----------------------- Wrapper extra functionality ----------------------- # @@ -337,7 +341,12 @@ def _save_normalization_calibration(self): """ self.logger.info( 'Saving normalization calibration data.') - # Save in txt in output folder + # Save in txt in episode output folder + np.savetxt(fname=self.get_wrapper_attr( + 'episode_path') + '/mean.txt', X=self.mean) + np.savetxt(fname=self.get_wrapper_attr( + 'episode_path') + '/var.txt', X=self.var) + # Overwrite output root folder mean and var as latest calibration np.savetxt(fname=self.get_wrapper_attr( 'workspace_path') + '/mean.txt', X=self.mean) np.savetxt(fname=self.get_wrapper_attr( From b76fb0130bc9d3c0408c2a7fbd2e709d845b50b4 Mon Sep 17 00:00:00 2001 From: 
AlejandroCN7 Date: Fri, 4 Oct 2024 09:03:29 +0000 Subject: [PATCH 3/5] LoggerEvalCallback: Normalization calibration saved in evaluation folder (if normalization wrapper is applied) --- sinergym/utils/callbacks.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sinergym/utils/callbacks.py b/sinergym/utils/callbacks.py index b41c18387..08ed6454f 100644 --- a/sinergym/utils/callbacks.py +++ b/sinergym/utils/callbacks.py @@ -160,6 +160,18 @@ def _on_event(self) -> None: self.save_path, 'best_model.zip')) self.best_mean_reward = evaluation_summary['mean_reward'] + # Save normalization calibration if exists + if is_wrapped(self.eval_env, NormalizeObservation): + self.logger.info( + 'Save normalization calibration in evaluation folder') + np.savetxt( + fname=self.save_path + + '/mean.txt', + X=self.eval_env.mean) + np.savetxt( + fname=self.save_path + + '/var.txt', + X=self.eval_env.var) # We close evaluation env and starts training env again self.eval_env.close() From cf4310692dddf39e1d7c8a4ef42c0ef8144f762c Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Fri, 4 Oct 2024 09:03:54 +0000 Subject: [PATCH 4/5] Documentation: Indicate this changes in normalization wrapper --- docs/source/pages/wrappers.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/source/pages/wrappers.rst b/docs/source/pages/wrappers.rst index 9826b0db8..a44be198e 100644 --- a/docs/source/pages/wrappers.rst +++ b/docs/source/pages/wrappers.rst @@ -122,17 +122,17 @@ with caution. However, *Sinergym* enhances its functionality with some additional features: -- It includes the last unnormalized observation as an attribute, which is very useful for logging. +- It includes the last unnormalized observation as an environment attribute, which is very useful for logging. - It provides access to the means and variations used for normalization calibration, addressing the low-level issues found in the original wrapper. 
-- Similarly, these calibration values can be set via a method or in the constructor. - These values can be specified neither in list/numpy array format or writing the txt path +- Similarly, these calibration values can be set via a method or in the wrapper constructor. + These values can be specified either in list/numpy array format or as the path to a txt file previously generated. Refer to the :ref:`API reference` for more information. - The automatic calibration can be enabled or disabled as you interact with the environment, allowing the - calibration to remain static instead of adaptive. + calibration to remain static instead of adaptive (useful for model evaluations). In addition, this wrapper saves the values of **mean and var in txt files in the *Sinergym* output**. This should be used in case of evaluating the model later. @@ -140,6 +140,10 @@ An example of its use can be found in the use case :ref:`Loading a model`. It is also important that normalization calibration update is deactivated during evaluation processes. +With this wrapper, *Sinergym* saves the mean and variance files in the root output directory as the latest +calibration values. It also saves the intermediate values as files in each episode directory and, if the +:ref:`Evaluation Callback` is active in a DRL training process, alongside the best model found. + These functionalities are crucial when evaluating models trained using this wrapper. For more details, visit `#407 `__. 
From 1a94493cfbc39f95b80c53b5370a0d4230b5f7df Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Fri, 4 Oct 2024 09:04:16 +0000 Subject: [PATCH 5/5] Updated Sinergym version from 3.6.2 to 3.6.3 --- pyproject.toml | 2 +- sinergym/version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e704c2603..6b51891c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ package-mode = true name = "sinergym" -version = "3.6.2" +version = "3.6.3" description = "The goal of sinergym is to create an environment following OpenAI Gym interface for wrapping simulation engines for building control using deep reinforcement learning." license = "MIT" diff --git a/sinergym/version.txt b/sinergym/version.txt index 77a069e39..1ac53bb4b 100644 --- a/sinergym/version.txt +++ b/sinergym/version.txt @@ -1 +1 @@ -3.6.2 \ No newline at end of file +3.6.3 \ No newline at end of file