(v3.6.3) - Normalization calibration improvement and load_agent.py fix #451

Merged · 5 commits · Oct 4, 2024
12 changes: 8 additions & 4 deletions docs/source/pages/wrappers.rst
@@ -122,24 +122,28 @@ with caution.

However, *Sinergym* enhances its functionality with some additional features:

- It includes the last unnormalized observation as an attribute, which is very useful for logging.
- It includes the last unnormalized observation as an environment attribute, which is very useful for logging.

- It provides access to the means and variances used for normalization calibration, addressing the low-level
issues found in the original wrapper.

- Similarly, these calibration values can be set via a method or in the constructor.
These values can be specified neither in list/numpy array format or writing the txt path
- Similarly, these calibration values can be set via a method or in the wrapper constructor.
These values can be specified either in list/NumPy array format or as the path to a txt file
previously generated. Refer to the :ref:`API reference` for more information.

- The automatic calibration can be enabled or disabled as you interact with the environment, allowing the
calibration to remain static instead of adaptive.
calibration to remain static instead of adaptive (useful for model evaluations).

In addition, this wrapper saves the **mean and var values in txt files in the
*Sinergym* output**. These files should be used when evaluating the model later.
An example of their use can be found in the use case :ref:`Loading a model`. It is
also important that the normalization calibration update is deactivated during
evaluation processes.

With this wrapper, *Sinergym* saves the mean and variance files in the root output directory as the latest
calibration values. It also saves the intermediate values in the episode directories, and alongside the
best model found if the :ref:`Evaluation Callback` is active in a DRL training process.

These functionalities are crucial when evaluating models trained using this wrapper.
For more details, visit `#407 <https://github.com/ugr-sail/sinergym/issues/407>`__.
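
For orientation, here is a minimal sketch of the workflow described above. The wrapper and environment names exist in *Sinergym*, but the keyword arguments and the update-deactivation method name are assumptions inferred from this documentation rather than verified API, and the file paths are hypothetical:

```python
# Minimal usage sketch, not verbatim project code.
import numpy as np
import gymnasium as gym
import sinergym
from sinergym.utils.wrappers import NormalizeObservation

env = gym.make('Eplus-5zone-hot-continuous-v1')

# Calibration can be passed as arrays (or, per the docs above, as paths
# to previously generated txt files).
mean = np.loadtxt('./previous_run/mean.txt')  # hypothetical path
var = np.loadtxt('./previous_run/var.txt')    # hypothetical path

env = NormalizeObservation(env, mean=mean, var=var)  # assumed keyword names

# Keep the calibration static while evaluating (assumed method name).
env.deactivate_update()
```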

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -6,7 +6,7 @@
package-mode = true
name = "sinergym"

version = "3.6.2"
version = "3.6.3"
description = "The goal of sinergym is to create an environment following OpenAI Gym interface for wrapping simulation engines for building control using deep reinforcement learning."
license = "MIT"

20 changes: 4 additions & 16 deletions scripts/eval/load_agent.py
@@ -91,7 +91,7 @@
# parse str parameters to sinergym Callable or Objects if it is
# required
if isinstance(value, str):
if '.' in value:
if '.' in value and '.txt' not in value:
parameters[name] = eval(value)
env = wrapper_class(env=env, ** parameters)
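
The rule introduced here resolves dotted references with `eval` while leaving txt calibration paths as plain strings. A small illustration with hypothetical parameter values (the dictionary keys below are made up for the example; `LinearReward` is a real *Sinergym* class, and importing it requires a working *Sinergym* installation):

```python
# Illustration of the parsing rule above with hypothetical wrapper parameters.
import sinergym.utils.rewards  # makes the dotted reference below resolvable

parameters = {
    'reward_fn': 'sinergym.utils.rewards.LinearReward',  # dotted reference -> eval to the class
    'mean': 'mean.txt',                                   # txt path -> must stay a string
}

for name, value in parameters.items():
    if isinstance(value, str):
        # Only evaluate dotted references; leave '.txt' paths untouched so the
        # wrapper receives the file path itself instead of a failed eval().
        if '.' in value and '.txt' not in value:
            parameters[name] = eval(value)
```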

@@ -152,25 +152,13 @@
for i in range(conf['episodes']):
# Reset the environment to start a new episode
obs, info = env.reset()
rewards = []
truncated = terminated = False
current_month = 0
while not (terminated or truncated):
# Random action control
a = env.action_space.sample()
# Use the agent to predict the next action
a, _ = model.predict(obs, deterministic=True)
# Read observation and reward
obs, reward, terminated, truncated, info = env.step(a)
rewards.append(reward)
# If this timestep is a new month start
if info['month'] != current_month: # display results every month
current_month = info['month']
# Print information
logger.info('Reward: {}'.format(sum(rewards)))
logger.info('Info: {}'.format(info))
# Final episode information print
logger.info(
'Episode {} - Mean reward: {} - Cumulative Reward: {}'.format(
i, np.mean(rewards), sum(rewards)))

env.close()

# ---------------------------------------------------------------------------- #
12 changes: 12 additions & 0 deletions sinergym/utils/callbacks.py
@@ -160,6 +160,18 @@ def _on_event(self) -> None:
self.save_path,
'best_model.zip'))
self.best_mean_reward = evaluation_summary['mean_reward']
# Save normalization calibration if exists
if is_wrapped(self.eval_env, NormalizeObservation):
self.logger.info(
'Save normalization calibration in evaluation folder')
np.savetxt(
fname=self.save_path +
'/mean.txt',
X=self.eval_env.mean)
np.savetxt(
fname=self.save_path +
'/var.txt',
X=self.eval_env.var)

# We close evaluation env and starts training env again
self.eval_env.close()
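
A sketch of how the files written next to `best_model.zip` might be consumed later. The directory path and the SB3 algorithm are illustrative assumptions, not taken from this repository:

```python
import numpy as np
from stable_baselines3 import SAC  # the algorithm is just an example

best_dir = './evaluation_output/best_model'  # hypothetical save_path of the callback

# The best checkpoint and the calibration that produced it now live together.
model = SAC.load(best_dir + '/best_model.zip')
mean = np.loadtxt(best_dir + '/mean.txt')
var = np.loadtxt(best_dir + '/var.txt')
# mean/var can then be handed to NormalizeObservation so that evaluation
# reproduces the exact normalization used when this checkpoint was selected.
```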
23 changes: 16 additions & 7 deletions sinergym/utils/wrappers.py
@@ -287,21 +287,25 @@ def step(self, action: Union[int, np.ndarray]) -> Tuple[

def reset(self, **kwargs):
"""Resets the environment and normalizes the observation."""

# Update normalization calibration if it is required
if self.get_wrapper_attr('episode') > 0:
self._save_normalization_calibration()

obs, info = self.env.reset(**kwargs)

# Save original obs in class attribute
self.unwrapped_observation = deepcopy(obs)

# Update normalization calibration if it is required
self._save_normalization_calibration()

return self.normalize(np.array([obs]))[0], info

def close(self):
"""Close the environment and save normalization calibration."""
self.env.close()
"""save normalization calibration and close the environment."""
# Update normalization calibration if it is required
self._save_normalization_calibration()
if self.get_wrapper_attr('episode') > 0:
self._save_normalization_calibration()

self.env.close()

# ----------------------- Wrapper extra functionality ----------------------- #

@@ -337,7 +341,12 @@ def _save_normalization_calibration(self):
"""
self.logger.info(
'Saving normalization calibration data.')
# Save in txt in output folder
# Save in txt in episode output folder
np.savetxt(fname=self.get_wrapper_attr(
'episode_path') + '/mean.txt', X=self.mean)
np.savetxt(fname=self.get_wrapper_attr(
'episode_path') + '/var.txt', X=self.var)
# Overwrite output root folder mean and var as latest calibration
np.savetxt(fname=self.get_wrapper_attr(
'workspace_path') + '/mean.txt', X=self.mean)
np.savetxt(fname=self.get_wrapper_attr(
'workspace_path') + '/var.txt', X=self.var)
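
As a reminder of what these saved values represent, the normalization applied by this family of wrappers typically uses running statistics in the form sketched below. This is an assumption based on the common Gymnasium formulation (including the default epsilon), not code taken from this diff:

```python
import numpy as np

def normalize_obs(obs: np.ndarray, mean: np.ndarray, var: np.ndarray,
                  epsilon: float = 1e-8) -> np.ndarray:
    """Standardize an observation with a stored calibration (sketch only)."""
    return (obs - mean) / np.sqrt(var + epsilon)

# Example: applying the latest calibration saved at the workspace root.
# mean = np.loadtxt('<workspace_path>/mean.txt')
# var = np.loadtxt('<workspace_path>/var.txt')
# norm_obs = normalize_obs(raw_obs, mean, var)
```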
2 changes: 1 addition & 1 deletion sinergym/version.txt
@@ -1 +1 @@
3.6.2
3.6.3