From bda440514187991731175364091a23fd0afbb24a Mon Sep 17 00:00:00 2001
From: Javier Arroyo <javier.arroyo@kuleuven.be>
Date: Tue, 14 Nov 2023 17:36:02 +0100
Subject: [PATCH] Create test for vectorized environment.

---
 examples/run_vectorized.py    | 19 ++++++++++----
 testing/test_boptestGymEnv.py | 47 +++++++++++++++++++++++++++++++++--
 2 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/examples/run_vectorized.py b/examples/run_vectorized.py
index 8509be5..aa33988 100644
--- a/examples/run_vectorized.py
+++ b/examples/run_vectorized.py
@@ -91,9 +91,18 @@ def _init():
 
     return _init
 
-def train_DQN_vectorized(venv):
-    # Define logging directory. Monitoring data and agent model will be stored here
-    log_dir = os.path.join(utilities.get_root_path(), 'examples', 'agents', 'DQN_vectorized')
+def train_DQN_vectorized(venv, 
+                         log_dir=os.path.join(utilities.get_root_path(), 'examples', 'agents', 'DQN_vectorized')):
+    '''Train a DQN agent using a vectorized environment.
+
+    Parameters
+    ----------
+    venv: stable_baselines3.common.vec_env.SubprocVecEnv
+        Vectorized environment to be learned.
+    log_dir: str
+        Directory where monitoring data and the agent model are stored.
+    '''
+    # Create logging directory if not exists. Monitoring data and agent model will be stored here
     os.makedirs(log_dir, exist_ok=True)
 
     # Modify the environment to include the callback
@@ -101,10 +110,10 @@ def train_DQN_vectorized(venv):
             
     # Create the callback: evaluate with one episode after 100 steps for training. We keep it very short for testing.
     # When using multiple environments, each call to ``env.step()`` will effectively correspond to ``n_envs`` steps. 
-    # To account for that, you can use ``eval_freq = eval_freq/len(envs)``
+    # To account for that, you can use ``eval_freq = eval_freq/venv.num_envs``
     eval_freq = 100
     eval_callback = EvalCallback(venv, best_model_save_path=log_dir, log_path=log_dir, 
-                                 eval_freq=int(eval_freq/len(envs)), n_eval_episodes=1, deterministic=True)
+                                 eval_freq=int(eval_freq/venv.num_envs), n_eval_episodes=1, deterministic=True)
 
     # Try to find CUDA core since it's optimized for parallel computing tasks
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
diff --git a/testing/test_boptestGymEnv.py b/testing/test_boptestGymEnv.py
index b57b0ac..8aedf36 100644
--- a/testing/test_boptestGymEnv.py
+++ b/testing/test_boptestGymEnv.py
@@ -12,11 +12,12 @@
 import shutil
 from testing import utilities
 from examples import run_baseline, run_sample, run_save_callback,\
-    run_variable_episode, train_RL
+    run_variable_episode, run_vectorized, train_RL
 from collections import OrderedDict
 from boptestGymEnv import BoptestGymEnv
 from stable_baselines3.common.env_checker import check_env
-from stable_baselines3 import A2C
+from stable_baselines3.common.vec_env import SubprocVecEnv
+from stable_baselines3 import A2C, DQN
 
 url = 'http://127.0.0.1:5000'
 
@@ -382,6 +383,48 @@ def test_variable_episode(self):
         
         # Remove model to prove further testing
         shutil.rmtree(log_dir, ignore_errors=True)
+    
+    def test_vectorized(self, boptest_root = "./"):
+        '''
+        Instantiates a vectorized environment with two BOPTEST-Gym environment replicas
+        and learns from them when running in parallel using DQN for 100 timesteps.
+        It assumes that `generateDockerComposeYml.py` is called first using
+        `num_services=2` and `TESTCASE=bestest_hydronic_heat_pump docker-compose up`
+        is invoked after to initialize the two BOPTEST test cases.
+        Note that this test is also using the `EvalCallback` class from
+        `stable_baselines3.common.callbacks` instead of the
+        `boptestGymEnv.SaveAndTestCallback` that we typically use because
+        the former was more convenient for use with vectorized environments.
+
+        Parameters
+        ----------
+        boptest_root: str
+            Directory passed to `run_vectorized.generate_urls_from_yml`.
+
+        '''
+        # Define logging directory. Monitoring data and agent model will be stored here
+        log_dir = os.path.join(utilities.get_root_path(), 'examples', 'agents', 'DQN_vectorized')
+        # Use URLs obtained from docker-compose.yml
+        urls = run_vectorized.generate_urls_from_yml(boptest_root_dir=boptest_root)
+        # Create a vectorized environment with one BOPTEST-Gym replica per URL
+        venv = SubprocVecEnv([run_vectorized.make_env(url) for url in urls])
+        try:
+            # Perform a short training example with parallel learning
+            run_vectorized.train_DQN_vectorized(venv, log_dir=log_dir)
+            # Load the trained agent
+            model = DQN.load(os.path.join(log_dir, 'best_model'))
+            # Test one step with the trained model
+            obs = venv.reset()[0]
+            df = pd.DataFrame([model.predict(obs)[0]], columns=['value'])
+            df.index.name = 'keys'
+            ref_filepath = os.path.join(utilities.get_root_path(),
+                                'testing', 'references', 'vectorized_training.csv')
+            self.compare_ref_values_df(df, ref_filepath)
+        finally:
+            # Always stop the worker subprocesses, even if the test fails
+            venv.close()
+            # Remove the model so it does not affect further testing
+            shutil.rmtree(log_dir, ignore_errors=True)
         
     def check_obs_act_rew_kpi(self, obs=None, act=None, rew=None, kpi=None,
                               label='default'):