Merge pull request #154 from ibpsa/multi_dimensional_action

Multi dimensional action
ibpsa · Oct 3, 2024 · 4ebfb9a · 4ebfb9a
2 parents 9335d8c + c2f1625
commit 4ebfb9a
Show file tree

Hide file tree

Showing 8 changed files with 187 additions and 9 deletions.
diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml
@@ -24,6 +24,24 @@ jobs:
           sudo apt-get install -y docker-compose
       - name: Test local version
         run: make test-local-in-container
+  test-multiaction:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: testing
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v3
+      - name: Pull boptestgym image from registry
+        run: make pull-boptestgym
+      - name: Pull boptest_base image from registry
+        run: make pull-boptestbase
+      - name: Install Docker Compose
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y docker-compose
+      - name: Test multi-action
+        run: make test-multiaction-in-container
   test-vectorized:
     runs-on: ubuntu-latest
     defaults:

diff --git a/boptestGymEnv.py b/boptestGymEnv.py
@@ -985,8 +985,8 @@ def __init__(self, env, n_bins_act=10):
         env: gym.Env
             Original gym environment
         n_bins_act: integer
-            Number of bins to be used in the transformed observation space
-            for each observation. 
+            Number of bins to be used in the transformed action space
+            for each action. 
         
         '''
 
@@ -1012,6 +1012,48 @@ def __init__(self, env, n_bins_act=10):
 
         # Instantiate discretized action space
         self.action_space = spaces.Discrete((n_bins_act+1) ** self.n_act)
+
+    def _get_indices(self, action_wrapper):
+        """
+        Returns the bin indices, one per action dimension, that are encoded
+        in a single action of the discretized (wrapper) action space.
+
+        The wrapper action is read as a number written in base
+        `self.n_bins_act+1` with `self.n_act` digits. Each digit is the bin
+        index of one action dimension, most significant digit first.
+        
+        Parameters
+        ----------
+        action_wrapper : int
+            The action wrapper value to be converted to indices.
+        
+        Returns
+        -------
+        list
+            A list with `self.n_act` bin indices, each one in the range
+            [0, self.n_bins_act].
+
+        Example
+        -------
+        Suppose:
+        self.n_act = 3 (number of actions)
+        self.n_bins_act = 3 (number of bins per action, this means 4 values possible per action)
+        self.val_bins_act = [[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23]] (value bins for each action)
+        
+        Then, `_get_indices` example, for action_wrapper = 37:
+        indices = []
+        Loop 3 times:
+        Iteration 1: indices.append(37 % (3+1)) -> indices = [1], action_wrapper //= 4 -> action_wrapper = 9
+        Iteration 2: indices.append(9 % (3+1)) -> indices = [1, 1], action_wrapper //= 4 -> action_wrapper = 2
+        Iteration 3: indices.append(2 % (3+1)) -> indices = [1, 1, 2], action_wrapper //= 4 -> action_wrapper = 0
+        Reverse indices: [2, 1, 1]
+
+        Note
+        ----
+        To understand why we need to add 1 in `action_wrapper%(self.n_bins_act+1)` think of the edge case
+        where we only have one bin. If the action_wrapper is 1, then the index should be 1, but if we do not
+        add 1 to `self.n_bins_act`, the index would be 0 (because 1%1=0). The underlying reason is that 
+        n_bins_act is the number of bins, not the number of possible action values.
+        The same number of possible values must be used for the floor division, otherwise the
+        remaining digits are decoded in a different base than the one used for the modulus.
+        """
+        # Number of possible values per action dimension (bin edges, not bins)
+        n_values = self.n_bins_act + 1
+        indices = []
+        for _ in range(self.n_act):
+            # `divmod` rebinds the local name instead of using `//=`, so an
+            # array-like argument is never modified in place
+            action_wrapper, index = divmod(action_wrapper, n_values)
+            indices.append(index)
+        # Digits were extracted least significant first
+        return indices[::-1]
 
     def action(self, action_wrapper):
         '''This method accepts a single parameter (the modified action
@@ -1030,17 +1072,28 @@ def action(self, action_wrapper):
         
         Notes
         -----
-        To better understand what this method needs to do, see how the 
+        To better understand what this method needs to do, see what the 
         `gym.ActionWrapper` parent class is doing in `gym.core`:
         
         Implement something here that performs the following mapping:
         DiscretizedObservationWrapper.action_space --> DiscretizedActionWrapper.action_space
-        
+
+        Example
+        -------
+        For action_wrapper = 37 (follows the example of `_get_indices` above):
+
+        indices = [2, 1, 1]
+        Map indices to action values:
+        bins[2] from [0, 1, 2, 3] -> 2
+        bins[1] from [10, 11, 12, 13] -> 11
+        bins[1] from [20, 21, 22, 23] -> 21
+        Convert to NumPy array: np.asarray([2, 11, 21])
+        Return action: [2, 11, 21]
         '''
-
+        indices = self._get_indices(action_wrapper)
         # Get the action values from bin indexes
         action = [bins[x]
-                  for x, bins in zip(action_wrapper.flatten(), 
+                  for x, bins in zip(indices, 
                                      self.val_bins_act)]
 
         action = np.asarray(action).astype(self.env.action_space.dtype)

diff --git a/examples/run_multiaction.py b/examples/run_multiaction.py
@@ -0,0 +1,54 @@
+import random
+from stable_baselines3 import DQN
+from boptestGymEnv import BoptestGymEnv, NormalizedObservationWrapper, DiscretizedActionWrapper
+
+# URL handed to `BoptestGymEnv` in `train_multiaction`
+url = 'http://127.0.0.1:5000'
+# Seed used for Python's `random` module below and for the DQN agent
+seed = 123456
+
+# Seed for random starting times of episodes
+random.seed(seed)
+
+def train_multiaction():
+    '''Train a DQN agent in an environment with a multi-dimensional
+    action space.
+
+    Returns
+    -------
+    env: DiscretizedActionWrapper
+        Wrapped environment that was used for training.
+    model: DQN
+        Agent after `learn` has been called with 100 total timesteps.
+
+    '''
+
+    # Durations used below, in seconds
+    hour = 3600
+    day = 24*hour
+
+    # Three control inputs make the action space multi-dimensional
+    actions = ['oveTZonSet_u', 'oveTSupSet_u', 'oveCO2ZonSet_u']
+
+    # Observed variables, each one with a pair of numbers that is
+    # presumably its (lower, upper) bound -- verify against BoptestGymEnv
+    observations = {
+        'time': (0, 604800),
+        'reaTZon_y': (280., 310.),
+        'TDryBul': (265, 303),
+        'HDirNor': (0, 862),
+        'InternalGainsRad[1]': (0, 219),
+        'PriceElectricPowerHighlyDynamic': (-0.4, 0.4),
+        'LowerSetp[1]': (280., 310.),
+        'UpperSetp[1]': (280., 310.)
+    }
+
+    base_env = BoptestGymEnv(
+        url=url,
+        actions=actions,
+        observations=observations,
+        predictive_period=day,
+        regressive_period=6*hour,
+        max_episode_length=day,
+        warmup_period=day,
+        step_period=hour,
+        random_start_time=False,
+        start_time=31*day
+    )
+
+    # Normalize observations first, then discretize the action space
+    normalized_env = NormalizedObservationWrapper(base_env)
+    env = DiscretizedActionWrapper(normalized_env, n_bins_act=10)
+
+    # Settings of the RL agent
+    dqn_settings = dict(verbose=1, gamma=0.99,
+                        learning_rate=5e-4, batch_size=24, seed=seed,
+                        buffer_size=365*24, learning_starts=24, train_freq=1)
+    agent = DQN('MlpPolicy', env, **dqn_settings)
+
+    agent.learn(total_timesteps=100)
+
+    return env, agent
+
+
+
+
+
diff --git a/releasenotes.md b/releasenotes.md
@@ -1,6 +1,13 @@
 # Release Notes
 
-BOPTEST-Gym has two main dependencies: BOPTEST and Stable-Baselines3. For simplicity, the first two digits of the version number match the same two digits of the BOPTEST version of which BOPTEST-Gym is compatible with. For example, BOPTEST-Gym v0.3.x is compatible with BOPTEST v0.3.x. The last digit is reserved for other internal edits specific to this repository only. See [here](https://github.com/ibpsa/project1-boptest/blob/master/releasenotes.md) for BOPTEST release notes. 
+BOPTEST-Gym has two main dependencies: BOPTEST and Stable-Baselines3. For simplicity, the first two digits of the version number match the first two digits of the BOPTEST version with which BOPTEST-Gym is compatible. For example, BOPTEST-Gym v0.6.x is compatible with BOPTEST v0.6.x. The last digit is reserved for other internal edits specific to this repository only. See [here](https://github.com/ibpsa/project1-boptest/blob/master/releasenotes.md) for BOPTEST release notes. 
+
+
+## BOPTEST-Gym v0.6.0-dev
+
+Released on xx/xx/xxxx.
+
+- Support for multi-dimensional action spaces. A multi-dimensional action space is tested in the `singlezone_commercial_hydronic` test case. This is for [#19](https://github.com/ibpsa/project1-boptest-gym/issues/19). 
 
 ## BOPTEST-Gym v0.6.0
 

diff --git a/testing/Makefile b/testing/Makefile
@@ -90,11 +90,19 @@ run-boptest-case:
 	make download-boptest
 	cd project1-boptest-${BOPTEST_COMMIT} && \
 	TESTCASE=bestest_hydronic_heat_pump docker compose up -d
+	sleep 10
 
 run-boptest-case-no-cache:
 	make download-boptest
 	cd project1-boptest-${BOPTEST_COMMIT} && \
 	TESTCASE=bestest_hydronic_heat_pump docker compose up -d --force-recreate --build
+	sleep 10
+
+run-boptest-case-commercial:
+	make download-boptest
+	cd project1-boptest-${BOPTEST_COMMIT} && \
+	TESTCASE=singlezone_commercial_hydronic docker compose up -d && \
+	sleep 10
 
 run-boptest-vectorized:
 	make download-boptest && \
@@ -114,6 +122,9 @@ cleanup-boptest:
 test-local:
 	python3 -m unittest test_boptestGymEnv.BoptestGymEnvTest
 
+test-multiaction:
+	python3 -m unittest test_boptestGymEnv.BoptestGymEnvMultiActTest
+
 # Vectorized needs to run separate since modifies docker-compose.yml to have multiple boptest instances 
 test-vectorized:
 	python3 -m unittest test_boptestGymEnv.BoptestGymVecTest project1-boptest-${BOPTEST_COMMIT}
@@ -130,6 +141,14 @@ test-local-in-container:
 	make stop-boptest-case
 	make cleanup-boptest
 
+test-multiaction-in-container:
+	make run-boptest-case-commercial
+	make run-boptestgym-detached
+	make exec-boptestgym ARGS="make test-multiaction"
+	make stop-boptestgym
+	make stop-boptest-case
+	make cleanup-boptest
+
 test-vectorized-in-container:
 	make run-boptest-vectorized
 	make run-boptestgym-detached

diff --git a/testing/references/multiaction_training.csv b/testing/references/multiaction_training.csv
@@ -0,0 +1,2 @@
+keys,value
+0,841
diff --git a/testing/references/vectorized_training.csv b/testing/references/vectorized_training.csv
@@ -1,2 +1,2 @@
 keys,value
-0,0
+0,0
diff --git a/testing/test_boptestGymEnv.py b/testing/test_boptestGymEnv.py
@@ -13,7 +13,7 @@
 import shutil
 from testing import utilities
 from examples import run_baseline, run_sample, run_save_callback,\
-    run_variable_episode, run_vectorized, train_RL
+    run_variable_episode, run_vectorized, run_multiaction, train_RL
 from collections import OrderedDict
 from boptestGymEnv import BoptestGymEnv
 from stable_baselines3.common.env_checker import check_env
@@ -576,5 +576,30 @@ def check_from_cell_output(self, cell_output, str_output):
         # Check results
         self.compare_ref_json(out_json, file_ref)
 
+class BoptestGymEnvMultiActTest(unittest.TestCase, utilities.partialChecks):
+    '''Test multi-action training with the `singlezone_commercial_hydronic`
+    test case.
+
+    '''
+
+    def test_training_multi_action(self):
+        '''Checks an estimated action after an agent is trained in a
+        multi-action environment.
+
+        '''
+
+        # Train an agent in a multi-action environment.
+        self.env, model = run_multiaction.train_multiaction()
+
+        # Test one step with the trained model. Only the first element of
+        # what `reset` and `predict` return is used: the observation and
+        # the estimated action, respectively.
+        obs = self.env.reset()[0]
+        df = pd.DataFrame([model.predict(obs)[0]], columns=['value'])
+        df.index.name = 'keys'
+        ref_filepath = os.path.join(utilities.get_root_path(),
+                                    'testing', 'references',
+                                    'multiaction_training.csv')
+        self.compare_ref_values_df(df, ref_filepath)
+
+    def tearDown(self):
+        '''Clean up after each test.'''
+        # `self.env` only exists once training has returned. Guard the
+        # close so that a failure during training is not followed by an
+        # unrelated AttributeError raised from here.
+        env = getattr(self, 'env', None)
+        if env is not None:
+            env.close()
+
+
 if __name__ == '__main__':
     utilities.run_tests(os.path.basename(__file__))