def _get_indices(self, action_wrapper):
    """
    Returns the bin index of each action encoded in a wrapper action.

    The wrapper action is read as a number with `self.n_act` digits in
    base `self.n_bins_act+1`, most significant digit first. Each digit
    is the bin index of one action.

    Parameters
    ----------
    action_wrapper : int
        Action of the discretized (wrapper) action space to be decoded.

    Returns
    -------
    list
        `self.n_act` bin indices, most significant digit first. For a
        non-negative `action_wrapper` each index lies within
        `[0, self.n_bins_act]`.

    Example
    -------
    Suppose:
        self.n_act = 3 (number of actions)
        self.n_bins_act = 3 (number of bins per action, this means
        4 values possible per action)

    Then, for action_wrapper = 37:
        Iteration 1: divmod(37, 4) = (9, 1) -> indices = [1]
        Iteration 2: divmod(9, 4)  = (2, 1) -> indices = [1, 1]
        Iteration 3: divmod(2, 4)  = (0, 2) -> indices = [1, 1, 2]
        Reverse indices: [2, 1, 1]
    Check: 2*4**2 + 1*4 + 1 = 37.

    Note
    ----
    The base is `self.n_bins_act+1` and not `self.n_bins_act` because
    `n_bins_act` is the number of bins, not the number of possible
    action values. Think of the edge case with only one bin: the
    wrapper action 1 must map to index 1, but 1%1 = 0. The same base
    must be used both to extract a digit and to drop it; dividing by
    `self.n_bins_act` instead would decode 37 above as [0, 0, 1].

    """
    # Each action takes one of n_bins_act+1 values, which is the base
    # of the positional encoding.
    n_values = self.n_bins_act + 1

    remainder = action_wrapper
    indices = []
    for _ in range(self.n_act):
        # divmod rebinds `remainder` instead of updating it in place,
        # so a NumPy array passed by the caller is left untouched.
        remainder, index = divmod(remainder, n_values)
        indices.append(index)

    # Digits were extracted least significant first.
    return indices[::-1]
+ + ''' + + env = BoptestGymEnv( + url=url, + actions=['oveTZonSet_u', 'oveTSupSet_u', 'oveCO2ZonSet_u'], + observations={ + 'time': (0, 604800), + 'reaTZon_y': (280., 310.), + 'TDryBul': (265, 303), + 'HDirNor': (0, 862), + 'InternalGainsRad[1]': (0, 219), + 'PriceElectricPowerHighlyDynamic': (-0.4, 0.4), + 'LowerSetp[1]': (280., 310.), + 'UpperSetp[1]': (280., 310.) + }, + predictive_period=24*3600, + regressive_period=6*3600, + max_episode_length=24*3600, + warmup_period=24*3600, + step_period=3600, + random_start_time=False, + start_time=31*24*3600 + ) + + # Normalize observations and discretize action space + env = NormalizedObservationWrapper(env) + env = DiscretizedActionWrapper(env, n_bins_act=10) + + # Instantiate an RL agent + model = DQN('MlpPolicy', env, verbose=1, gamma=0.99, + learning_rate=5e-4, batch_size=24, seed=seed, + buffer_size=365*24, learning_starts=24, train_freq=1) + + model.learn(total_timesteps=100) + + return env, model + + + + + diff --git a/releasenotes.md b/releasenotes.md index 9c696a0..b05e18e 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -1,6 +1,13 @@ # Release Notes -BOPTEST-Gym has two main dependencies: BOPTEST and Stable-Baselines3. For simplicity, the first two digits of the version number match the same two digits of the BOPTEST version of which BOPTEST-Gym is compatible with. For example, BOPTEST-Gym v0.3.x is compatible with BOPTEST v0.3.x. The last digit is reserved for other internal edits specific to this repository only. See [here](https://github.com/ibpsa/project1-boptest/blob/master/releasenotes.md) for BOPTEST release notes. +BOPTEST-Gym has two main dependencies: BOPTEST and Stable-Baselines3. For simplicity, the first two digits of the version number match the same two digits of the BOPTEST version of which BOPTEST-Gym is compatible with. For example, BOPTEST-Gym v0.6.x is compatible with BOPTEST v0.6.x. The last digit is reserved for other internal edits specific to this repository only. 
See [here](https://github.com/ibpsa/project1-boptest/blob/master/releasenotes.md) for BOPTEST release notes. + + +## BOPTEST-Gym v0.6.0-dev + +Released on xx/xx/xxxx. + +- Support for multi-dimensional action spaces. A multi-dimensional action space is tested in the `singlezone_commercial_hydronic` test case. This is for [#19](https://github.com/ibpsa/project1-boptest-gym/issues/19). ## BOPTEST-Gym v0.6.0 diff --git a/testing/Makefile b/testing/Makefile index ea04040..8784ad0 100644 --- a/testing/Makefile +++ b/testing/Makefile @@ -90,11 +90,19 @@ run-boptest-case: make download-boptest cd project1-boptest-${BOPTEST_COMMIT} && \ TESTCASE=bestest_hydronic_heat_pump docker compose up -d + sleep 10 run-boptest-case-no-cache: make download-boptest cd project1-boptest-${BOPTEST_COMMIT} && \ TESTCASE=bestest_hydronic_heat_pump docker compose up -d --force-recreate --build + sleep 10 + +run-boptest-case-commercial: + make download-boptest + cd project1-boptest-${BOPTEST_COMMIT} && \ + TESTCASE=singlezone_commercial_hydronic docker compose up -d && \ + sleep 10 run-boptest-vectorized: make download-boptest && \ @@ -114,6 +122,9 @@ cleanup-boptest: test-local: python3 -m unittest test_boptestGymEnv.BoptestGymEnvTest +test-multiaction: + python3 -m unittest test_boptestGymEnv.BoptestGymEnvMultiActTest + # Vectorized needs to run separate since modifies docker-compose.yml to have multiple boptest instances test-vectorized: python3 -m unittest test_boptestGymEnv.BoptestGymVecTest project1-boptest-${BOPTEST_COMMIT} @@ -130,6 +141,14 @@ test-local-in-container: make stop-boptest-case make cleanup-boptest +test-multiaction-in-container: + make run-boptest-case-commercial + make run-boptestgym-detached + make exec-boptestgym ARGS="make test-multiaction" + make stop-boptestgym + make stop-boptest-case + make cleanup-boptest + test-vectorized-in-container: make run-boptest-vectorized make run-boptestgym-detached diff --git a/testing/references/multiaction_training.csv 
class BoptestGymEnvMultiActTest(unittest.TestCase, utilities.partialChecks):
    '''Test multi-action training with the `singlezone_commercial_hydronic`
    test case.

    '''

    def test_training_multi_action(self):
        '''Checks an estimated action after an agent is trained in a
        multi-action environment. The action predicted for the first
        observation after a reset is compared against the reference
        stored in `testing/references/multiaction_training.csv`.

        '''

        # Train an agent in a multi-action environment. The environment
        # is kept on the instance so that `tearDown` can close it.
        self.env, model = run_multiaction.train_multiaction()

        # Test one step with the trained model
        obs = self.env.reset()[0]
        df = pd.DataFrame([model.predict(obs)[0]], columns=['value'])
        df.index.name = 'keys'
        ref_filepath = os.path.join(utilities.get_root_path(),
            'testing', 'references', 'multiaction_training.csv')
        self.compare_ref_values_df(df, ref_filepath)

    def tearDown(self):
        '''Clean up after each test.

        '''

        # `self.env` is only set once training returns. Guard the close
        # so that a failure during training is not reported together
        # with an unrelated AttributeError raised from here.
        env = getattr(self, 'env', None)
        if env is not None:
            env.close()