Skip to content

Commit

Permalink
Merge pull request #154 from ibpsa/multi_dimensional_action
Browse files Browse the repository at this point in the history
Multi dimensional action
  • Loading branch information
javiarrobas authored Oct 3, 2024
2 parents 9335d8c + c2f1625 commit 4ebfb9a
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 9 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/github-actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,24 @@ jobs:
sudo apt-get install -y docker-compose
- name: Test local version
run: make test-local-in-container
# CI job that runs the multi-action test target (`make test-multiaction-in-container`)
# from the `testing` directory after pulling the required Docker images.
test-multiaction:
runs-on: ubuntu-latest
defaults:
run:
working-directory: testing
steps:
- name: Check out repository code
uses: actions/checkout@v3
- name: Pull boptestgym image from registry
run: make pull-boptestgym
- name: Pull boptest_base image from registry
run: make pull-boptestbase
- name: Install Docker Compose
run: |
sudo apt-get update
sudo apt-get install -y docker-compose
- name: Test multi-action
run: make test-multiaction-in-container
test-vectorized:
runs-on: ubuntu-latest
defaults:
Expand Down
65 changes: 59 additions & 6 deletions boptestGymEnv.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,8 +985,8 @@ def __init__(self, env, n_bins_act=10):
env: gym.Env
Original gym environment
n_bins_act: integer
Number of bins to be used in the transformed observation space
for each observation.
Number of bins to be used in the transformed action space
for each action.
'''

Expand All @@ -1012,6 +1012,48 @@ def __init__(self, env, n_bins_act=10):

# Instantiate discretized action space
self.action_space = spaces.Discrete((n_bins_act+1) ** self.n_act)

def _get_indices(self, action_wrapper):
    """
    Decode a flat discrete action into one bin index per action dimension.

    The discretized action space is a single `Discrete` space with
    `(n_bins_act+1) ** n_act` elements. A flat action is therefore read as
    a number written in base `n_bins_act+1` with `n_act` digits, where each
    digit is the bin index of one action dimension (most significant digit
    first in the returned list).

    Parameters
    ----------
    action_wrapper : int
        Flat action of the discretized action space. Integer-like NumPy
        values are accepted as well; the argument is never modified.

    Returns
    -------
    list
        `n_act` bin indices, each in the range `[0, n_bins_act]`.

    Example
    -------
    Suppose:
        self.n_act = 3 (number of actions)
        self.n_bins_act = 3 (number of bins per action, i.e. 4 possible
        values per action)

    Then, for action_wrapper = 37:
        Iteration 1: divmod(37, 4) = (9, 1) -> indices = [1]
        Iteration 2: divmod(9, 4)  = (2, 1) -> indices = [1, 1]
        Iteration 3: divmod(2, 4)  = (0, 2) -> indices = [1, 1, 2]
        Reverse indices: [2, 1, 1]

    Note
    ----
    The base is `n_bins_act+1` and not `n_bins_act` because `n_bins_act`
    is the number of bins, not the number of possible action values. With
    a single bin there are two possible values (indices 0 and 1), and
    `1 % 1` would wrongly give 0 for the flat action 1.

    """
    # Number of possible values per action dimension; this is the base of
    # the positional encoding and must be used for BOTH the remainder and
    # the quotient, otherwise the digits after the first are wrong.
    n_values = self.n_bins_act + 1

    indices = []
    for _ in range(self.n_act):
        # divmod rebinds the local name instead of dividing in place, so
        # a NumPy array passed by the caller is left untouched.
        action_wrapper, index = divmod(action_wrapper, n_values)
        indices.append(index)

    # Digits were extracted least-significant first
    return indices[::-1]

def action(self, action_wrapper):
'''This method accepts a single parameter (the modified action
Expand All @@ -1030,17 +1072,28 @@ def action(self, action_wrapper):
Notes
-----
To better understand what this method needs to do, see how the
To better understand what this method needs to do, see what the
`gym.ActionWrapper` parent class is doing in `gym.core`:
Implement something here that performs the following mapping:
DiscretizedObservationWrapper.action_space --> DiscretizedActionWrapper.action_space
Example
-------
For action_wrapper = 37 (follows the example of `_get_indices` above):
indices = [2, 1, 1]
Map indices to action values:
bins[2] from [0, 1, 2, 3] -> 2
bins[1] from [10, 11, 12, 13] -> 11
bins[1] from [20, 21, 22, 23] -> 21
Convert to NumPy array: np.asarray([2, 11, 21])
Return action: [2, 11, 21]
'''

indices = self._get_indices(action_wrapper)
# Get the action values from bin indexes
action = [bins[x]
for x, bins in zip(action_wrapper.flatten(),
for x, bins in zip(indices,
self.val_bins_act)]

action = np.asarray(action).astype(self.env.action_space.dtype)
Expand Down
54 changes: 54 additions & 0 deletions examples/run_multiaction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import random
from stable_baselines3 import DQN
from boptestGymEnv import BoptestGymEnv, NormalizedObservationWrapper, DiscretizedActionWrapper

# Address passed to `BoptestGymEnv` as its `url` argument
url = 'http://127.0.0.1:5000'
# Seed used both for Python's `random` module and for the DQN agent
seed = 123456

# Seed for random starting times of episodes
random.seed(seed)

def train_multiaction():
    '''Train a DQN agent in an environment with a multi-dimensional action.

    The environment controls three inputs at once. Observations are
    normalized and the action space is discretized before the agent is
    created, so that DQN can be used.

    Returns
    -------
    env: DiscretizedActionWrapper
        The wrapped environment used for training.
    model: DQN
        The agent after learning for 100 timesteps.

    '''

    # Time spans are written as multiples of an hour / a day
    # (presumably seconds -- confirm against `BoptestGymEnv`)
    hour = 3600
    day = 24*hour

    controlled_inputs = ['oveTZonSet_u', 'oveTSupSet_u', 'oveCO2ZonSet_u']

    # Observed variables with their (lower, upper) bounds
    observation_bounds = {
        'time': (0, 604800),
        'reaTZon_y': (280., 310.),
        'TDryBul': (265, 303),
        'HDirNor': (0, 862),
        'InternalGainsRad[1]': (0, 219),
        'PriceElectricPowerHighlyDynamic': (-0.4, 0.4),
        'LowerSetp[1]': (280., 310.),
        'UpperSetp[1]': (280., 310.)
    }

    base_env = BoptestGymEnv(
        url=url,
        actions=controlled_inputs,
        observations=observation_bounds,
        predictive_period=day,
        regressive_period=6*hour,
        max_episode_length=day,
        warmup_period=day,
        step_period=hour,
        random_start_time=False,
        start_time=31*day
    )

    # Normalize observations first, then discretize the action space
    wrapped_env = DiscretizedActionWrapper(
        NormalizedObservationWrapper(base_env), n_bins_act=10)

    # Instantiate an RL agent
    agent_settings = dict(
        verbose=1, gamma=0.99, learning_rate=5e-4, batch_size=24,
        seed=seed, buffer_size=365*24, learning_starts=24, train_freq=1)
    agent = DQN('MlpPolicy', wrapped_env, **agent_settings)

    agent.learn(total_timesteps=100)

    return wrapped_env, agent





9 changes: 8 additions & 1 deletion releasenotes.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Release Notes

BOPTEST-Gym has two main dependencies: BOPTEST and Stable-Baselines3. For simplicity, the first two digits of the version number match the same two digits of the BOPTEST version of which BOPTEST-Gym is compatible with. For example, BOPTEST-Gym v0.3.x is compatible with BOPTEST v0.3.x. The last digit is reserved for other internal edits specific to this repository only. See [here](https://github.com/ibpsa/project1-boptest/blob/master/releasenotes.md) for BOPTEST release notes.
BOPTEST-Gym has two main dependencies: BOPTEST and Stable-Baselines3. For simplicity, the first two digits of the version number match the first two digits of the BOPTEST version that BOPTEST-Gym is compatible with. For example, BOPTEST-Gym v0.6.x is compatible with BOPTEST v0.6.x. The last digit is reserved for other internal edits specific to this repository only. See [here](https://github.com/ibpsa/project1-boptest/blob/master/releasenotes.md) for BOPTEST release notes.


## BOPTEST-Gym v0.6.0-dev

Released on xx/xx/xxxx.

- Support for multi-dimensional action spaces. A multi-dimensional action space is tested in the `singlezone_commercial_hydronic` test case. This is for [#19](https://github.com/ibpsa/project1-boptest-gym/issues/19).

## BOPTEST-Gym v0.6.0

Expand Down
19 changes: 19 additions & 0 deletions testing/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,19 @@ run-boptest-case:
make download-boptest
cd project1-boptest-${BOPTEST_COMMIT} && \
TESTCASE=bestest_hydronic_heat_pump docker compose up -d
sleep 10

run-boptest-case-no-cache:
make download-boptest
cd project1-boptest-${BOPTEST_COMMIT} && \
TESTCASE=bestest_hydronic_heat_pump docker compose up -d --force-recreate --build
sleep 10

# Start BOPTEST with the `singlezone_commercial_hydronic` test case (used by the
# multi-action test), then wait 10 s before returning.
run-boptest-case-commercial:
make download-boptest
cd project1-boptest-${BOPTEST_COMMIT} && \
TESTCASE=singlezone_commercial_hydronic docker compose up -d && \
sleep 10

run-boptest-vectorized:
make download-boptest && \
Expand All @@ -114,6 +122,9 @@ cleanup-boptest:
test-local:
python3 -m unittest test_boptestGymEnv.BoptestGymEnvTest

# Run only the multi-action test class; a BOPTEST test case must already be running
# (see `test-multiaction-in-container`).
test-multiaction:
python3 -m unittest test_boptestGymEnv.BoptestGymEnvMultiActTest

# Vectorized needs to run separate since modifies docker-compose.yml to have multiple boptest instances
test-vectorized:
python3 -m unittest test_boptestGymEnv.BoptestGymVecTest project1-boptest-${BOPTEST_COMMIT}
Expand All @@ -130,6 +141,14 @@ test-local-in-container:
make stop-boptest-case
make cleanup-boptest

# End-to-end multi-action test: start the commercial BOPTEST case and the
# boptestgym container, run `make test-multiaction` inside it, then stop and
# clean everything up.
test-multiaction-in-container:
make run-boptest-case-commercial
make run-boptestgym-detached
make exec-boptestgym ARGS="make test-multiaction"
make stop-boptestgym
make stop-boptest-case
make cleanup-boptest

test-vectorized-in-container:
make run-boptest-vectorized
make run-boptestgym-detached
Expand Down
2 changes: 2 additions & 0 deletions testing/references/multiaction_training.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
keys,value
0,841
2 changes: 1 addition & 1 deletion testing/references/vectorized_training.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
keys,value
0,0
0,0
27 changes: 26 additions & 1 deletion testing/test_boptestGymEnv.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import shutil
from testing import utilities
from examples import run_baseline, run_sample, run_save_callback,\
run_variable_episode, run_vectorized, train_RL
run_variable_episode, run_vectorized, run_multiaction, train_RL
from collections import OrderedDict
from boptestGymEnv import BoptestGymEnv
from stable_baselines3.common.env_checker import check_env
Expand Down Expand Up @@ -576,5 +576,30 @@ def check_from_cell_output(self, cell_output, str_output):
# Check results
self.compare_ref_json(out_json, file_ref)

class BoptestGymEnvMultiActTest(unittest.TestCase, utilities.partialChecks):
    '''Test multi-action training with the `singlezone_commercial_hydronic`
    test case.

    '''

    def test_training_multi_action(self):
        '''Checks an estimated action after an agent is trained in a
        multi-action environment.

        '''

        # Train an agent in a multi-action environment.
        self.env, model = run_multiaction.train_multiaction()

        # Test one step with the trained model: `reset` returns a tuple
        # whose first element is the observation, and `predict` returns a
        # tuple whose first element is the action.
        obs = self.env.reset()[0]
        df = pd.DataFrame([model.predict(obs)[0]], columns=['value'])
        df.index.name = 'keys'
        ref_filepath = os.path.join(utilities.get_root_path(),
                                    'testing', 'references',
                                    'multiaction_training.csv')
        self.compare_ref_values_df(df, ref_filepath)

    def tearDown(self):
        '''Clean up after each test.

        The environment only exists if training got far enough to create
        it. Guarding the call keeps a failure during training from being
        masked by an `AttributeError` raised here.

        '''
        env = getattr(self, 'env', None)
        if env is not None:
            env.close()


if __name__ == '__main__':
utilities.run_tests(os.path.basename(__file__))

0 comments on commit 4ebfb9a

Please sign in to comment.