Skip to content

Commit

Permalink
Create test for vectorized environment.
Browse files Browse the repository at this point in the history
  • Loading branch information
javiarrobas committed Nov 14, 2023
1 parent 0c4a63b commit bda4405
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 7 deletions.
19 changes: 14 additions & 5 deletions examples/run_vectorized.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,20 +91,29 @@ def _init():

return _init

def train_DQN_vectorized(venv):
# Define logging directory. Monitoring data and agent model will be stored here
log_dir = os.path.join(utilities.get_root_path(), 'examples', 'agents', 'DQN_vectorized')
def train_DQN_vectorized(venv,
log_dir=os.path.join(utilities.get_root_path(), 'examples', 'agents', 'DQN_vectorized')):
'''Method to train DQN agent using vectorized environment.
Parameters
----------
venv: stable_baselines3.common.vec_env.SubprocVecEnv
vectorized environment to be learned.
'''

# Create logging directory if not exists. Monitoring data and agent model will be stored here
os.makedirs(log_dir, exist_ok=True)

# Modify the environment to include the callback
venv = VecMonitor(venv=venv, filename=os.path.join(log_dir,'monitor.csv'))

# Create the callback: evaluate with one episode after 100 steps for training. We keep it very short for testing.
# When using multiple environments, each call to ``env.step()`` will effectively correspond to ``n_envs`` steps.
# To account for that, you can use ``eval_freq = eval_freq/len(envs)``
# To account for that, you can use ``eval_freq = eval_freq/venv.num_envs``
eval_freq = 100
eval_callback = EvalCallback(venv, best_model_save_path=log_dir, log_path=log_dir,
eval_freq=int(eval_freq/len(envs)), n_eval_episodes=1, deterministic=True)
eval_freq=int(eval_freq/venv.num_envs), n_eval_episodes=1, deterministic=True)

# Try to find CUDA core since it's optimized for parallel computing tasks
device = 'cuda' if torch.cuda.is_available() else 'cpu'
Expand Down
47 changes: 45 additions & 2 deletions testing/test_boptestGymEnv.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
import shutil
from testing import utilities
from examples import run_baseline, run_sample, run_save_callback,\
run_variable_episode, train_RL
run_variable_episode, run_vectorized, train_RL
from collections import OrderedDict
from boptestGymEnv import BoptestGymEnv
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3 import A2C, DQN

url = 'http://127.0.0.1:5000'

Expand Down Expand Up @@ -382,6 +383,48 @@ def test_variable_episode(self):

# Remove model to prove further testing
shutil.rmtree(log_dir, ignore_errors=True)

def test_vectorized(self, boptest_root = "./"):
    '''
    Instantiates a vectorized environment with two BOPTEST-Gym environment replicas
    and learns from them when running in parallel using DQN for 100 timesteps.
    It assumes that `generateDockerComposeYml.py` is called first using
    `num_services=2` and `TESTCASE=bestest_hydronic_heat_pump docker-compose up`
    is invoked after to initialize the two BOPTEST test cases.
    Note that this test is also using the `EvalCallback` class from
    `stable_baselines3.common.callbacks` instead of the
    `boptestGymEnv.SaveAndTestCallback` that we typically use because
    the former was more convenient for use with vectorized environments.

    The worker processes of the vectorized environment and the logging
    directory are always released at the end of the test, also when the
    training or the comparison against the reference fails.

    Parameters
    ----------
    boptest_root: str
        Forwarded as `boptest_root_dir` to
        `run_vectorized.generate_urls_from_yml`. Presumably the directory
        holding the generated `docker-compose.yml` (to be confirmed
        against that helper).

    '''
    # Define logging directory. Monitoring data and agent model will be stored here
    log_dir = os.path.join(utilities.get_root_path(), 'examples', 'agents', 'DQN_vectorized')

    # Use URLs obtained from docker-compose.yml
    urls = run_vectorized.generate_urls_from_yml(boptest_root_dir=boptest_root)

    # Create BOPTEST-Gym environment replicas
    envs = [run_vectorized.make_env(url) for url in urls]

    # Create a vectorized environment using SubprocVecEnv
    venv = SubprocVecEnv(envs)

    try:
        # Perform a short training example with parallel learning
        run_vectorized.train_DQN_vectorized(venv, log_dir=log_dir)

        # Load the trained agent
        model = DQN.load(os.path.join(log_dir, 'best_model'))

        # Test one step with the trained model, using the first entry
        # of what the vectorized environment returns on reset
        obs = venv.reset()[0]
        df = pd.DataFrame([model.predict(obs)[0]], columns=['value'])
        df.index.name = 'keys'
        ref_filepath = os.path.join(utilities.get_root_path(),
                                    'testing', 'references', 'vectorized_training.csv')
        self.compare_ref_values_df(df, ref_filepath)
    finally:
        # Stop the subprocess workers so that they do not outlive the test
        venv.close()

        # Remove the model so that it does not interfere with further
        # testing, even if an error was raised above
        shutil.rmtree(log_dir, ignore_errors=True)

def check_obs_act_rew_kpi(self, obs=None, act=None, rew=None, kpi=None,
label='default'):
Expand Down

0 comments on commit bda4405

Please sign in to comment.