From 09b3585a42ae9b499a7322d80eefd6a6fe469b99 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky
Date: Thu, 19 Mar 2020 10:28:17 -0700
Subject: [PATCH 1/3] Visualizer for rllib fix, add more logging to tensorboard

---
 .../exp_configs/non_rl/i210_subnetwork.py |  5 +--
 examples/train.py                         | 33 ++++++++++++++++++-
 flow/core/rewards.py                      |  2 +-
 flow/utils/rllib.py                       |  6 +++-
 4 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py
index dd85c56cf..aa2151bf2 100644
--- a/examples/exp_configs/non_rl/i210_subnetwork.py
+++ b/examples/exp_configs/non_rl/i210_subnetwork.py
@@ -100,8 +100,9 @@
 edge_id = "119257908#1-AddedOnRampEdge"
 
 custom_callables = {
-    "avg_merge_speed": lambda env: np.nan_to_num(np.mean(
-        env.k.vehicle.get_speed(env.k.vehicle.get_ids_by_edge(edge_id)))),
+    "avg_speed": lambda env: np.nan_to_num(np.mean([speed for speed
+                                                    in env.k.vehicle.get_speed(env.k.vehicle.get_ids())
+                                                    if speed > 0])),
     "avg_outflow": lambda env: np.nan_to_num(
         env.k.vehicle.get_outflow_rate(120)),
     # we multiply by 5 to account for the vehicle length and by 1000 to convert
diff --git a/examples/train.py b/examples/train.py
index a159c13ee..4577d0bfe 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -8,11 +8,13 @@
 """
 import argparse
+from copy import deepcopy
 import json
 import os
 import sys
 from time import strftime
 
+import numpy as np
 from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
 from stable_baselines import PPO2
 
 import ray
@@ -25,8 +27,8 @@
     from ray.rllib.agents.agent import get_agent_class
 except ImportError:
     from ray.rllib.agents.registry import get_agent_class
-from copy import deepcopy
 
+from flow.core.rewards import energy_consumption
 from flow.core.util import ensure_dir
 from flow.utils.registry import env_constructor
 from flow.utils.rllib import FlowParamsEncoder, get_flow_params
@@ -152,6 +154,35 @@ def setup_exps_rllib(flow_params,
     config["num_sgd_iter"] = 10
     config["horizon"] = horizon
 
+    # define some standard and useful callbacks
+    def on_episode_start(info):
+        episode = info["episode"]
+        episode.user_data["avg_speed"] = []
+        episode.user_data["energy"] = []
+
+    def on_episode_step(info):
+        episode = info["episode"]
+        env = info["env"].get_unwrapped()[0]
+        speed = np.mean([speed for speed in env.k.vehicle.get_speed(env.k.vehicle.get_ids()) if speed > 0])
+        if not np.isnan(speed):
+            episode.user_data["avg_speed"].append(speed)
+        energy = energy_consumption(env)
+        if not np.isnan(energy):
+            episode.user_data["energy"].append(energy)
+
+    def on_episode_end(info):
+        episode = info["episode"]
+        avg_speed = np.mean(episode.user_data["avg_speed"])
+        avg_energy = np.mean(episode.user_data["avg_energy"])
+
+        episode.custom_metrics["avg_speed"] = avg_speed
+        episode.custom_metrics["avg_energy"] = avg_energy
+
+    config["callbacks"] = {"on_episode_start": tune.function(on_episode_start),
+                           "on_episode_step": tune.function(on_episode_step),
+                           "on_episode_end": tune.function(on_episode_end)}
+
+
     # save the flow params for replay
     flow_json = json.dumps(
         flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
diff --git a/flow/core/rewards.py b/flow/core/rewards.py
index 6de472af2..02fcad25b 100755
--- a/flow/core/rewards.py
+++ b/flow/core/rewards.py
@@ -329,4 +329,4 @@ def energy_consumption(env, gain=.001):
         power += M * speed * accel + M * g * Cr * speed + \
             0.5 * rho * A * Ca * speed ** 3
 
-    return -gain * power
+    return -gain * power / len(env.k.vehicle.get_ids())
diff --git a/flow/utils/rllib.py b/flow/utils/rllib.py
index b5abc9a23..9195b524b 100644
--- a/flow/utils/rllib.py
+++ b/flow/utils/rllib.py
@@ -3,8 +3,9 @@
 
 This includes: environment generation, serialization, and visualization.
 """
-import json
 from copy import deepcopy
+import json
+import sys
 import os
 
 import flow.envs
@@ -207,6 +208,9 @@
 
 def get_rllib_pkl(path):
     """Return the data from the specified rllib configuration file."""
+    dirname = os.getcwd()
+    filename = os.path.join(dirname, '../../examples/')
+    sys.path.append(filename)
     config_path = os.path.join(path, "params.pkl")
    if not os.path.exists(config_path):
         config_path = os.path.join(path, "../params.pkl")
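Note on the callbacks added in examples/train.py above: they use RLlib's legacy dict-of-functions callback API, where plain functions are wrapped in tune.function() and attached under config["callbacks"], and anything written to episode.custom_metrics is aggregated across episodes and exported to TensorBoard. The self-contained sketch below shows that wiring; it assumes a Ray 0.8-era install and uses CartPole-v0 as a stand-in environment, neither of which is part of this patch.

import numpy as np
import ray
from ray import tune


def on_episode_start(info):
    # user_data is a per-episode scratch dict for accumulating step values
    info["episode"].user_data["speed"] = []


def on_episode_step(info):
    # a real callback would read a measurement from the env here
    info["episode"].user_data["speed"].append(1.0)


def on_episode_end(info):
    episode = info["episode"]
    # values placed in custom_metrics are aggregated over episodes and
    # surface in TensorBoard as custom_metrics/<name>_mean, _min and _max
    episode.custom_metrics["speed"] = np.mean(episode.user_data["speed"])


if __name__ == "__main__":
    ray.init()
    tune.run("PPO", stop={"training_iteration": 1}, config={
        "env": "CartPole-v0",
        "callbacks": {"on_episode_start": tune.function(on_episode_start),
                      "on_episode_step": tune.function(on_episode_step),
                      "on_episode_end": tune.function(on_episode_end)}})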
From 2b840b386661383579020de568303466aa59d6aa Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky
Date: Thu, 19 Mar 2020 12:44:22 -0700
Subject: [PATCH 2/3] Add outflow to tensorboard

---
 examples/train.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/examples/train.py b/examples/train.py
index 4577d0bfe..225ac53ea 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -159,6 +159,7 @@ def on_episode_start(info):
         episode = info["episode"]
         episode.user_data["avg_speed"] = []
         episode.user_data["energy"] = []
+        episode.user_data["outflow"] = []
 
     def on_episode_step(info):
         episode = info["episode"]
@@ -172,11 +173,14 @@ def on_episode_step(info):
 
     def on_episode_end(info):
         episode = info["episode"]
+        env = info["env"].get_unwrapped()[0]
         avg_speed = np.mean(episode.user_data["avg_speed"])
         avg_energy = np.mean(episode.user_data["avg_energy"])
 
         episode.custom_metrics["avg_speed"] = avg_speed
         episode.custom_metrics["avg_energy"] = avg_energy
+        episode.custom_metrics["outflow"] = env.k.vehicle.get_outflow_rate()
+
 
     config["callbacks"] = {"on_episode_start": tune.function(on_episode_start),
                            "on_episode_step": tune.function(on_episode_step),
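The outflow metric added above is, conceptually, a trailing-window departure count converted to vehicles per hour. The helper below is a hypothetical illustration of that arithmetic (the name outflow_rate and its arguments are made up for this sketch; it is not Flow's kernel implementation of get_outflow_rate):

def outflow_rate(exit_times_s, now_s, time_span_s=120.0):
    """Return vehicles/hour from exits within the last time_span_s seconds."""
    recent = [t for t in exit_times_s if now_s - time_span_s <= t <= now_s]
    return 3600.0 * len(recent) / time_span_s


# six vehicles leaving within a 120 s window -> 180 veh/hr
print(outflow_rate([10, 30, 55, 80, 99, 118], now_s=120.0))  # 180.0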
From 8fe849373d1cdf632b22faef1c4ec5490f817aeb Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky
Date: Thu, 19 Mar 2020 15:17:28 -0700
Subject: [PATCH 3/3] Flake8

---
 examples/train.py                | 4 +---
 flow/utils/rllib.py              | 1 -
 tests/fast_tests/test_rewards.py | 4 ++--
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/examples/train.py b/examples/train.py
index 225ac53ea..702aad43c 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -175,18 +175,16 @@ def on_episode_end(info):
         episode = info["episode"]
         env = info["env"].get_unwrapped()[0]
         avg_speed = np.mean(episode.user_data["avg_speed"])
-        avg_energy = np.mean(episode.user_data["avg_energy"])
+        avg_energy = np.mean(episode.user_data["energy"])
 
         episode.custom_metrics["avg_speed"] = avg_speed
         episode.custom_metrics["avg_energy"] = avg_energy
         episode.custom_metrics["outflow"] = env.k.vehicle.get_outflow_rate()
-
 
     config["callbacks"] = {"on_episode_start": tune.function(on_episode_start),
                            "on_episode_step": tune.function(on_episode_step),
                            "on_episode_end": tune.function(on_episode_end)}
-
 
     # save the flow params for replay
     flow_json = json.dumps(
         flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
diff --git a/flow/utils/rllib.py b/flow/utils/rllib.py
index af8d8a37c..0206f1b61 100644
--- a/flow/utils/rllib.py
+++ b/flow/utils/rllib.py
@@ -5,7 +5,6 @@
 """
 from copy import deepcopy
 import json
-import sys
 import os
 import sys
 
diff --git a/tests/fast_tests/test_rewards.py b/tests/fast_tests/test_rewards.py
index 3f2e08cde..487bda695 100644
--- a/tests/fast_tests/test_rewards.py
+++ b/tests/fast_tests/test_rewards.py
@@ -169,13 +169,13 @@ def test_energy_consumption(self):
 
         # change the speed of one vehicle
         env.k.vehicle.test_set_speed("test_0", 1)
-        self.assertEqual(energy_consumption(env), -12.059337750000001)
+        self.assertEqual(energy_consumption(env), -1.205933775)
 
         # check that stepping change the previous speeds and increases the energy consumption
         env.step(rl_actions=None)
         env.step(rl_actions=None)
         self.assertGreater(env.k.vehicle.get_previous_speed("test_0"), 0.0)
-        self.assertLess(energy_consumption(env), -12.059337750000001)
+        self.assertLess(energy_consumption(env), -1.205933775)
 
     def test_boolean_action_penalty(self):
         """Test the boolean_action_penalty method."""
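The updated constants in test_rewards.py follow from the rewards.py change in patch 1, which now divides the accumulated power by the number of vehicles in the network. A quick consistency check, assuming the test network holds 10 vehicles (the count is not visible in this patch):

import math

old_expected = -12.059337750000001  # expected energy before the patch
new_expected = -1.205933775         # expected energy after the patch
num_vehicles = 10                   # assumption: vehicles in the test network

print(math.isclose(old_expected / num_vehicles, new_expected))  # True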