Visualizer fix for rllib, add more logging to tensorboard #879

Open · wants to merge 6 commits into base: master
examples/exp_configs/non_rl/i210_subnetwork.py (3 additions & 2 deletions)
@@ -100,8 +100,9 @@

 edge_id = "119257908#1-AddedOnRampEdge"
 custom_callables = {
-    "avg_merge_speed": lambda env: np.nan_to_num(np.mean(
-        env.k.vehicle.get_speed(env.k.vehicle.get_ids_by_edge(edge_id)))),
+    "avg_speed": lambda env: np.nan_to_num(np.mean([speed for speed
+                                                    in env.k.vehicle.get_speed(env.k.vehicle.get_ids())
+                                                    if speed > 0])),
     "avg_outflow": lambda env: np.nan_to_num(
         env.k.vehicle.get_outflow_rate(120)),
     # we multiply by 5 to account for the vehicle length and by 1000 to convert
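Note on the change above: the new "avg_speed" callable averages over every vehicle in the network rather than only those on the on-ramp edge, skips non-positive speeds so stopped or not-yet-departed vehicles do not drag the average down, and relies on np.nan_to_num to report 0 when the list is empty (np.mean of an empty list is NaN). A minimal, self-contained sketch of that guard pattern, with made-up speed values:

import numpy as np

def mean_positive(values):
    """Mean of the strictly positive entries, or 0.0 if there are none."""
    # np.mean of an empty list evaluates to NaN; nan_to_num maps NaN to 0.0
    return float(np.nan_to_num(np.mean([v for v in values if v > 0])))

print(mean_positive([0.0, 3.2, 4.8]))  # 4.0 -- zero speeds are excluded
print(mean_positive([0.0, 0.0]))       # 0.0 instead of NaN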
examples/train.py (34 additions & 1 deletion)
@@ -8,11 +8,13 @@
"""

import argparse
from copy import deepcopy
import json
import os
import sys
from time import strftime

import numpy as np
from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines import PPO2

@@ -25,8 +27,8 @@
     from ray.rllib.agents.agent import get_agent_class
 except ImportError:
     from ray.rllib.agents.registry import get_agent_class
-from copy import deepcopy

+from flow.core.rewards import energy_consumption
 from flow.core.util import ensure_dir
 from flow.utils.registry import env_constructor
 from flow.utils.rllib import FlowParamsEncoder, get_flow_params
@@ -152,6 +154,37 @@ def setup_exps_rllib(flow_params,
config["num_sgd_iter"] = 10
config["horizon"] = horizon

# define some standard and useful callbacks
def on_episode_start(info):
episode = info["episode"]
episode.user_data["avg_speed"] = []
episode.user_data["energy"] = []
episode.user_data["outflow"] = []

def on_episode_step(info):
episode = info["episode"]
env = info["env"].get_unwrapped()[0]
speed = np.mean([speed for speed in env.k.vehicle.get_speed(env.k.vehicle.get_ids()) if speed > 0])
if not np.isnan(speed):
episode.user_data["avg_speed"].append(speed)
energy = energy_consumption(env)
if not np.isnan(energy):
episode.user_data["energy"].append(energy)

def on_episode_end(info):
episode = info["episode"]
env = info["env"].get_unwrapped()[0]
avg_speed = np.mean(episode.user_data["avg_speed"])
avg_energy = np.mean(episode.user_data["energy"])

episode.custom_metrics["avg_speed"] = avg_speed
episode.custom_metrics["avg_energy"] = avg_energy
episode.custom_metrics["outflow"] = env.k.vehicle.get_outflow_rate()

config["callbacks"] = {"on_episode_start": tune.function(on_episode_start),
"on_episode_step": tune.function(on_episode_step),
"on_episode_end": tune.function(on_episode_end)}

# save the flow params for replay
flow_json = json.dumps(
flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
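For context (not part of the diff itself): with these callbacks in place, RLlib gathers each episode.custom_metrics entry across the episodes of a training iteration and reports per-iteration mean/min/max values under custom_metrics in the result dict, which Tune then writes to TensorBoard. A rough, self-contained sketch of that aggregation step, using made-up per-episode values:

import numpy as np

# made-up per-episode custom metrics for one training iteration
episodes = [
    {"avg_speed": 6.2, "avg_energy": -1.1, "outflow": 1450.0},
    {"avg_speed": 5.8, "avg_energy": -1.3, "outflow": 1390.0},
]

custom_metrics = {}
for name in episodes[0]:
    values = [ep[name] for ep in episodes]
    custom_metrics[name + "_mean"] = float(np.mean(values))
    custom_metrics[name + "_min"] = float(np.min(values))
    custom_metrics[name + "_max"] = float(np.max(values))

print(custom_metrics["avg_speed_mean"])  # 6.0, shows up as custom_metrics/avg_speed_mean in TensorBoard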
flow/core/rewards.py (1 addition & 1 deletion)
@@ -329,4 +329,4 @@ def energy_consumption(env, gain=.001):

         power += M * speed * accel + M * g * Cr * speed + 0.5 * rho * A * Ca * speed ** 3

-    return -gain * power
+    return -gain * power / len(env.k.vehicle.get_ids())
Member: did you mean to add this?

Member (Author): Yep!
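For what it's worth, dividing by len(env.k.vehicle.get_ids()) turns the reward from total network power into average power per vehicle. That also explains why the expected values in tests/fast_tests/test_rewards.py below shrink by exactly a factor of 10; this suggests the test scenario contains 10 vehicles, which is an inference from the numbers rather than something stated in the diff. A quick arithmetic check:

# sanity check of the updated test expectation
old_expectation = -12.059337750000001  # -gain * total power over all vehicles
num_vehicles = 10                      # assumed vehicle count in the test network
print(old_expectation / num_vehicles)  # -1.2059337750000001, matching the new -1.205933775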

flow/utils/rllib.py (1 addition & 1 deletion)
@@ -3,8 +3,8 @@

 This includes: environment generation, serialization, and visualization.
 """
-import json
 from copy import deepcopy
+import json
 import os
 import sys

tests/fast_tests/test_rewards.py (2 additions & 2 deletions)
@@ -169,13 +169,13 @@ def test_energy_consumption(self):

         # change the speed of one vehicle
         env.k.vehicle.test_set_speed("test_0", 1)
-        self.assertEqual(energy_consumption(env), -12.059337750000001)
+        self.assertEqual(energy_consumption(env), -1.205933775)

         # check that stepping change the previous speeds and increases the energy consumption
         env.step(rl_actions=None)
         env.step(rl_actions=None)
         self.assertGreater(env.k.vehicle.get_previous_speed("test_0"), 0.0)
-        self.assertLess(energy_consumption(env), -12.059337750000001)
+        self.assertLess(energy_consumption(env), -1.205933775)

     def test_boolean_action_penalty(self):
         """Test the boolean_action_penalty method."""