diff --git a/.gitignore b/.gitignore
index 29d788c27..6d9ff7a71 100644
--- a/.gitignore
+++ b/.gitignore
@@ -111,3 +111,5 @@ flow.ang
 *.ang.old
 *.sang
+# local configuration file for data pipeline
+**/data_pipeline_config
diff --git a/environment.yml b/environment.yml
index f57c8d33d..ecbe5785f 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,18 +1,17 @@
 name: flow
 dependencies:
-  - python==3.6.8
-  - scipy==1.1.0
-  - lxml==4.4.1
-  - six==1.11.0
-  - path.py
-  - python-dateutil==2.7.3
-  - pip>=18.0
-  - tensorflow==1.9.0
-  - cloudpickle==1.2.1
-  - setuptools==41.0.0
-  - plotly==2.4.0
+  - python==3.7.3
   - pip:
+    - scipy==1.1.0
+    - lxml==4.4.1
+    - six==1.11.0
+    - path.py
+    - python-dateutil==2.7.3
+    - pip>=18.0
+    - tensorflow==1.15.2
+    - setuptools==41.0.0
+    - plotly==2.4.0
     - gym==0.14.0
     - pyprind==2.11.2
     - nose2==0.8.0
@@ -21,9 +20,11 @@ dependencies:
     - matplotlib==3.0.0
     - dill
     - lz4
-    - ray==0.7.3
+    - ray==0.8.0
     - setproctitle
     - psutil
     - opencv-python
-    - boto3==1.4.8
+    - boto3==1.10.45
     - redis~=2.10.6
+    - tabulate
+    - pytz
diff --git a/examples/README.md b/examples/README.md
index a9d681131..8156831fe 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -53,11 +53,11 @@ trained through RL algorithms provided by *RLlib*.
 To execute these examples, run
 
 ```shell script
-python train.py EXP_CONFIG --rl_trainer "rllib"
+python train.py EXP_CONFIG --rl_trainer "rllib" --algorithm <ALGORITHM_NAME>
 ```
 
 where `EXP_CONFIG` is the name of the experiment configuration file, as located
-in `exp_configs/rl/singleagent` or `exp_configs/rl/multiagent.`
-
+in `exp_configs/rl/singleagent` or `exp_configs/rl/multiagent`. Here `<ALGORITHM_NAME>`
+should be the name of your desired algorithm. Currently we support PPO and TD3.
 
 ### stable-baselines
diff --git a/examples/exp_configs/non_rl/highway_single.py b/examples/exp_configs/non_rl/highway_single.py
index 0ced89f27..fcd2f2da4 100644
--- a/examples/exp_configs/non_rl/highway_single.py
+++ b/examples/exp_configs/non_rl/highway_single.py
@@ -1,5 +1,9 @@
-"""Example of an open network with human-driven vehicles."""
+"""Example of an open network with human-driven vehicles and a wave."""
+
+import numpy as np
+
 from flow.controllers import IDMController
+from flow.controllers.velocity_controllers import FollowerStopper
 from flow.core.params import EnvParams
 from flow.core.params import NetParams
 from flow.core.params import InitialConfig
@@ -7,6 +11,7 @@
 from flow.core.params import VehicleParams
 from flow.core.params import SumoParams
 from flow.core.params import SumoLaneChangeParams
+from flow.core.rewards import instantaneous_mpg
 from flow.core.params import SumoCarFollowingParams
 from flow.networks import HighwayNetwork
 from flow.envs import TestEnv
@@ -22,6 +27,8 @@
 HORIZON = 1500
 # whether to include noise in the car-following models
 INCLUDE_NOISE = True
+# penetration rate of the follower-stopper vehicles
+PENETRATION_RATE = 0.0
 
 additional_net_params = ADDITIONAL_NET_PARAMS.copy()
 additional_net_params.update({
@@ -59,15 +66,38 @@
     ),
 )
 
+if PENETRATION_RATE > 0.0:
+    vehicles.add(
+        "av",
+        color='red',
+        num_vehicles=0,
+        acceleration_controller=(FollowerStopper, {
+            "v_des": 5.0,
+            "control_length": [500, 2300]
+        }),
+    )
+
 inflows = InFlows()
+
 inflows.add(
     veh_type="human",
     edge="highway_0",
-    vehs_per_hour=TRAFFIC_FLOW,
+    vehs_per_hour=int(TRAFFIC_FLOW * (1 - PENETRATION_RATE / 100)),
    depart_lane="free",
    depart_speed=TRAFFIC_SPEED,
    name="idm_highway_inflow")

+if PENETRATION_RATE > 0.0:
+    inflows.add(
+        veh_type="av",
+        edge="highway_0",
+        vehs_per_hour=int(TRAFFIC_FLOW *
(PENETRATION_RATE / 100)), + depart_lane="free", + depart_speed=TRAFFIC_SPEED, + name="av_highway_inflow") + +# SET UP FLOW PARAMETERS + flow_params = dict( # name of the experiment exp_tag='highway-single', @@ -111,3 +141,13 @@ # reset (see flow.core.params.InitialConfig) initial=InitialConfig(), ) + +custom_callables = { + "avg_merge_speed": lambda env: np.nan_to_num(np.mean( + env.k.vehicle.get_speed(env.k.vehicle.get_ids()))), + "avg_outflow": lambda env: np.nan_to_num( + env.k.vehicle.get_outflow_rate(120)), + "miles_per_gallon": lambda env: np.nan_to_num( + instantaneous_mpg(env, env.k.vehicle.get_ids(), gain=1.0) + ) +} diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py index b0c58c894..0c66f42e7 100644 --- a/examples/exp_configs/non_rl/i210_subnetwork.py +++ b/examples/exp_configs/non_rl/i210_subnetwork.py @@ -2,8 +2,9 @@ import os import numpy as np -from flow.controllers import IDMController -from flow.controllers import I210Router +from flow.controllers.car_following_models import IDMController +from flow.controllers.velocity_controllers import FollowerStopper +from flow.controllers.routing_controllers import I210Router from flow.core.params import SumoParams from flow.core.params import EnvParams from flow.core.params import NetParams @@ -11,9 +12,11 @@ from flow.core.params import VehicleParams from flow.core.params import InitialConfig from flow.core.params import InFlows -import flow.config as config +from flow.core.rewards import instantaneous_mpg +from flow.networks import I210SubNetwork +from flow.networks.i210_subnetwork import EDGES_DISTRIBUTION from flow.envs import TestEnv -from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION +import flow.config as config # =========================================================================== # # Specify some configurable constants. # @@ -27,8 +30,14 @@ ON_RAMP = False # the inflow rate of vehicles (in veh/hr) INFLOW_RATE = 2050 +# on-ramp inflow_rate +ON_RAMP_INFLOW_RATE = 500 # the speed of inflowing vehicles from the main edge (in m/s) INFLOW_SPEED = 25.5 +# fraction of vehicles that are follower-stoppers. 
0.10 corresponds to 10% +PENETRATION_RATE = 0.0 +# desired speed of the follower stopper vehicles +V_DES = 5.0 # horizon over which to run the env HORIZON = 1500 # steps to run before follower-stopper is allowed to take control @@ -63,11 +72,12 @@ # =========================================================================== # vehicles = VehicleParams() + vehicles.add( "human", num_vehicles=0, lane_change_params=SumoLaneChangeParams( - lane_change_mode="strategic", + lane_change_mode="sumo_default", ), acceleration_controller=(IDMController, { "a": 1.3, @@ -77,29 +87,54 @@ routing_controller=(I210Router, {}) if ON_RAMP else None, ) +vehicles.add( + "av", + num_vehicles=0, + color="red", + acceleration_controller=(FollowerStopper, { + "v_des": V_DES, + "no_control_edges": ["ghost0", "119257908#3"] + }), + routing_controller=(I210Router, {}) if ON_RAMP else None, +) + inflow = InFlows() + # main highway +highway_start_edge = "ghost0" if WANT_GHOST_CELL else "119257914" + for lane in [0, 1, 2, 3, 4]: inflow.add( veh_type="human", - edge="ghost0" if WANT_GHOST_CELL else "119257914", - vehs_per_hour=INFLOW_RATE, - departLane=lane, - departSpeed=INFLOW_SPEED) + edge=highway_start_edge, + vehs_per_hour=INFLOW_RATE * (1 - PENETRATION_RATE), + depart_lane=lane, + depart_speed=INFLOW_SPEED) + + if PENETRATION_RATE > 0.0: + inflow.add( + veh_type="av", + edge=highway_start_edge, + vehs_per_hour=INFLOW_RATE * PENETRATION_RATE, + depart_lane=lane, + depart_speed=INFLOW_SPEED) + # on ramp if ON_RAMP: inflow.add( veh_type="human", edge="27414345", - vehs_per_hour=500, - departLane="random", - departSpeed=10) - inflow.add( - veh_type="human", - edge="27414342#0", - vehs_per_hour=500, - departLane="random", - departSpeed=10) + vehs_per_hour=int(ON_RAMP_INFLOW_RATE * (1 - PENETRATION_RATE)), + depart_speed=10, + ) + + if PENETRATION_RATE > 0.0: + inflow.add( + veh_type="av", + edge="27414345", + vehs_per_hour=int(ON_RAMP_INFLOW_RATE * PENETRATION_RATE), + depart_lane="random", + depart_speed=10) # =========================================================================== # # Generate the flow_params dict with all relevant simulation information. 
# @@ -122,7 +157,7 @@ sim=SumoParams( sim_step=0.4, render=False, - color_by_speed=True, + color_by_speed=False, use_ballistic=True ), @@ -160,14 +195,21 @@ # =========================================================================== # edge_id = "119257908#1-AddedOnRampEdge" + + +def valid_ids(env, veh_ids): + """Return the names of vehicles within the controllable edges.""" + return [ + veh_id for veh_id in veh_ids + if env.k.vehicle.get_edge(veh_id) not in ["ghost0", "119257908#3"] + ] + + custom_callables = { "avg_merge_speed": lambda env: np.nan_to_num(np.mean( - env.k.vehicle.get_speed(env.k.vehicle.get_ids_by_edge(edge_id)))), + env.k.vehicle.get_speed(valid_ids(env, env.k.vehicle.get_ids())))), "avg_outflow": lambda env: np.nan_to_num( env.k.vehicle.get_outflow_rate(120)), - # we multiply by 5 to account for the vehicle length and by 1000 to convert - # into veh/km - "avg_density": lambda env: 5 * 1000 * len(env.k.vehicle.get_ids_by_edge( - edge_id)) / (env.k.network.edge_length(edge_id) - * env.k.network.num_lanes(edge_id)), + "mpg": lambda env: instantaneous_mpg( + env, valid_ids(env, env.k.vehicle.get_ids()), gain=1.0), } diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index a6d194708..3a8207eb8 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -4,118 +4,217 @@ highway with ramps network. """ import os +import numpy as np -from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy from ray.tune.registry import register_env +from flow.controllers import RLController +from flow.controllers.routing_controllers import I210Router +from flow.controllers.car_following_models import IDMController import flow.config as config -from flow.controllers.rlcontroller import RLController from flow.core.params import EnvParams from flow.core.params import NetParams from flow.core.params import InitialConfig from flow.core.params import InFlows from flow.core.params import VehicleParams from flow.core.params import SumoParams +from flow.core.params import SumoCarFollowingParams from flow.core.params import SumoLaneChangeParams -from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION +from flow.core.rewards import energy_consumption from flow.envs.multiagent.i210 import I210MultiEnv, ADDITIONAL_ENV_PARAMS from flow.utils.registry import make_create_env +from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION -# SET UP PARAMETERS FOR THE SIMULATION +# =========================================================================== # +# Specify some configurable constants. # +# =========================================================================== # -# number of training iterations -N_TRAINING_ITERATIONS = 200 -# number of rollouts per training iteration -N_ROLLOUTS = 2 -# number of steps per rollout -HORIZON = 500 -# number of parallel workers -N_CPUS = 1 +# whether to include the downstream slow-down edge in the network as well as a +# ghost cell at the upstream edge +WANT_BOUNDARY_CONDITIONS = True +# whether to include vehicles on the on-ramp +ON_RAMP = False +# the inflow rate of vehicles (in veh/hr) +INFLOW_RATE = 2050 +# the inflow rate on the on-ramp (in veh/hr) +ON_RAMP_INFLOW_RATE = 500 +# the speed of inflowing vehicles from the main edge (in m/s) +INFLOW_SPEED = 25.5 +# fraction of vehicles that are RL vehicles. 
0.10 corresponds to 10% +PENETRATION_RATE = 0.05 +# desired speed of the vehicles in the network +V_DES = 5.0 +# horizon over which to run the env +HORIZON = 1000 +# steps to run before follower-stopper is allowed to take control +WARMUP_STEPS = 600 +# whether to turn off the fail safes for the human-driven vehicles +ALLOW_COLLISIONS = False -# percentage of autonomous vehicles compared to human vehicles on highway -PENETRATION_RATE = 10 +# =========================================================================== # +# Specify the path to the network template. # +# =========================================================================== # -# TODO: temporary fix +if WANT_BOUNDARY_CONDITIONS: + NET_TEMPLATE = os.path.join( + config.PROJECT_PATH, + "examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_" + "downstream.xml") +else: + NET_TEMPLATE = os.path.join( + config.PROJECT_PATH, + "examples/exp_configs/templates/sumo/test2.net.xml") edges_distribution = EDGES_DISTRIBUTION.copy() -edges_distribution.remove("ghost0") -# SET UP PARAMETERS FOR THE ENVIRONMENT +# =========================================================================== # +# Set up parameters for the environment. # +# =========================================================================== # + additional_env_params = ADDITIONAL_ENV_PARAMS.copy() additional_env_params.update({ - 'max_accel': 1, - 'max_decel': 1, - # configure the observation space. Look at the I210MultiEnv class for more info. + 'max_accel': 2.6, + 'max_decel': 4.5, + + # configure the observation space. Look at the I210MultiEnv class for more + # info. 'lead_obs': True, + # whether to add in a reward for the speed of nearby vehicles + "local_reward": True, + # whether to use the MPG reward. Otherwise, defaults to a target velocity + # reward + "mpg_reward": False, + # whether to use the MPJ reward. Otherwise, defaults to a target velocity + # reward + "mpj_reward": False, + # how many vehicles to look back for any reward + "look_back_length": 3, + # whether to reroute vehicles once they have exited + "reroute_on_exit": False, + 'target_velocity': 5.0, + # how many AVs there can be at once (this is only for centralized critics) + "max_num_agents": 10, + # which edges we shouldn't apply control on + "no_control_edges": ["ghost0", "119257908#3"], + + # whether to add a slight reward for opening up a gap that will be annealed + # out N iterations in + "headway_curriculum": False, + # how many timesteps to anneal the headway curriculum over + "headway_curriculum_iters": 100, + # weight of the headway reward + "headway_reward_gain": 2.0, + # desired time headway + "min_time_headway": 2.0, + + # whether to add a slight reward for traveling at a desired speed + "speed_curriculum": True, + # how many timesteps to anneal the headway curriculum over + "speed_curriculum_iters": 20, + # weight of the headway reward + "speed_reward_gain": 5.0, + # penalize stopped vehicles + "penalize_stops": False, + "stop_penalty": 0.01, + + # penalize accels + "penalize_accel": False, + "accel_penalty": (1 / 400.0) }) -# CREATE VEHICLE TYPES AND INFLOWS -# no vehicles in the network +# =========================================================================== # +# Specify vehicle-specific information and inflows. 
# +# =========================================================================== # + +# create the base vehicle types that will be used for inflows vehicles = VehicleParams() -vehicles.add( - "human", - num_vehicles=0, - lane_change_params=SumoLaneChangeParams( - lane_change_mode="strategic", +if ON_RAMP: + vehicles.add( + "human", + num_vehicles=0, + routing_controller=(I210Router, {}), + acceleration_controller=(IDMController, { + 'a': 1.3, + 'b': 2.0, + 'noise': 0.3 + }), + car_following_params=SumoCarFollowingParams( + speed_mode=19 if ALLOW_COLLISIONS else 'right_of_way' + ), + lane_change_params=SumoLaneChangeParams( + lane_change_mode="sumo_default", + ), + ) +else: + vehicles.add( + "human", + num_vehicles=0, + acceleration_controller=(IDMController, { + 'a': 1.3, + 'b': 2.0, + 'noise': 0.3 + }), + car_following_params=SumoCarFollowingParams( + speed_mode=19 if ALLOW_COLLISIONS else 'right_of_way' + ), + lane_change_params=SumoLaneChangeParams( + lane_change_mode="sumo_default", + ), ) -) vehicles.add( "av", - acceleration_controller=(RLController, {}), num_vehicles=0, + acceleration_controller=(RLController, {}), ) inflow = InFlows() -# main highway -pen_rate = PENETRATION_RATE / 100 -assert pen_rate < 1.0, "your penetration rate is over 100%" -assert pen_rate > 0.0, "your penetration rate should be above zero" -inflow.add( - veh_type="human", - edge="119257914", - vehs_per_hour=8378 * pen_rate, - # probability=1.0, - departLane="random", - departSpeed=20) -# on ramp -# inflow.add( -# veh_type="human", -# edge="27414345", -# vehs_per_hour=321 * pen_rate, -# departLane="random", -# departSpeed=20) -# inflow.add( -# veh_type="human", -# edge="27414342#0", -# vehs_per_hour=421 * pen_rate, -# departLane="random", -# departSpeed=20) - -# Now add the AVs -# main highway -inflow.add( - veh_type="av", - edge="119257914", - vehs_per_hour=int(8378 * pen_rate), - # probability=1.0, - departLane="random", - departSpeed=20) -# # on ramp -# inflow.add( -# veh_type="av", -# edge="27414345", -# vehs_per_hour=int(321 * pen_rate), -# departLane="random", -# departSpeed=20) -# inflow.add( -# veh_type="av", -# edge="27414342#0", -# vehs_per_hour=int(421 * pen_rate), -# departLane="random", -# departSpeed=20) - -NET_TEMPLATE = os.path.join( - config.PROJECT_PATH, - "examples/exp_configs/templates/sumo/test2.net.xml") +for lane in [0, 1, 2, 3, 4]: + if WANT_BOUNDARY_CONDITIONS: + # Add the inflows from the main highway. + inflow.add( + veh_type="human", + edge="ghost0", + vehs_per_hour=int(INFLOW_RATE * (1 - PENETRATION_RATE)), + departLane=lane, + departSpeed=INFLOW_SPEED) + inflow.add( + veh_type="av", + edge="ghost0", + vehs_per_hour=int(INFLOW_RATE * PENETRATION_RATE), + departLane=lane, + departSpeed=INFLOW_SPEED) + else: + # Add the inflows from the main highway. + inflow.add( + veh_type="human", + edge="119257914", + vehs_per_hour=int(INFLOW_RATE * (1 - PENETRATION_RATE)), + departLane=lane, + departSpeed=INFLOW_SPEED) + inflow.add( + veh_type="av", + edge="119257914", + vehs_per_hour=int(INFLOW_RATE * PENETRATION_RATE), + departLane=lane, + departSpeed=INFLOW_SPEED) + + # Add the inflows from the on-ramps. 
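
The five per-lane `inflow.add` calls above split a fixed mainline demand between the `human` and `av` types, so the total stays at `INFLOW_RATE` veh/hr per lane for any penetration rate. A standalone sanity check of that arithmetic (constants mirror this config's defaults; the snippet is not part of the patch):

```python
# Sanity check: the human/AV inflow split preserves the total per-lane rate
# up to int() truncation. Constants mirror multiagent_i210.py's defaults.
INFLOW_RATE = 2050        # veh/hr per lane on the mainline
PENETRATION_RATE = 0.05   # fraction of AVs, as in this file

human_flow = int(INFLOW_RATE * (1 - PENETRATION_RATE))  # 1947 veh/hr
av_flow = int(INFLOW_RATE * PENETRATION_RATE)           # 102 veh/hr

# int() truncation can drop at most 2 veh/hr relative to INFLOW_RATE.
assert 0 <= INFLOW_RATE - (human_flow + av_flow) <= 2
```

The on-ramp inflows referenced by the comment above follow.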
+ if ON_RAMP: + inflow.add( + veh_type="human", + edge="27414345", + vehs_per_hour=int(ON_RAMP_INFLOW_RATE * (1 - PENETRATION_RATE)), + departLane="random", + departSpeed=10) + inflow.add( + veh_type="human", + edge="27414342#0", + vehs_per_hour=int(ON_RAMP_INFLOW_RATE * (1 - PENETRATION_RATE)), + departLane="random", + departSpeed=10) + +# =========================================================================== # +# Generate the flow_params dict with all relevant simulation information. # +# =========================================================================== # flow_params = dict( # name of the experiment @@ -132,17 +231,21 @@ # simulation-related parameters sim=SumoParams( - sim_step=0.8, + sim_step=0.4, render=False, - color_by_speed=True, - restart_instance=True + color_by_speed=False, + restart_instance=True, + use_ballistic=True, + disable_collisions=True ), # environment related parameters (see flow.core.params.EnvParams) env=EnvParams( horizon=HORIZON, - sims_per_step=1, + sims_per_step=3, + warmup_steps=WARMUP_STEPS, additional_params=additional_env_params, + done_at_exit=not additional_env_params["reroute_on_exit"] ), # network-related parameters (see flow.core.params.NetParams and the @@ -151,8 +254,8 @@ inflows=inflow, template=NET_TEMPLATE, additional_params={ - "on_ramp": False, - "ghost_edge": False + "on_ramp": ON_RAMP, + "ghost_edge": WANT_BOUNDARY_CONDITIONS } ), @@ -167,19 +270,21 @@ ), ) -# SET UP RLLIB MULTI-AGENT FEATURES +# =========================================================================== # +# Set up rllib multi-agent features. # +# =========================================================================== # create_env, env_name = make_create_env(params=flow_params, version=0) # register as rllib env register_env(env_name, create_env) -# multiagent configuration +# multi-agent configuration test_env = create_env() obs_space = test_env.observation_space act_space = test_env.action_space -POLICY_GRAPHS = {'av': (PPOTFPolicy, obs_space, act_space, {})} +POLICY_GRAPHS = {'av': (None, obs_space, act_space, {})} POLICIES_TO_TRAIN = ['av'] @@ -187,3 +292,14 @@ def policy_mapping_fn(_): """Map a policy in RLlib.""" return 'av' + + +custom_callables = { + "avg_speed": lambda env: np.mean([speed for speed in + env.k.vehicle.get_speed(env.k.vehicle.get_ids()) if speed >= 0]), + "avg_outflow": lambda env: np.nan_to_num(env.k.vehicle.get_outflow_rate(120)), + "avg_energy": lambda env: -1 * energy_consumption(env, 0.1), + "avg_per_step_energy": lambda env: -1 * energy_consumption(env, 0.1) / env.k.vehicle.num_vehicles + if env.k.vehicle.num_vehicles > 0 + else 0, +} diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py new file mode 100644 index 000000000..73460d656 --- /dev/null +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -0,0 +1,214 @@ +"""Multi-agent highway with ramps example. + +Trains a non-constant number of agents, all sharing the same policy, on the +highway with ramps network. 
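+
+(Note: despite the title carried over from the ramps example, this config
+runs agents on a straight highway; see the HighwayNetwork usage below.)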
+""" +from flow.controllers import RLController, IDMController +from flow.core.params import EnvParams, NetParams, InitialConfig, InFlows, \ + VehicleParams, SumoParams, SumoLaneChangeParams, SumoCarFollowingParams +from flow.networks import HighwayNetwork +from flow.envs.ring.accel import ADDITIONAL_ENV_PARAMS +from flow.envs.multiagent import MultiStraightRoad +from flow.networks.highway import ADDITIONAL_NET_PARAMS +from flow.utils.registry import make_create_env +from ray.tune.registry import register_env + +# SET UP PARAMETERS FOR THE SIMULATION + +# the speed of vehicles entering the network +TRAFFIC_SPEED = 24.1 +# the maximum speed at the downstream boundary edge +END_SPEED = 6.0 +# the inflow rate of vehicles +HIGHWAY_INFLOW_RATE = 2215 +# the simulation time horizon (in steps) +HORIZON = 1000 +# whether to include noise in the car-following models +INCLUDE_NOISE = True + +PENETRATION_RATE = 10.0 + +additional_net_params = ADDITIONAL_NET_PARAMS.copy() +additional_net_params.update({ + # length of the highway + "length": 2500, + # number of lanes + "lanes": 1, + # speed limit for all edges + "speed_limit": 30, + # number of edges to divide the highway into + "num_edges": 2, + # whether to include a ghost edge + "use_ghost_edge": True, + # speed limit for the ghost edge + "ghost_speed_limit": END_SPEED, + # length of the cell imposing a boundary + "boundary_cell_length": 300, +}) + + +# SET UP PARAMETERS FOR THE ENVIRONMENT + +additional_env_params = ADDITIONAL_ENV_PARAMS.copy() +additional_env_params.update({ + 'max_accel': 2.6, + 'max_decel': 4.5, + 'target_velocity': 6.0, + 'local_reward': True, + 'lead_obs': True, + 'control_range': [500, 2300], + # whether to reroute vehicles once they have exited + "reroute_on_exit": True, + # whether to use the MPG reward. Otherwise, defaults to a target velocity reward + "mpg_reward": False, + # whether to use the joules reward. 
Otherwise, defaults to a target velocity reward + "mpj_reward": False, + # how many vehicles to look back for the MPG reward + "look_back_length": 3, + # how many AVs there can be at once (this is only for centralized critics) + "max_num_agents": 10, + + # whether to add a slight reward for opening up a gap that will be annealed out N iterations in + "headway_curriculum": False, + # how many timesteps to anneal the headway curriculum over + "headway_curriculum_iters": 100, + # weight of the headway reward + "headway_reward_gain": 2.0, + # desired time headway + "min_time_headway": 2.0, + + # whether to add a slight reward for traveling at a desired speed + "speed_curriculum": True, + # how many timesteps to anneal the headway curriculum over + "speed_curriculum_iters": 20, + # weight of the headway reward + "speed_reward_gain": 1.0, + + # penalize stopped vehicles + "penalize_stops": True, + "stop_penalty": 0.05, + + # penalize accels + "penalize_accel": True, + "accel_penalty": 0.05, + +}) + + +# CREATE VEHICLE TYPES AND INFLOWS + +vehicles = VehicleParams() +inflows = InFlows() +vehicles.add( + "human", + acceleration_controller=(IDMController, { + 'a': 1.3, + 'b': 2.0, + 'noise': 0.3 if INCLUDE_NOISE else 0.0 + }), + car_following_params=SumoCarFollowingParams( + min_gap=0.5 + ), + lane_change_params=SumoLaneChangeParams( + model="SL2015", + lc_sublane=2.0, + ), +) + +# autonomous vehicles +vehicles.add( + color='red', + veh_id='rl', + acceleration_controller=(RLController, {})) + +# add human vehicles on the highway +inflows.add( + veh_type="human", + edge="highway_0", + vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (1 - PENETRATION_RATE / 100)), + depart_lane="free", + depart_speed=TRAFFIC_SPEED, + name="idm_highway_inflow") + +# add autonomous vehicles on the highway +# they will stay on the highway, i.e. 
they won't exit through the off-ramps +inflows.add( + veh_type="rl", + edge="highway_0", + vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (PENETRATION_RATE / 100)), + depart_lane="free", + depart_speed=TRAFFIC_SPEED, + name="rl_highway_inflow") + +# SET UP FLOW PARAMETERS +warmup_steps = 0 +if additional_env_params['reroute_on_exit']: + warmup_steps = 500 + +flow_params = dict( + # name of the experiment + exp_tag='multiagent_highway', + + # name of the flow environment the experiment is running on + env_name=MultiStraightRoad, + + # name of the network class the experiment is running on + network=HighwayNetwork, + + # simulator that is used by the experiment + simulator='traci', + + # environment related parameters (see flow.core.params.EnvParams) + env=EnvParams( + horizon=HORIZON, + warmup_steps=warmup_steps, + sims_per_step=3, + additional_params=additional_env_params + ), + + # sumo-related parameters (see flow.core.params.SumoParams) + sim=SumoParams( + sim_step=0.4, + render=False, + restart_instance=True, + use_ballistic=True + ), + + # network-related parameters (see flow.core.params.NetParams and the + # network's documentation or ADDITIONAL_NET_PARAMS component) + net=NetParams( + inflows=inflows, + additional_params=additional_net_params + ), + + # vehicles to be placed in the network at the start of a rollout (see + # flow.core.params.VehicleParams) + veh=vehicles, + + # parameters specifying the positioning of vehicles upon initialization/ + # reset (see flow.core.params.InitialConfig) + initial=InitialConfig(), +) + + +# SET UP RLLIB MULTI-AGENT FEATURES + +create_env, env_name = make_create_env(params=flow_params, version=0) + +# register as rllib env +register_env(env_name, create_env) + +# multiagent configuration +test_env = create_env() +obs_space = test_env.observation_space +act_space = test_env.action_space + + +POLICY_GRAPHS = {'av': (None, obs_space, act_space, {})} + +POLICIES_TO_TRAIN = ['av'] + + +def policy_mapping_fn(_): + """Map a policy in RLlib.""" + return 'av' diff --git a/examples/exp_configs/rl/singleagent/singleagent_straight_road.py b/examples/exp_configs/rl/singleagent/singleagent_straight_road.py new file mode 100644 index 000000000..265d34d42 --- /dev/null +++ b/examples/exp_configs/rl/singleagent/singleagent_straight_road.py @@ -0,0 +1,164 @@ +"""Multi-agent highway with ramps example. + +Trains a non-constant number of agents, all sharing the same policy, on the +highway with ramps network. 
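+
+(Note: as with the multi-agent variant, this file actually configures a
+straight highway without ramps; the title is inherited from an older example.)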
+""" +from flow.controllers import RLController, IDMController +from flow.core.params import EnvParams, NetParams, InitialConfig, InFlows, \ + VehicleParams, SumoParams, SumoLaneChangeParams +from flow.envs.ring.accel import ADDITIONAL_ENV_PARAMS +from flow.networks import HighwayNetwork +from flow.envs import SingleStraightRoad +from flow.networks.highway import ADDITIONAL_NET_PARAMS +from flow.utils.registry import make_create_env +from ray.tune.registry import register_env + + +# SET UP PARAMETERS FOR THE SIMULATION + +# number of steps per rollout +HORIZON = 2000 + +# inflow rate on the highway in vehicles per hour +HIGHWAY_INFLOW_RATE = 10800 / 5 +# percentage of autonomous vehicles compared to human vehicles on highway +PENETRATION_RATE = 10 + + +# SET UP PARAMETERS FOR THE NETWORK + +additional_net_params = ADDITIONAL_NET_PARAMS.copy() +additional_net_params.update({ + # length of the highway + "length": 2000, + # number of lanes + "lanes": 1, + # speed limit for all edges + "speed_limit": 30, + # number of edges to divide the highway into + "num_edges": 2 +}) + + +# SET UP PARAMETERS FOR THE ENVIRONMENT + +additional_env_params = ADDITIONAL_ENV_PARAMS.copy() +additional_env_params.update({ + 'max_accel': 2.6, + 'max_decel': 4.5, + 'target_velocity': 18.0, + 'local_reward': True, + 'lead_obs': True, + "terminate_on_wave": False, + # the environment is not allowed to terminate below this horizon length + 'wave_termination_horizon': 1000, + # the speed below which we consider a wave to have occured + 'wave_termination_speed': 10.0, + # whether the vehicle continues to acquire reward after it exits the system. This causes it to have incentive + # to leave the network in a good state after it leaves + 'reward_after_exit': True +}) + + +# CREATE VEHICLE TYPES AND INFLOWS + +vehicles = VehicleParams() +inflows = InFlows() + +# human vehicles +vehicles.add( + "human", + num_vehicles=0, + lane_change_params=SumoLaneChangeParams( + lane_change_mode="strategic", + ), + acceleration_controller=(IDMController, {"a": .3, "b": 2.0, "noise": 0.5}), +) + +# autonomous vehicles +vehicles.add( + veh_id='rl', + acceleration_controller=(RLController, {})) + +# add human vehicles on the highway +inflows.add( + veh_type="human", + edge="highway_0", + vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (1 - PENETRATION_RATE / 100)), + depart_lane="free", + depart_speed="23.0", + name="idm_highway_inflow") + +# add autonomous vehicles on the highway +# they will stay on the highway, i.e. 
they won't exit through the off-ramps
+inflows.add(
+    veh_type="rl",
+    edge="highway_0",
+    vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (PENETRATION_RATE / 100)),
+    depart_lane="free",
+    depart_speed="23.0",
+    name="rl_highway_inflow")
+
+# SET UP FLOW PARAMETERS
+done_at_exit = True
+if additional_env_params['reward_after_exit']:
+    done_at_exit = False
+
+flow_params = dict(
+    # name of the experiment
+    exp_tag='singleagent_highway',
+
+    # name of the flow environment the experiment is running on
+    env_name=SingleStraightRoad,
+
+    # name of the network class the experiment is running on
+    network=HighwayNetwork,
+
+    # simulator that is used by the experiment
+    simulator='traci',
+
+    # environment related parameters (see flow.core.params.EnvParams)
+    env=EnvParams(
+        horizon=HORIZON,
+        warmup_steps=0,
+        sims_per_step=1,  # do not put more than one
+        done_at_exit=done_at_exit,
+        additional_params=additional_env_params,
+    ),
+
+    # sumo-related parameters (see flow.core.params.SumoParams)
+    sim=SumoParams(
+        sim_step=0.5,
+        render=False,
+        use_ballistic=True,
+        restart_instance=True
+    ),
+
+    # network-related parameters (see flow.core.params.NetParams and the
+    # network's documentation or ADDITIONAL_NET_PARAMS component)
+    net=NetParams(
+        inflows=inflows,
+        additional_params=additional_net_params
+    ),
+
+    # vehicles to be placed in the network at the start of a rollout (see
+    # flow.core.params.VehicleParams)
+    veh=vehicles,
+
+    # parameters specifying the positioning of vehicles upon initialization/
+    # reset (see flow.core.params.InitialConfig)
+    initial=InitialConfig(),
+)
+
+
+# SET UP RLLIB MULTI-AGENT FEATURES
+
+create_env, env_name = make_create_env(params=flow_params, version=0)
+
+# register as rllib env
+register_env(env_name, create_env)
+
+# multiagent configuration
+test_env = create_env()
+obs_space = test_env.observation_space
+act_space = test_env.action_space
diff --git a/examples/simulate.py b/examples/simulate.py
index d1dcc5a79..6bc4e5a4e 100644
--- a/examples/simulate.py
+++ b/examples/simulate.py
@@ -49,6 +49,23 @@ def parse_args(args):
         action='store_true',
         help='Specifies whether to generate an emission file from the '
              'simulation.')
+    parser.add_argument(
+        '--to_aws',
+        type=str, nargs='?', default=None, const="default",
+        help='Specifies the name of the partition to store the output '
+             'file on S3. Passing a non-None value for this argument '
+             'automatically sets gen_emission to True.')
+    parser.add_argument(
+        '--only_query',
+        nargs='*', default="[\'all\']",
+        help='Specifies which queries should be run by lambda. '
+             'For details, see upload_to_s3 in data_pipeline.py.'
+    )
+    parser.add_argument(
+        '--is_baseline',
+        action='store_true',
+        help='Specifies whether this is a baseline run.'
+    )
 
     return parser.parse_known_args(args)[0]
 
@@ -56,6 +73,8 @@ def parse_args(args):
 if __name__ == "__main__":
     flags = parse_args(sys.argv[1:])
 
+    flags.gen_emission = flags.gen_emission or flags.to_aws
+
     # Get the flow_params object.
     module = __import__("exp_configs.non_rl", fromlist=[flags.exp_config])
     flow_params = getattr(module, flags.exp_config).flow_params
@@ -90,4 +109,5 @@ def parse_args(args):
     exp = Experiment(flow_params, callables)
 
     # Run for the specified number of rollouts.
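
One subtlety in the `simulate.py` changes above: after `flags.gen_emission = flags.gen_emission or flags.to_aws`, the attribute may hold the partition *string* rather than a boolean. That is fine as long as downstream code only tests truthiness. A minimal sketch with assumed flag values (not repo code):

```python
# Minimal sketch of the flag coupling, with assumed values.
class Flags:
    gen_emission = False       # user did not pass --gen_emission
    to_aws = "my_partition"    # user passed --to_aws my_partition

flags = Flags()
flags.gen_emission = flags.gen_emission or flags.to_aws

# gen_emission is now the string "my_partition", not True; downstream code
# must treat it as a truthy flag rather than compare it against True.
assert flags.gen_emission == "my_partition" and bool(flags.gen_emission)
```

The updated `exp.run` call that threads these flags through follows.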
-    exp.run(flags.num_runs, convert_to_csv=flags.gen_emission)
+    exp.run(flags.num_runs, convert_to_csv=flags.gen_emission, to_aws=flags.to_aws,
+            only_query=flags.only_query, is_baseline=flags.is_baseline)
diff --git a/examples/train.py b/examples/train.py
index 1b2f22476..b669dc59f 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -7,13 +7,17 @@
     python train.py EXP_CONFIG
 """
 import argparse
+from datetime import datetime
 import json
 import os
 import sys
 from time import strftime
 from copy import deepcopy
+import numpy as np
+import pytz
 
 from flow.core.util import ensure_dir
+from flow.core.rewards import instantaneous_mpg
 from flow.utils.registry import env_constructor
 from flow.utils.rllib import FlowParamsEncoder, get_flow_params
 from flow.utils.registry import make_create_env
@@ -42,19 +46,51 @@ def parse_args(args):
     parser.add_argument(
         '--rl_trainer', type=str, default="rllib",
         help='the RL trainer to use. either rllib or Stable-Baselines')
-
+    parser.add_argument(
+        '--load_weights_path', type=str, default=None,
+        help='Path to h5 file containing a pretrained model. Relevant for PPO with RLlib.'
+    )
+    parser.add_argument(
+        '--algorithm', type=str, default="PPO",
+        help='RL algorithm to use. Options are PPO, TD3, and CENTRALIZEDPPO (which uses a centralized value function)'
+             ' right now.'
+    )
     parser.add_argument(
         '--num_cpus', type=int, default=1,
         help='How many CPUs to use')
     parser.add_argument(
         '--num_steps', type=int, default=5000,
-        help='How many total steps to perform learning over')
+        help='How many total steps to perform learning over. Relevant for stable-baselines')
+    parser.add_argument(
+        '--grid_search', action='store_true', default=False,
+        help='Whether to grid search over hyperparams')
+    parser.add_argument(
+        '--num_iterations', type=int, default=200,
+        help='How many iterations are in a training run.')
+    parser.add_argument(
+        '--checkpoint_freq', type=int, default=20,
+        help='How often to checkpoint.')
+    parser.add_argument(
+        '--num_rollouts', type=int, default=1,
+        help='How many rollouts are in a training batch')
     parser.add_argument(
         '--rollout_size', type=int, default=1000,
         help='How many steps are in a training batch.')
+    parser.add_argument('--use_s3', action='store_true', default=False,
+                        help='If true, upload results to S3')
+    parser.add_argument('--local_mode', action='store_true', default=False,
+                        help='If true only 1 CPU will be used')
+    parser.add_argument('--render', action='store_true', default=False,
+                        help='If true, we render the display')
     parser.add_argument(
         '--checkpoint_path', type=str, default=None,
         help='Directory with checkpoint to restore training from.')
+    parser.add_argument(
+        '--exp_title', type=str, default=None,
+        help='Name of experiment that results will be stored in')
+    parser.add_argument('--multi_node', action='store_true',
                        help='Set to true if this will be run in cluster mode. '
+ 'Relevant for rllib') return parser.parse_known_args(args)[0] @@ -84,7 +120,6 @@ def run_model_stablebaseline(flow_params, """ from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv from stable_baselines import PPO2 - if num_cpus == 1: constructor = env_constructor(params=flow_params, version=0)() # The algorithms require a vectorized environment to run @@ -101,9 +136,11 @@ def run_model_stablebaseline(flow_params, def setup_exps_rllib(flow_params, n_cpus, n_rollouts, + flags, policy_graphs=None, policy_mapping_fn=None, - policies_to_train=None): + policies_to_train=None, + ): """Return the relevant components of an RLlib experiment. Parameters @@ -114,13 +151,14 @@ def setup_exps_rllib(flow_params, number of CPUs to run the experiment over n_rollouts : int number of rollouts per training iteration + flags : TODO + custom arguments policy_graphs : dict, optional TODO policy_mapping_fn : function, optional TODO policies_to_train : list of str, optional TODO - Returns ------- str @@ -132,6 +170,7 @@ def setup_exps_rllib(flow_params, """ from ray import tune from ray.tune.registry import register_env + from ray.rllib.env.group_agents_wrapper import _GroupAgentsWrapper try: from ray.rllib.agents.agent import get_agent_class except ImportError: @@ -139,20 +178,158 @@ def setup_exps_rllib(flow_params, horizon = flow_params['env'].horizon - alg_run = "PPO" + alg_run = flags.algorithm.upper() + + if alg_run == "PPO": + from flow.algorithms.imitation_learning.custom_ppo import CustomPPOTrainer + from ray.rllib.agents.ppo import DEFAULT_CONFIG + config = deepcopy(DEFAULT_CONFIG) + + + alg_run = CustomPPOTrainer + + horizon = flow_params['env'].horizon + + config["num_workers"] = n_cpus + config["horizon"] = horizon + config["model"].update({"fcnet_hiddens": [32, 32]}) + config["train_batch_size"] = horizon * n_rollouts + config["gamma"] = 0.995 # discount rate + config["use_gae"] = True + config["no_done_at_end"] = False + config["lambda"] = 0.97 + config["kl_target"] = 0.02 + config["num_sgd_iter"] = 10 + if flags.grid_search: + config["lambda"] = tune.grid_search([0.5, 0.9]) + config["lr"] = tune.grid_search([5e-4, 5e-5]) + + if flags.load_weights_path: + from flow.algorithms.imitation_learning.ppo_model import PPONetwork + from flow.algorithms.imitation_learning.custom_trainable import Imitation_PPO_Trainable + from ray.rllib.models import ModelCatalog + + # Register custom model + ModelCatalog.register_custom_model("PPO_loaded_weights", PPONetwork) + # set model to the custom model for run + config['model']['custom_model'] = "PPO_loaded_weights" + config['model']['custom_options'] = {"h5_load_path": flags.load_weights_path} + config['observation_filter'] = 'NoFilter' + # alg run is the Trainable class + alg_run = Imitation_PPO_Trainable + + elif alg_run == "CENTRALIZEDPPO": + from flow.algorithms.centralized_PPO import CCTrainer, CentralizedCriticModel + from ray.rllib.agents.ppo import DEFAULT_CONFIG + from ray.rllib.models import ModelCatalog + alg_run = CCTrainer + config = deepcopy(DEFAULT_CONFIG) + config['model']['custom_model'] = "cc_model" + config["model"]["custom_options"]["max_num_agents"] = flow_params['env'].additional_params['max_num_agents'] + config["model"]["custom_options"]["central_vf_size"] = 100 + + ModelCatalog.register_custom_model("cc_model", CentralizedCriticModel) + + config["num_workers"] = n_cpus + config["horizon"] = horizon + config["model"].update({"fcnet_hiddens": [32, 32]}) + config["train_batch_size"] = horizon * n_rollouts + config["gamma"] = 
0.995 # discount rate + config["use_gae"] = True + config["lambda"] = 0.97 + config["kl_target"] = 0.02 + config["num_sgd_iter"] = 10 + if flags.grid_search: + config["lambda"] = tune.grid_search([0.5, 0.9]) + config["lr"] = tune.grid_search([5e-4, 5e-5]) + + elif alg_run == "TD3": + alg_run = get_agent_class(alg_run) + config = deepcopy(alg_run._default_config) + + config["num_workers"] = n_cpus + config["horizon"] = horizon + config["learning_starts"] = 10000 + config["buffer_size"] = 20000 # reduced to test if this is the source of memory problems + if flags.grid_search: + config["prioritized_replay"] = tune.grid_search(['True', 'False']) + config["actor_lr"] = tune.grid_search([1e-3, 1e-4]) + config["critic_lr"] = tune.grid_search([1e-3, 1e-4]) + config["n_step"] = tune.grid_search([1, 10]) - agent_cls = get_agent_class(alg_run) - config = deepcopy(agent_cls._default_config) - - config["num_workers"] = n_cpus - config["train_batch_size"] = horizon * n_rollouts - config["gamma"] = 0.999 # discount rate - config["model"].update({"fcnet_hiddens": [32, 32, 32]}) - config["use_gae"] = True - config["lambda"] = 0.97 - config["kl_target"] = 0.02 - config["num_sgd_iter"] = 10 - config["horizon"] = horizon + else: + sys.exit("We only support PPO, TD3, right now.") + + # define some standard and useful callbacks + def on_episode_start(info): + episode = info["episode"] + episode.user_data["avg_speed"] = [] + episode.user_data["avg_speed_avs"] = [] + episode.user_data["avg_energy"] = [] + episode.user_data["inst_mpg"] = [] + episode.user_data["num_cars"] = [] + episode.user_data["avg_accel_human"] = [] + episode.user_data["avg_accel_avs"] = [] + + def on_episode_step(info): + episode = info["episode"] + env = info["env"].get_unwrapped()[0] + if isinstance(env, _GroupAgentsWrapper): + env = env.env + if hasattr(env, 'no_control_edges'): + veh_ids = [ + veh_id for veh_id in env.k.vehicle.get_ids() + if env.k.vehicle.get_speed(veh_id) >= 0 + and env.k.vehicle.get_edge(veh_id) not in env.no_control_edges + ] + rl_ids = [ + veh_id for veh_id in env.k.vehicle.get_rl_ids() + if env.k.vehicle.get_speed(veh_id) >= 0 + and env.k.vehicle.get_edge(veh_id) not in env.no_control_edges + ] + else: + veh_ids = [veh_id for veh_id in env.k.vehicle.get_ids() if env.k.vehicle.get_speed(veh_id) >= 0] + rl_ids = [veh_id for veh_id in env.k.vehicle.get_rl_ids() if env.k.vehicle.get_speed(veh_id) >= 0] + + speed = np.mean([speed for speed in env.k.vehicle.get_speed(veh_ids)]) + if not np.isnan(speed): + episode.user_data["avg_speed"].append(speed) + av_speed = np.mean([speed for speed in env.k.vehicle.get_speed(rl_ids) if speed >= 0]) + if not np.isnan(av_speed): + episode.user_data["avg_speed_avs"].append(av_speed) + episode.user_data["inst_mpg"].append(instantaneous_mpg(env, veh_ids, gain=1.0)) + episode.user_data["num_cars"].append(len(env.k.vehicle.get_ids())) + episode.user_data["avg_accel_human"].append(np.nan_to_num(np.mean( + [np.abs((env.k.vehicle.get_speed(veh_id) - env.k.vehicle.get_previous_speed(veh_id))/env.sim_step) for + veh_id in veh_ids if veh_id in env.k.vehicle.previous_speeds.keys()] + ))) + episode.user_data["avg_accel_avs"].append(np.nan_to_num(np.mean( + [np.abs((env.k.vehicle.get_speed(veh_id) - env.k.vehicle.get_previous_speed(veh_id))/env.sim_step) for + veh_id in rl_ids if veh_id in env.k.vehicle.previous_speeds.keys()] + ))) + + def on_episode_end(info): + episode = info["episode"] + avg_speed = np.mean(episode.user_data["avg_speed"]) + episode.custom_metrics["avg_speed"] = avg_speed + 
avg_speed_avs = np.mean(episode.user_data["avg_speed_avs"]) + episode.custom_metrics["avg_speed_avs"] = avg_speed_avs + episode.custom_metrics["avg_accel_avs"] = np.mean(episode.user_data["avg_accel_avs"]) + episode.custom_metrics["avg_energy_per_veh"] = np.mean(episode.user_data["avg_energy"]) + episode.custom_metrics["avg_mpg_per_veh"] = np.mean(episode.user_data["inst_mpg"]) + episode.custom_metrics["num_cars"] = np.mean(episode.user_data["num_cars"]) + + def on_train_result(info): + """Store the mean score of the episode, and increment or decrement the iteration number for curriculum.""" + trainer = info["trainer"] + trainer.workers.foreach_worker( + lambda ev: ev.foreach_env( + lambda env: env.set_iteration_num())) + + config["callbacks"] = {"on_episode_start": tune.function(on_episode_start), + "on_episode_step": tune.function(on_episode_step), + "on_episode_end": tune.function(on_episode_end), + "on_train_result": tune.function(on_train_result)} # save the flow params for replay flow_json = json.dumps( @@ -162,64 +339,71 @@ def setup_exps_rllib(flow_params, # multiagent configuration if policy_graphs is not None: - print("policy_graphs", policy_graphs) config['multiagent'].update({'policies': policy_graphs}) if policy_mapping_fn is not None: - config['multiagent'].update( - {'policy_mapping_fn': tune.function(policy_mapping_fn)}) + config['multiagent'].update({'policy_mapping_fn': tune.function(policy_mapping_fn)}) if policies_to_train is not None: config['multiagent'].update({'policies_to_train': policies_to_train}) create_env, gym_name = make_create_env(params=flow_params) - # Register as rllib env register_env(gym_name, create_env) return alg_run, gym_name, config - def train_rllib(submodule, flags): """Train policies using the PPO algorithm in RLlib.""" import ray - from ray.tune import run_experiments + from ray import tune flow_params = submodule.flow_params - n_cpus = submodule.N_CPUS - n_rollouts = submodule.N_ROLLOUTS + flow_params['sim'].render = flags.render policy_graphs = getattr(submodule, "POLICY_GRAPHS", None) policy_mapping_fn = getattr(submodule, "policy_mapping_fn", None) policies_to_train = getattr(submodule, "policies_to_train", None) alg_run, gym_name, config = setup_exps_rllib( - flow_params, n_cpus, n_rollouts, + flow_params, flags.num_cpus, flags.num_rollouts, flags, policy_graphs, policy_mapping_fn, policies_to_train) - ray.init(num_cpus=n_cpus + 1, object_store_memory=200 * 1024 * 1024) - exp_config = { - "run": alg_run, - "env": gym_name, - "config": { - **config - }, - "checkpoint_freq": 20, + config['num_workers'] = flags.num_cpus + config['env'] = gym_name + + # create a custom string that makes looking at the experiment names easier + def trial_str_creator(trial): + return "{}_{}".format(trial.trainable_name, trial.experiment_tag) + + if flags.multi_node: + ray.init(redis_address='localhost:6379') + elif flags.local_mode: + ray.init(local_mode=True) + else: + ray.init() + + exp_dict = { + "run_or_experiment": alg_run, + "name": flags.exp_title or flow_params['exp_tag'], + "config": config, + "checkpoint_freq": flags.checkpoint_freq, "checkpoint_at_end": True, - "max_failures": 999, + 'trial_name_creator': trial_str_creator, + "max_failures": 0, "stop": { - "training_iteration": flags.num_steps, + "training_iteration": flags.num_iterations, }, } - - if flags.checkpoint_path is not None: - exp_config['restore'] = flags.checkpoint_path - run_experiments({flow_params["exp_tag"]: exp_config}) + date = datetime.now(tz=pytz.utc) + date = 
date.astimezone(pytz.timezone('US/Pacific')).strftime("%m-%d-%Y") + if flags.use_s3: + s3_string = "s3://i210.experiments/i210/" \ + + date + '/' + flags.exp_title + exp_dict['upload_dir'] = s3_string + tune.run(**exp_dict, queue_trials=False, raise_on_failed_trial=False) -def train_h_baselines(flow_params, args, multiagent): +def train_h_baselines(env_name, args, multiagent): """Train policies using SAC and TD3 with h-baselines.""" from hbaselines.algorithms import OffPolicyRLAlgorithm from hbaselines.utils.train import parse_options, get_hyperparameters - from hbaselines.envs.mixed_autonomy import FlowEnv - - flow_params = deepcopy(flow_params) # Get the command-line arguments that are relevant here args = parse_options(description="", example_usage="", args=args) @@ -227,31 +411,6 @@ def train_h_baselines(flow_params, args, multiagent): # the base directory that the logged data will be stored in base_dir = "training_data" - # Create the training environment. - env = FlowEnv( - flow_params, - multiagent=multiagent, - shared=args.shared, - maddpg=args.maddpg, - render=args.render, - version=0 - ) - - # Create the evaluation environment. - if args.evaluate: - eval_flow_params = deepcopy(flow_params) - eval_flow_params['env'].evaluate = True - eval_env = FlowEnv( - eval_flow_params, - multiagent=multiagent, - shared=args.shared, - maddpg=args.maddpg, - render=args.render_eval, - version=1 - ) - else: - eval_env = None - for i in range(args.n_training): # value of the next seed seed = args.seed + i @@ -299,8 +458,8 @@ def train_h_baselines(flow_params, args, multiagent): # Create the algorithm object. alg = OffPolicyRLAlgorithm( policy=policy, - env=env, - eval_env=eval_env, + env="flow:{}".format(env_name), + eval_env="flow:{}".format(env_name) if args.evaluate else None, **hp ) @@ -320,7 +479,6 @@ def train_stable_baselines(submodule, flags): """Train policies using the PPO algorithm in stable-baselines.""" from stable_baselines.common.vec_env import DummyVecEnv from stable_baselines import PPO2 - flow_params = submodule.flow_params # Path to the saved files exp_tag = flow_params['exp_tag'] @@ -393,8 +551,7 @@ def main(args): elif flags.rl_trainer.lower() == "stable-baselines": train_stable_baselines(submodule, flags) elif flags.rl_trainer.lower() == "h-baselines": - flow_params = submodule.flow_params - train_h_baselines(flow_params, args, multiagent) + train_h_baselines(flags.exp_config, args, multiagent) else: raise ValueError("rl_trainer should be either 'rllib', 'h-baselines', " "or 'stable-baselines'.") diff --git a/flow/algorithms/__init__.py b/flow/algorithms/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/flow/algorithms/centralized_PPO.py b/flow/algorithms/centralized_PPO.py new file mode 100644 index 000000000..133d7c8bf --- /dev/null +++ b/flow/algorithms/centralized_PPO.py @@ -0,0 +1,553 @@ +"""An example of customizing PPO to leverage a centralized critic.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import numpy as np + +from ray.rllib.agents.ppo.ppo import PPOTrainer +from flow.algorithms.custom_ppo import CustomPPOTFPolicy, KLCoeffMixin +from ray.rllib.evaluation.postprocessing import compute_advantages, \ + Postprocessing +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.policy.tf_policy import LearningRateSchedule, \ + EntropyCoeffSchedule, ACTION_LOGP +from ray.rllib.models.modelv2 import ModelV2 +from ray.rllib.models.tf.tf_modelv2 
import TFModelV2 +from ray.rllib.models.tf.recurrent_tf_modelv2 import RecurrentTFModelV2 +from ray.rllib.utils.annotations import override +from ray.rllib.models.tf.fcnet_v2 import FullyConnectedNetwork +from ray.rllib.utils.explained_variance import explained_variance +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() + +# Frozen logits of the policy that computed the action +BEHAVIOUR_LOGITS = "behaviour_logits" + +CENTRAL_OBS = "central_obs" +OPPONENT_ACTION = "opponent_action" + +parser = argparse.ArgumentParser() +parser.add_argument("--stop", type=int, default=100000) + + +class CentralizedCriticModel(TFModelV2): + """Multi-agent model that implements a centralized VF.""" + + # TODO(@evinitsky) make this work with more than boxes + + def __init__(self, obs_space, action_space, num_outputs, model_config, + name): + super(CentralizedCriticModel, self).__init__( + obs_space, action_space, num_outputs, model_config, name) + # Base of the model + self.model = FullyConnectedNetwork(obs_space, action_space, + num_outputs, model_config, name) + self.register_variables(self.model.variables()) + + # Central VF maps (obs, opp_ops, opp_act) -> vf_pred + self.max_num_agents = model_config['custom_options']['max_num_agents'] + self.obs_space_shape = obs_space.shape[0] + self.obs_space = obs_space + other_obs = tf.keras.layers.Input( + shape=(obs_space.shape[0] * self.max_num_agents,), + name="central_obs") + central_vf_dense = tf.keras.layers.Dense( + model_config['custom_options']['central_vf_size'], + activation=tf.nn.tanh, name="c_vf_dense")(other_obs) + central_vf_out = tf.keras.layers.Dense( + 1, activation=None, name="c_vf_out")(central_vf_dense) + self.central_vf = tf.keras.Model( + inputs=[other_obs], outputs=central_vf_out) + self.register_variables(self.central_vf.variables) + + def forward(self, input_dict, state, seq_lens): + """Run forward inference.""" + return self.model.forward(input_dict, state, seq_lens) + + def central_value_function(self, central_obs): + """Compute the centralized value function.""" + return tf.reshape( + self.central_vf( + [central_obs]), [-1]) + + def value_function(self): + """Compute the normal value function; this is only here to make the code run.""" + return self.model.value_function() # not used + + +# TODO(@evinitsky) support recurrence +class CentralizedCriticModelRNN(RecurrentTFModelV2): + """Example of using the Keras functional API to define a RNN model.""" + + def __init__(self, + obs_space, + action_space, + num_outputs, + model_config, + name, + hiddens_size=64, + cell_size=64): + super(CentralizedCriticModelRNN, self).__init__( + obs_space, action_space, num_outputs, model_config, name) + self.cell_size = cell_size + + # Define input layers + input_layer = tf.keras.layers.Input( + shape=(None, obs_space.shape[0]), name="inputs") + state_in_h = tf.keras.layers.Input(shape=(cell_size,), name="h") + state_in_c = tf.keras.layers.Input(shape=(cell_size,), name="c") + seq_in = tf.keras.layers.Input(shape=(), name="seq_in") + + # Preprocess observation with a hidden layer and send to LSTM cell + dense1 = tf.keras.layers.Dense( + hiddens_size, activation=tf.nn.relu, name="dense1")(input_layer) + lstm_out, state_h, state_c = tf.keras.layers.LSTM( + cell_size, return_sequences=True, return_state=True, name="lstm")( + inputs=dense1, + mask=tf.sequence_mask(seq_in), + initial_state=[state_in_h, state_in_c]) + + # Postprocess LSTM output with another hidden layer and compute values + logits = tf.keras.layers.Dense( + self.num_outputs, + 
activation=tf.keras.activations.linear,
+            name="logits")(lstm_out)
+        values = tf.keras.layers.Dense(
+            1, activation=None, name="values")(lstm_out)
+
+        # Create the RNN model
+        self.model = tf.keras.Model(
+            inputs=[input_layer, seq_in, state_in_h, state_in_c],
+            outputs=[logits, values, state_h, state_c])
+        self.register_variables(self.model.variables)
+        self.model.summary()
+
+        # TODO(@evinitsky) add layer sharing to the VF
+        # Create the centralized VF
+        # Central VF maps (obs, opp_ops, opp_act) -> vf_pred
+        self.max_num_agents = model_config.get("max_num_agents", 120)
+        self.obs_space_shape = obs_space.shape[0]
+        other_obs = tf.keras.layers.Input(
+            shape=(obs_space.shape[0] * self.max_num_agents,),
+            name="all_agent_obs")
+        central_vf_dense = tf.keras.layers.Dense(
+            model_config.get("central_vf_size", 64), activation=tf.nn.tanh,
+            name="c_vf_dense")(other_obs)
+        # layer names must be unique within a model; reusing "c_vf_dense" here
+        # would make tf.keras.Model raise at construction time
+        central_vf_dense2 = tf.keras.layers.Dense(
+            model_config.get("central_vf_size", 64), activation=tf.nn.tanh,
+            name="c_vf_dense2")(central_vf_dense)
+        central_vf_out = tf.keras.layers.Dense(
+            1, activation=None, name="c_vf_out")(central_vf_dense2)
+        self.central_vf = tf.keras.Model(
+            inputs=[other_obs], outputs=central_vf_out)
+        self.register_variables(self.central_vf.variables)
+
+    @override(RecurrentTFModelV2)
+    def forward_rnn(self, inputs, state, seq_lens):
+        """Forward inference on the RNN."""
+        model_out, self._value_out, h, c = self.model(
+            [inputs, seq_lens] + state)
+        return model_out, [h, c]
+
+    @override(ModelV2)
+    def get_initial_state(self):
+        """Set up the initial RNN state."""
+        return [
+            np.zeros(self.cell_size, np.float32),
+            np.zeros(self.cell_size, np.float32),
+        ]
+
+    def central_value_function(self, central_obs):
+        """Compute the central value function."""
+        return tf.reshape(
+            self.central_vf(
+                [central_obs]), [-1])
+
+    def value_function(self):
+        """Compute the normal value function; this is only here to make the code run."""
+        return tf.reshape(self._value_out, [-1])  # not used
+
+
+class CentralizedValueMixin(object):
+    """Add methods to evaluate the central value function from the model."""
+
+    def __init__(self):
+        # TODO(@evinitsky) clean up naming
+        self.central_value_function = self.model.central_value_function(
+            self.get_placeholder(CENTRAL_OBS)
+        )
+
+    def compute_central_vf(self, central_obs):
+        """Run forward inference on the model."""
+        feed_dict = {
+            self.get_placeholder(CENTRAL_OBS): central_obs,
+        }
+        return self.get_session().run(self.central_value_function, feed_dict)
+
+
+def centralized_critic_postprocessing(policy,
+                                      sample_batch,
+                                      other_agent_batches=None,
+                                      episode=None):
+    """Find all other agents that overlapped with you and stack their obs to be passed to the central VF."""
+    if policy.loss_initialized():
+        assert other_agent_batches is not None
+
+        # time_span = (sample_batch['t'][0], sample_batch['t'][-1])
+        # # there's a new problem here, namely that a segment might not be continuous due to the rerouting
+        # other_agent_timespans = {agent_id:
+        #                          (other_agent_batches[agent_id][1]["t"][0],
+        #                           other_agent_batches[agent_id][1]["t"][-1])
+        #                          for agent_id in other_agent_batches.keys()}
+        other_agent_times = {agent_id: other_agent_batches[agent_id][1]["t"]
+                             for agent_id in other_agent_batches.keys()}
+        agent_time = sample_batch['t']
+        # # find agents whose time overlaps with the current agent
+        rel_agents = {agent_id: other_agent_time for agent_id, other_agent_time
+                      in other_agent_times.items()}
+        # if len(rel_agents) > 0:
+        other_obs = {
+            agent_id:
other_agent_batches[agent_id][1]["obs"].copy() + for agent_id in other_agent_batches.keys() + } + padded_agent_obs = { + agent_id: fill_missing( + agent_time, + other_agent_times[agent_id], + other_obs[agent_id] + ) + for agent_id, rel_agent_time in rel_agents.items() + } + # okay, now we need to stack and sort + central_obs_list = [padded_obs for padded_obs in + padded_agent_obs.values()] + try: + central_obs_batch = np.hstack( + (sample_batch["obs"], np.hstack(central_obs_list))) + except Exception as e: + print("Error in centralized PPO: ", e) + # TODO(@ev) this is a bug and needs to be fixed + central_obs_batch = sample_batch["obs"] + max_vf_agents = policy.model.max_num_agents + num_agents = len(rel_agents) + 1 + if num_agents < max_vf_agents: + diff = max_vf_agents - num_agents + zero_pad = np.zeros((central_obs_batch.shape[0], + policy.model.obs_space_shape * diff)) + central_obs_batch = np.hstack((central_obs_batch, + zero_pad)) + elif num_agents > max_vf_agents: + print("Too many agents!") + + # also record the opponent obs and actions in the trajectory + sample_batch[CENTRAL_OBS] = central_obs_batch + + # overwrite default VF prediction with the central VF + sample_batch[SampleBatch.VF_PREDS] = policy.compute_central_vf( + sample_batch[CENTRAL_OBS]) + else: + # policy hasn't initialized yet, use zeros + # TODO(evinitsky) put in the right shape + obs_shape = sample_batch[SampleBatch.CUR_OBS].shape[1] + obs_shape = (1, obs_shape * (policy.model.max_num_agents)) + sample_batch[CENTRAL_OBS] = np.zeros(obs_shape) + # TODO(evinitsky) put in the right shape. Will break if actions aren't 1 + sample_batch[SampleBatch.VF_PREDS] = np.zeros(1, dtype=np.float32) + + # TODO (ak): this was not being used, so commented + # completed = sample_batch["dones"][-1] + + # if not completed and policy.loss_initialized(): + # last_r = 0.0 + # else: + # next_state = [] + # for i in range(policy.num_state_tensors()): + # next_state.append([sample_batch["state_out_{}".format(i)][-1]]) + # last_r = policy.compute_central_vf(sample_batch[CENTRAL_OBS][-1][np.newaxis, ...])[0] + + batch = compute_advantages( + sample_batch, + 0.0, + policy.config["gamma"], + policy.config["lambda"], + use_gae=policy.config["use_gae"]) + return batch + + +def time_overlap(time_span, agent_time): + """Check if agent_time overlaps with time_span.""" + if agent_time[0] <= time_span[1] and agent_time[1] >= time_span[0]: + return True + else: + return False + + +def fill_missing(agent_time, other_agent_time, obs): + """Pad the obs to the appropriate length for agents that don't overlap perfectly in time.""" + # shortcut, the two overlap perfectly + if np.sum(agent_time == other_agent_time) == agent_time.shape[0]: + return obs + new_obs = np.zeros((agent_time.shape[0], obs.shape[1])) + other_agent_time_set = set(other_agent_time) + for i, time in enumerate(agent_time): + if time in other_agent_time_set: + new_obs[i] = obs[np.where(other_agent_time == time)] + return new_obs + + +def overlap_and_pad_agent(time_span, agent_time, obs): + """Take the part of obs that overlaps, pad to length time_span. 
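+
+    Timesteps at which the other agent was absent are filled with zeros, so
+    the returned observations align one-to-one with the timesteps spanned by
+    time_span.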
+ + Parameters + ---------- + time_span : tuple + tuple of the first and last time that the agent of interest is in the + system + agent_time : tuple + tuple of the first and last time that the agent whose obs we are + padding is in the system + obs : array_like + observations of the agent whose time is agent_time + """ + assert time_overlap(time_span, agent_time) + print(time_span) + print(agent_time) + # FIXME(ev) some of these conditions can be combined + # no padding needed + if agent_time[0] == time_span[0] and agent_time[1] == time_span[1]: + return obs + # agent enters before time_span starts and exits before time_span end + if agent_time[0] < time_span[0] and agent_time[1] < time_span[1]: + non_overlap_time = time_span[0] - agent_time[0] + missing_time = time_span[1] - agent_time[1] + overlap_obs = obs[non_overlap_time:] + padding = np.zeros((missing_time, obs.shape[1])) + obs_concat = np.concatenate((overlap_obs, padding)) + return obs_concat + # agent enters after time_span starts and exits after time_span ends + elif agent_time[0] > time_span[0] and agent_time[1] > time_span[1]: + non_overlap_time = agent_time[1] - time_span[1] + overlap_obs = obs[:-non_overlap_time] + missing_time = agent_time[0] - time_span[0] + padding = np.zeros((missing_time, obs.shape[1])) + obs_concat = np.concatenate((padding, overlap_obs)) + return obs_concat + # agent time is entirely contained in time_span + elif agent_time[0] >= time_span[0] and agent_time[1] <= time_span[1]: + missing_left = agent_time[0] - time_span[0] + missing_right = time_span[1] - agent_time[1] + obs_concat = obs + if missing_left > 0: + padding = np.zeros((missing_left, obs.shape[1])) + obs_concat = np.concatenate((padding, obs_concat)) + if missing_right > 0: + padding = np.zeros((missing_right, obs.shape[1])) + obs_concat = np.concatenate((obs_concat, padding)) + return obs_concat + # agent time totally contains time_span + elif agent_time[0] <= time_span[0] and agent_time[1] >= time_span[1]: + non_overlap_left = time_span[0] - agent_time[0] + non_overlap_right = agent_time[1] - time_span[1] + overlap_obs = obs + if non_overlap_left > 0: + overlap_obs = overlap_obs[non_overlap_left:] + if non_overlap_right > 0: + overlap_obs = overlap_obs[:-non_overlap_right] + return overlap_obs + + +def loss_with_central_critic(policy, model, dist_class, train_batch): + """Set up the PPO loss but replace the VF loss with the centralized VF loss.""" + CentralizedValueMixin.__init__(policy) + + logits, state = model.from_batch(train_batch) + action_dist = dist_class(logits, model) + + policy.loss_obj = PPOLoss( + policy.action_space, + dist_class, + model, + train_batch[Postprocessing.VALUE_TARGETS], + train_batch[Postprocessing.ADVANTAGES], + train_batch[SampleBatch.ACTIONS], + train_batch[BEHAVIOUR_LOGITS], + train_batch[ACTION_LOGP], + train_batch[SampleBatch.VF_PREDS], + action_dist, + policy.central_value_function, + policy.kl_coeff, + tf.ones_like(train_batch[Postprocessing.ADVANTAGES], dtype=tf.bool), + entropy_coeff=policy.entropy_coeff, + clip_param=policy.config["clip_param"], + vf_clip_param=policy.config["vf_clip_param"], + vf_loss_coeff=policy.config["vf_loss_coeff"], + use_gae=policy.config["use_gae"], + model_config=policy.config["model"]) + + return policy.loss_obj.loss + + +class PPOLoss(object): + """Object containing the PPO loss function.""" + + def __init__(self, + action_space, + dist_class, + model, + value_targets, + advantages, + actions, + prev_logits, + prev_actions_logp, + vf_preds, + curr_action_dist, + value_fn, 
+                 cur_kl_coeff,
+                 valid_mask,
+                 entropy_coeff=0,
+                 clip_param=0.1,
+                 vf_clip_param=0.1,
+                 vf_loss_coeff=1.0,
+                 use_gae=True,
+                 model_config=None):
+        """Construct the loss for Proximal Policy Objective.
+
+        Parameters
+        ----------
+        action_space : TODO
+            Environment action space specification.
+        dist_class : TODO
+            action distribution class for logits.
+        value_targets : tf.placeholder
+            Placeholder for target values; used for GAE.
+        advantages : tf.placeholder
+            Placeholder for calculated advantages from previous model
+            evaluation.
+        actions : tf.placeholder
+            Placeholder for actions taken from previous model evaluation.
+        prev_logits : tf.placeholder
+            Placeholder for logits output from previous model evaluation.
+        prev_actions_logp : tf.placeholder
+            Placeholder for action log-prob output from previous model
+            evaluation.
+        vf_preds : tf.placeholder
+            Placeholder for value function output from previous model
+            evaluation.
+        curr_action_dist : ActionDistribution
+            ActionDistribution of the current model.
+        value_fn : tf.Tensor
+            Current value function output Tensor.
+        cur_kl_coeff : tf.Variable
+            Variable holding the current PPO KL coefficient.
+        valid_mask : tf.Tensor
+            A bool mask of valid input elements (#2992).
+        entropy_coeff : float
+            Coefficient of the entropy regularizer.
+        clip_param : float
+            Clip parameter.
+        vf_clip_param : float
+            Clip parameter for the value function.
+        vf_loss_coeff : float
+            Coefficient of the value function loss.
+        use_gae : bool
+            If true, use the Generalized Advantage Estimator.
+        model_config : dict, optional
+            model config for use in specifying action distributions.
+        """
+
+        def reduce_mean_valid(t):
+            return tf.reduce_mean(tf.boolean_mask(t, valid_mask))
+
+        prev_dist = dist_class(prev_logits, model)
+        # Make loss functions.
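+        # The clipped surrogate objective computed below is
+        #   E[min(r_t * A_t, clip(r_t, 1 - clip_param, 1 + clip_param) * A_t)]
+        # where the ratio r_t = pi_new(a_t|s_t) / pi_old(a_t|s_t) is formed in
+        # log space as exp(logp_new - logp_old).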
+        logp_ratio = tf.exp(curr_action_dist.logp(actions) - prev_actions_logp)
+        action_kl = prev_dist.kl(curr_action_dist)
+        self.mean_kl = reduce_mean_valid(action_kl)
+
+        curr_entropy = curr_action_dist.entropy()
+        self.mean_entropy = reduce_mean_valid(curr_entropy)
+
+        surrogate_loss = tf.minimum(
+            advantages * logp_ratio,
+            advantages * tf.clip_by_value(logp_ratio, 1 - clip_param,
+                                          1 + clip_param))
+        self.mean_policy_loss = reduce_mean_valid(-surrogate_loss)
+
+        if use_gae:
+            vf_loss1 = tf.square(value_fn - value_targets)
+            vf_clipped = vf_preds + tf.clip_by_value(
+                value_fn - vf_preds, -vf_clip_param, vf_clip_param)
+            vf_loss2 = tf.square(vf_clipped - value_targets)
+            vf_loss = tf.maximum(vf_loss1, vf_loss2)
+            self.mean_vf_loss = reduce_mean_valid(vf_loss)
+            loss = reduce_mean_valid(
+                -surrogate_loss +
+                vf_loss_coeff * vf_loss - entropy_coeff * curr_entropy)
+        else:
+            self.mean_vf_loss = tf.constant(0.0)
+            loss = reduce_mean_valid(-surrogate_loss -
+                                     entropy_coeff * curr_entropy)
+        self.loss = loss
+
+
+def new_ppo_surrogate_loss(policy, model, dist_class, train_batch):
+    """Return the PPO loss with the centralized value function."""
+    loss = loss_with_central_critic(policy, model, dist_class, train_batch)
+    return loss
+
+
+def setup_mixins(policy, obs_space, action_space, config):
+    """Construct additional classes that add on to PPO."""
+    KLCoeffMixin.__init__(policy, config)
+
+    EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"],
+                                  config["entropy_coeff_schedule"])
+    LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])
+    # hack: put in a noop VF so some of the inherited PPO code runs
+    policy.value_function = tf.zeros(
+        tf.shape(policy.get_placeholder(SampleBatch.CUR_OBS))[0])
+
+
+def central_vf_stats(policy, train_batch, grads):
+    """Report the explained variance of the centralized value function."""
+    return {
+        "vf_explained_var": explained_variance(
+            train_batch[Postprocessing.VALUE_TARGETS],
+            policy.central_value_function),
+    }
+
+
+def kl_and_loss_stats(policy, train_batch):
+    """Training stats to pass to tensorboard."""
+    return {
+        "cur_kl_coeff": tf.cast(policy.kl_coeff, tf.float64),
+        "cur_lr": tf.cast(policy.cur_lr, tf.float64),
+        "total_loss": policy.loss_obj.loss,
+        "policy_loss": policy.loss_obj.mean_policy_loss,
+        "vf_loss": policy.loss_obj.mean_vf_loss,
+        "vf_explained_var": explained_variance(
+            train_batch[Postprocessing.VALUE_TARGETS],
+            policy.model.value_function()),
+        "vf_preds": train_batch[Postprocessing.VALUE_TARGETS],
+        "kl": policy.loss_obj.mean_kl,
+        "entropy": policy.loss_obj.mean_entropy,
+        "entropy_coeff": tf.cast(policy.entropy_coeff, tf.float64),
+    }
+
+
+CCPPO = CustomPPOTFPolicy.with_updates(
+    name="CCPPO",
+    postprocess_fn=centralized_critic_postprocessing,
+    loss_fn=new_ppo_surrogate_loss,
+    stats_fn=kl_and_loss_stats,
+    before_loss_init=setup_mixins,
+    grad_stats_fn=central_vf_stats,
+    mixins=[
+        LearningRateSchedule, EntropyCoeffSchedule,
+        CentralizedValueMixin, KLCoeffMixin
+    ])
+
+CCTrainer = PPOTrainer.with_updates(name="CCPPOTrainer", default_policy=CCPPO)
diff --git a/flow/algorithms/custom_ppo.py b/flow/algorithms/custom_ppo.py
new file mode 100644
index 000000000..47a4459aa
--- /dev/null
+++ b/flow/algorithms/custom_ppo.py
@@ -0,0 +1,344 @@
+"""PPO but without the adaptive KL term that RLlib added."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+
+import ray
+from ray.rllib.evaluation.postprocessing import
compute_advantages, \ + Postprocessing +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.policy.tf_policy import LearningRateSchedule, \ + EntropyCoeffSchedule, ACTION_LOGP +from ray.rllib.policy.tf_policy_template import build_tf_policy +from ray.rllib.utils.explained_variance import explained_variance +from ray.rllib.utils.tf_ops import make_tf_callable +from ray.rllib.utils import try_import_tf + +from ray.rllib.agents.trainer_template import build_trainer +from ray.rllib.agents.ppo.ppo import choose_policy_optimizer, DEFAULT_CONFIG +from ray.rllib.agents.ppo.ppo import warn_about_bad_reward_scales + +tf = try_import_tf() + +logger = logging.getLogger(__name__) + +# Frozen logits of the policy that computed the action +BEHAVIOUR_LOGITS = "behaviour_logits" + + +class PPOLoss(object): + """PPO Loss object.""" + + def __init__(self, + action_space, + dist_class, + model, + value_targets, + advantages, + actions, + prev_logits, + prev_actions_logp, + vf_preds, + curr_action_dist, + value_fn, + cur_kl_coeff, + valid_mask, + entropy_coeff=0, + clip_param=0.1, + vf_clip_param=0.1, + vf_loss_coeff=1.0, + use_gae=True, + model_config=None): + """Construct the loss for Proximal Policy Objective. + + Parameters + ---------- + action_space : TODO + Environment observation space specification. + dist_class : TODO + action distribution class for logits. + value_targets : tf.placeholder + Placeholder for target values; used for GAE. + actions : tf.placeholder + Placeholder for actions taken from previous model evaluation. + advantages : tf.placeholder + Placeholder for calculated advantages from previous model + evaluation. + prev_logits : tf.placeholder + Placeholder for logits output from previous model evaluation. + prev_actions_logp : tf.placeholder + Placeholder for prob output from previous model evaluation. + vf_preds : tf.placeholder + Placeholder for value function output from previous model + evaluation. + curr_action_dist : ActionDistribution + ActionDistribution of the current model. + value_fn : tf.Tensor + Current value function output Tensor. + cur_kl_coeff : tf.Variable + Variable holding the current PPO KL coefficient. + valid_mask : tf.Tensor + A bool mask of valid input elements (#2992). + entropy_coeff : float + Coefficient of the entropy regularizer. + clip_param : float + Clip parameter + vf_clip_param : float + Clip parameter for the value function + vf_loss_coeff : float + Coefficient of the value function loss + use_gae : bool + If true, use the Generalized Advantage Estimator. + model_config : dict, optional + model config for use in specifying action distributions. + """ + + def reduce_mean_valid(t): + return tf.reduce_mean(tf.boolean_mask(t, valid_mask)) + + prev_dist = dist_class(prev_logits, model) + # Make loss functions. 
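+        # Note: consistent with this file's purpose, the adaptive KL penalty
+        # term (cur_kl_coeff * action_kl) is left out of the total loss below;
+        # only the clipped surrogate, the value function loss, and the entropy
+        # bonus remain, so cur_kl_coeff is accepted but never used.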
+ logp_ratio = tf.exp(curr_action_dist.logp(actions) - prev_actions_logp) + action_kl = prev_dist.kl(curr_action_dist) + self.mean_kl = reduce_mean_valid(action_kl) + + curr_entropy = curr_action_dist.entropy() + self.mean_entropy = reduce_mean_valid(curr_entropy) + + surrogate_loss = tf.minimum( + advantages * logp_ratio, + advantages * tf.clip_by_value(logp_ratio, 1 - clip_param, + 1 + clip_param)) + self.mean_policy_loss = reduce_mean_valid(-surrogate_loss) + + if use_gae: + vf_loss1 = tf.square(value_fn - value_targets) + vf_clipped = vf_preds + tf.clip_by_value( + value_fn - vf_preds, -vf_clip_param, vf_clip_param) + vf_loss2 = tf.square(vf_clipped - value_targets) + vf_loss = tf.maximum(vf_loss1, vf_loss2) + self.mean_vf_loss = reduce_mean_valid(vf_loss) + loss = reduce_mean_valid( + -surrogate_loss + + vf_loss_coeff * vf_loss - entropy_coeff * curr_entropy) + else: + self.mean_vf_loss = tf.constant(0.0) + loss = reduce_mean_valid(-surrogate_loss - entropy_coeff * curr_entropy) + self.loss = loss + + +def ppo_surrogate_loss(policy, model, dist_class, train_batch): + """Construct and return the PPO loss.""" + logits, state = model.from_batch(train_batch) + action_dist = dist_class(logits, model) + + if state: + max_seq_len = tf.reduce_max(train_batch["seq_lens"]) + mask = tf.sequence_mask(train_batch["seq_lens"], max_seq_len) + mask = tf.reshape(mask, [-1]) + else: + mask = tf.ones_like( + train_batch[Postprocessing.ADVANTAGES], dtype=tf.bool) + + policy.loss_obj = PPOLoss( + policy.action_space, + dist_class, + model, + train_batch[Postprocessing.VALUE_TARGETS], + train_batch[Postprocessing.ADVANTAGES], + train_batch[SampleBatch.ACTIONS], + train_batch[BEHAVIOUR_LOGITS], + train_batch[ACTION_LOGP], + train_batch[SampleBatch.VF_PREDS], + action_dist, + model.value_function(), + policy.kl_coeff, + mask, + entropy_coeff=policy.entropy_coeff, + clip_param=policy.config["clip_param"], + vf_clip_param=policy.config["vf_clip_param"], + vf_loss_coeff=policy.config["vf_loss_coeff"], + use_gae=policy.config["use_gae"], + model_config=policy.config["model"]) + + return policy.loss_obj.loss + + +def kl_and_loss_stats(policy, train_batch): + """Return statistics for the tensorboard.""" + return { + "cur_kl_coeff": tf.cast(policy.kl_coeff, tf.float64), + "cur_lr": tf.cast(policy.cur_lr, tf.float64), + "total_loss": policy.loss_obj.loss, + "policy_loss": policy.loss_obj.mean_policy_loss, + "vf_loss": policy.loss_obj.mean_vf_loss, + "vf_explained_var": explained_variance( + train_batch[Postprocessing.VALUE_TARGETS], + policy.model.value_function()), + "vf_preds": train_batch[Postprocessing.VALUE_TARGETS], + "kl": policy.loss_obj.mean_kl, + "entropy": policy.loss_obj.mean_entropy, + "entropy_coeff": tf.cast(policy.entropy_coeff, tf.float64), + "advantages": train_batch[Postprocessing.ADVANTAGES], + "rewards": train_batch["rewards"] + } + + +def vf_preds_and_logits_fetches(policy): + """Add value function and logits outputs to experience train_batches.""" + return { + SampleBatch.VF_PREDS: policy.model.value_function(), + BEHAVIOUR_LOGITS: policy.model.last_output(), + } + + +def postprocess_ppo_gae(policy, + sample_batch, + other_agent_batches=None, + episode=None): + """Add the policy logits, VF preds, and advantages to the trajectory.""" + completed = sample_batch["dones"][-1] + if completed: + last_r = 0.0 + else: + next_state = [] + for i in range(policy.num_state_tensors()): + next_state.append([sample_batch["state_out_{}".format(i)][-1]]) + last_r = 
policy._value(sample_batch[SampleBatch.NEXT_OBS][-1], + sample_batch[SampleBatch.ACTIONS][-1], + sample_batch[SampleBatch.REWARDS][-1], + *next_state) + + batch = compute_advantages( + sample_batch, + last_r, + policy.config["gamma"], + policy.config["lambda"], + use_gae=policy.config["use_gae"]) + return batch + + +def clip_gradients(policy, optimizer, loss): + """If grad_clip is not None, clip the gradients.""" + variables = policy.model.trainable_variables() + if policy.config["grad_clip"] is not None: + grads_and_vars = optimizer.compute_gradients(loss, variables) + grads = [g for (g, v) in grads_and_vars] + policy.grads, _ = tf.clip_by_global_norm(grads, + policy.config["grad_clip"]) + clipped_grads = list(zip(policy.grads, variables)) + return clipped_grads + else: + return optimizer.compute_gradients(loss, variables) + + +class ValueNetworkMixin(object): + """Construct the value function.""" + + def __init__(self, obs_space, action_space, config): + if config["use_gae"]: + + @make_tf_callable(self.get_session()) + def value(ob, prev_action, prev_reward, *state): + model_out, _ = self.model({ + SampleBatch.CUR_OBS: tf.convert_to_tensor([ob]), + SampleBatch.PREV_ACTIONS: tf.convert_to_tensor( + [prev_action]), + SampleBatch.PREV_REWARDS: tf.convert_to_tensor( + [prev_reward]), + "is_training": tf.convert_to_tensor(False), + }, [tf.convert_to_tensor([s]) for s in state], + tf.convert_to_tensor([1])) + return self.model.value_function()[0] + + else: + + @make_tf_callable(self.get_session()) + def value(ob, prev_action, prev_reward, *state): + return tf.constant(0.0) + + self._value = value + + +def setup_config(policy, obs_space, action_space, config): + """Add additional custom options from the config.""" + # auto set the model option for layer sharing + config["model"]["vf_share_layers"] = config["vf_share_layers"] + + +def setup_mixins(policy, obs_space, action_space, config): + """Construct additional classes that add on to PPO.""" + KLCoeffMixin.__init__(policy, config) + ValueNetworkMixin.__init__(policy, obs_space, action_space, config) + EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], + config["entropy_coeff_schedule"]) + LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) + + +class KLCoeffMixin(object): + """Update the KL Coefficient. 
This is intentionally disabled to match the PPO paper better."""
+
+    def __init__(self, config):
+        # KL Coefficient
+        self.kl_coeff_val = config["kl_coeff"]
+        self.kl_target = config["kl_target"]
+        self.kl_coeff = tf.get_variable(
+            initializer=tf.constant_initializer(self.kl_coeff_val),
+            name="kl_coeff",
+            shape=(),
+            trainable=False,
+            dtype=tf.float32)
+
+    def update_kl(self, sampled_kl):
+        """Do nothing; the sampled KL is ignored to match the PPO paper better."""
+        pass
+
+
+CustomPPOTFPolicy = build_tf_policy(
+    name="CustomPPOTFPolicy",
+    get_default_config=lambda: ray.rllib.agents.ppo.ppo.DEFAULT_CONFIG,
+    loss_fn=ppo_surrogate_loss,
+    stats_fn=kl_and_loss_stats,
+    extra_action_fetches_fn=vf_preds_and_logits_fetches,
+    postprocess_fn=postprocess_ppo_gae,
+    gradients_fn=clip_gradients,
+    before_init=setup_config,
+    before_loss_init=setup_mixins,
+    mixins=[
+        LearningRateSchedule, EntropyCoeffSchedule,
+        ValueNetworkMixin, KLCoeffMixin
+    ])
+
+
+def validate_config(config):
+    """Check that the config is set up properly."""
+    if config["entropy_coeff"] < 0:
+        raise DeprecationWarning("entropy_coeff must be >= 0")
+    if isinstance(config["entropy_coeff"], int):
+        config["entropy_coeff"] = float(config["entropy_coeff"])
+    if config["batch_mode"] == "truncate_episodes" and not config["use_gae"]:
+        raise ValueError(
+            "Episode truncation is not supported without a value "
+            "function. Consider setting batch_mode=complete_episodes.")
+    if config["multiagent"]["policies"] and not config["simple_optimizer"]:
+        logger.info(
+            "In multi-agent mode, policies will be optimized sequentially "
+            "by the multi-GPU optimizer. Consider setting "
+            "simple_optimizer=True if this doesn't work for you.")
+    if config["simple_optimizer"]:
+        logger.warning(
+            "Using the simple minibatch optimizer. This will significantly "
+            "reduce performance, consider simple_optimizer=False.")
+    elif tf and tf.executing_eagerly():
+        config["simple_optimizer"] = True  # multi-gpu not supported
+
+
+CustomPPOTrainer = build_trainer(
+    name="CustomPPOTrainer",
+    default_config=DEFAULT_CONFIG,
+    default_policy=CustomPPOTFPolicy,
+    make_policy_optimizer=choose_policy_optimizer,
+    validate_config=validate_config,
+    after_train_result=warn_about_bad_reward_scales)
diff --git a/flow/algorithms/imitation_learning/__init__.py b/flow/algorithms/imitation_learning/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/flow/algorithms/imitation_learning/custom_ppo.py b/flow/algorithms/imitation_learning/custom_ppo.py
new file mode 100644
index 000000000..c7e81e13c
--- /dev/null
+++ b/flow/algorithms/imitation_learning/custom_ppo.py
@@ -0,0 +1,230 @@
+"""
+Copied from RLlib's PPO, but uses CustomPPOTFPolicy, which tracks value function predictions in Tensorboard.
+"""
+
+import logging
+import os
+
+from ray.rllib.agents import with_common_config
+from ray.tune.trial import ExportFormat
+
+from flow.algorithms.imitation_learning.custom_ppo_tf_policy import CustomPPOTFPolicy
+from ray.rllib.agents.trainer_template import build_trainer
+from ray.rllib.optimizers import SyncSamplesOptimizer, LocalMultiGPUOptimizer
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()
+
+logger = logging.getLogger(__name__)
+
+# yapf: disable
+# __sphinx_doc_begin__
+DEFAULT_CONFIG = with_common_config({
+    # Should use a critic as a baseline (otherwise don't use value baseline;
+    # required for using GAE).
+    "use_critic": True,
+    # If true, use the Generalized Advantage Estimator (GAE)
+    # with a value function, see https://arxiv.org/pdf/1506.02438.pdf.
+ "use_gae": True, + # The GAE(lambda) parameter. + "lambda": 1.0, + # Initial coefficient for KL divergence. + "kl_coeff": 0.2, + # Size of batches collected from each worker. + "rollout_fragment_length": 200, + # Number of timesteps collected for each SGD round. This defines the size + # of each SGD epoch. + "train_batch_size": 4000, + # Total SGD batch size across all devices for SGD. This defines the + # minibatch size within each epoch. + "sgd_minibatch_size": 128, + # Whether to shuffle sequences in the batch when training (recommended). + "shuffle_sequences": True, + # Number of SGD iterations in each outer loop (i.e., number of epochs to + # execute per train batch). + "num_sgd_iter": 30, + # Stepsize of SGD. + "lr": 5e-5, + # Learning rate schedule. + "lr_schedule": None, + # Share layers for value function. If you set this to True, it's important + # to tune vf_loss_coeff. + "vf_share_layers": False, + # Coefficient of the value function loss. IMPORTANT: you must tune this if + # you set vf_share_layers: True. + "vf_loss_coeff": 1.0, + # Coefficient of the entropy regularizer. + "entropy_coeff": 0.0, + # Decay schedule for the entropy regularizer. + "entropy_coeff_schedule": None, + # PPO clip parameter. + "clip_param": 0.3, + # Clip param for the value function. Note that this is sensitive to the + # scale of the rewards. If your expected V is large, increase this. + "vf_clip_param": 10.0, + # If specified, clip the global norm of gradients by this amount. + "grad_clip": None, + # Target value for KL divergence. + "kl_target": 0.01, + # Whether to rollout "complete_episodes" or "truncate_episodes". + "batch_mode": "truncate_episodes", + # Which observation filter to apply to the observation. + "observation_filter": "NoFilter", + # Uses the sync samples optimizer instead of the multi-gpu one. This is + # usually slower, but you might want to try it if you run into issues with + # the default optimizer. + "simple_optimizer": False, + # Use PyTorch as framework? + "use_pytorch": False +}) +# __sphinx_doc_end__ +# yapf: enable + + +def choose_policy_optimizer(workers, config): + if config["simple_optimizer"]: + return SyncSamplesOptimizer( + workers, + num_sgd_iter=config["num_sgd_iter"], + train_batch_size=config["train_batch_size"], + sgd_minibatch_size=config["sgd_minibatch_size"], + standardize_fields=["advantages"]) + + return LocalMultiGPUOptimizer( + workers, + sgd_batch_size=config["sgd_minibatch_size"], + num_sgd_iter=config["num_sgd_iter"], + num_gpus=config["num_gpus"], + rollout_fragment_length=config["rollout_fragment_length"], + num_envs_per_worker=config["num_envs_per_worker"], + train_batch_size=config["train_batch_size"], + standardize_fields=["advantages"], + shuffle_sequences=config["shuffle_sequences"]) + + +def update_kl(trainer, fetches): + # Single-agent. + if "kl" in fetches: + trainer.workers.local_worker().for_policy( + lambda pi: pi.update_kl(fetches["kl"])) + + # Multi-agent. + else: + + def update(pi, pi_id): + if pi_id in fetches: + pi.update_kl(fetches[pi_id]["kl"]) + else: + logger.debug("No data for {}, not updating kl".format(pi_id)) + + trainer.workers.local_worker().foreach_trainable_policy(update) + + +def warn_about_bad_reward_scales(trainer, result): + if result["policy_reward_mean"]: + return # Punt on handling multiagent case. + + # Warn about excessively high VF loss. 
+ learner_stats = result["info"]["learner"] + if "default_policy" in learner_stats: + scaled_vf_loss = (trainer.config["vf_loss_coeff"] * + learner_stats["default_policy"]["vf_loss"]) + policy_loss = learner_stats["default_policy"]["policy_loss"] + if trainer.config["vf_share_layers"] and scaled_vf_loss > 100: + logger.warning( + "The magnitude of your value function loss is extremely large " + "({}) compared to the policy loss ({}). This can prevent the " + "policy from learning. Consider scaling down the VF loss by " + "reducing vf_loss_coeff, or disabling vf_share_layers.".format( + scaled_vf_loss, policy_loss)) + + # Warn about bad clipping configs + if trainer.config["vf_clip_param"] <= 0: + rew_scale = float("inf") + else: + rew_scale = round( + abs(result["episode_reward_mean"]) / + trainer.config["vf_clip_param"], 0) + if rew_scale > 200: + logger.warning( + "The magnitude of your environment rewards are more than " + "{}x the scale of `vf_clip_param`. ".format(rew_scale) + + "This means that it will take more than " + "{} iterations for your value ".format(rew_scale) + + "function to converge. If this is not intended, consider " + "increasing `vf_clip_param`.") + + +def validate_config(config): + if config["entropy_coeff"] < 0: + raise DeprecationWarning("entropy_coeff must be >= 0") + if isinstance(config["entropy_coeff"], int): + config["entropy_coeff"] = float(config["entropy_coeff"]) + if config["sgd_minibatch_size"] > config["train_batch_size"]: + raise ValueError( + "Minibatch size {} must be <= train batch size {}.".format( + config["sgd_minibatch_size"], config["train_batch_size"])) + if config["batch_mode"] == "truncate_episodes" and not config["use_gae"]: + raise ValueError( + "Episode truncation is not supported without a value " + "function. Consider setting batch_mode=complete_episodes.") + if config["multiagent"]["policies"] and not config["simple_optimizer"]: + logger.info( + "In multi-agent mode, policies will be optimized sequentially " + "by the multi-GPU optimizer. Consider setting " + "simple_optimizer=True if this doesn't work for you.") + if config["simple_optimizer"]: + logger.warning( + "Using the simple minibatch optimizer. This will significantly " + "reduce performance, consider simple_optimizer=False.") + elif config["use_pytorch"] or (tf and tf.executing_eagerly()): + config["simple_optimizer"] = True # multi-gpu not supported + + +def get_policy_class(config): + if config.get("use_pytorch") is True: + from ray.rllib.agents.ppo.ppo_torch_policy import PPOTorchPolicy + return PPOTorchPolicy + else: + return CustomPPOTFPolicy + + +CustomPPOTrainer = build_trainer( + name="PPO", + default_config=DEFAULT_CONFIG, + default_policy=CustomPPOTFPolicy, + get_policy_class=get_policy_class, + make_policy_optimizer=choose_policy_optimizer, + validate_config=validate_config, + after_optimizer_step=update_kl, + after_train_result=warn_about_bad_reward_scales) + + +from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID +def import_model(self, import_file, policy_id=DEFAULT_POLICY_ID): + """Imports a model from import_file. + + Note: Currently, only h5 files are supported. + + Args: + import_file (str): The file to import the model from. + + Returns: + A dict that maps ExportFormats to successfully exported models. + """ + # Check for existence. + if not os.path.exists(import_file): + raise FileNotFoundError( + "`import_file` '{}' does not exist! Can't import Model.". + format(import_file)) + # Get the format of the given file. 
+ import_format = "h5" # TODO(sven): Support checkpoint loading. + + ExportFormat.validate([import_format]) + if import_format != ExportFormat.H5: + raise NotImplementedError + else: + return self.import_policy_model_from_h5(import_file, policy_id=policy_id) + +from ray.rllib.agents import Trainer +setattr(Trainer, 'import_model', import_model) \ No newline at end of file diff --git a/flow/algorithms/imitation_learning/custom_ppo_tf_policy.py b/flow/algorithms/imitation_learning/custom_ppo_tf_policy.py new file mode 100644 index 000000000..680b7cf76 --- /dev/null +++ b/flow/algorithms/imitation_learning/custom_ppo_tf_policy.py @@ -0,0 +1,288 @@ +""" +Copied from RLLIb's ppo_tf_policy, but additionally tracks value function predictions in kl_and_loss_stats. Used +to evaluate the value function learned after imitation. +""" + +import logging + +import ray +from ray.rllib.agents.impala.vtrace_policy import BEHAVIOUR_LOGITS +from ray.rllib.evaluation.postprocessing import compute_advantages, \ + Postprocessing +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.policy.policy import ACTION_LOGP +from ray.rllib.policy.tf_policy import LearningRateSchedule, \ + EntropyCoeffSchedule +from ray.rllib.policy.tf_policy_template import build_tf_policy +from ray.rllib.utils.explained_variance import explained_variance +from ray.rllib.utils.tf_ops import make_tf_callable +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() + +logger = logging.getLogger(__name__) + + +class PPOLoss: + def __init__(self, + dist_class, + model, + value_targets, + advantages, + actions, + prev_logits, + prev_actions_logp, + vf_preds, + curr_action_dist, + value_fn, + cur_kl_coeff, + valid_mask, + entropy_coeff=0, + clip_param=0.1, + vf_clip_param=0.1, + vf_loss_coeff=1.0, + use_gae=True): + """Constructs the loss for Proximal Policy Objective. + + Arguments: + dist_class: action distribution class for logits. + value_targets (Placeholder): Placeholder for target values; used + for GAE. + actions (Placeholder): Placeholder for actions taken + from previous model evaluation. + advantages (Placeholder): Placeholder for calculated advantages + from previous model evaluation. + prev_logits (Placeholder): Placeholder for logits output from + previous model evaluation. + prev_actions_logp (Placeholder): Placeholder for action prob output + from the previous (before update) Model evaluation. + vf_preds (Placeholder): Placeholder for value function output + from the previous (before update) Model evaluation. + curr_action_dist (ActionDistribution): ActionDistribution + of the current model. + value_fn (Tensor): Current value function output Tensor. + cur_kl_coeff (Variable): Variable holding the current PPO KL + coefficient. + valid_mask (Optional[tf.Tensor]): An optional bool mask of valid + input elements (for max-len padded sequences (RNNs)). + entropy_coeff (float): Coefficient of the entropy regularizer. + clip_param (float): Clip parameter + vf_clip_param (float): Clip parameter for the value function + vf_loss_coeff (float): Coefficient of the value function loss + use_gae (bool): If true, use the Generalized Advantage Estimator. + """ + if valid_mask is not None: + + def reduce_mean_valid(t): + return tf.reduce_mean(tf.boolean_mask(t, valid_mask)) + + else: + + def reduce_mean_valid(t): + return tf.reduce_mean(t) + + prev_dist = dist_class(prev_logits, model) + # Make loss functions. 
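+        # Unlike flow/algorithms/custom_ppo.py, this loss keeps RLlib's
+        # adaptive KL penalty: cur_kl_coeff * action_kl is added to the total
+        # loss below.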
+ logp_ratio = tf.exp(curr_action_dist.logp(actions) - prev_actions_logp) + action_kl = prev_dist.kl(curr_action_dist) + self.mean_kl = reduce_mean_valid(action_kl) + + curr_entropy = curr_action_dist.entropy() + self.mean_entropy = reduce_mean_valid(curr_entropy) + + surrogate_loss = tf.minimum( + advantages * logp_ratio, + advantages * tf.clip_by_value(logp_ratio, 1 - clip_param, + 1 + clip_param)) + self.mean_policy_loss = reduce_mean_valid(-surrogate_loss) + + if use_gae: + vf_loss1 = tf.square(value_fn - value_targets) + vf_clipped = vf_preds + tf.clip_by_value( + value_fn - vf_preds, -vf_clip_param, vf_clip_param) + vf_loss2 = tf.square(vf_clipped - value_targets) + vf_loss = tf.maximum(vf_loss1, vf_loss2) + self.mean_vf_loss = reduce_mean_valid(vf_loss) + loss = reduce_mean_valid( + -surrogate_loss + cur_kl_coeff * action_kl + + vf_loss_coeff * vf_loss - entropy_coeff * curr_entropy) + else: + self.mean_vf_loss = tf.constant(0.0) + loss = reduce_mean_valid(-surrogate_loss + + cur_kl_coeff * action_kl - + entropy_coeff * curr_entropy) + self.loss = loss + + +def ppo_surrogate_loss(policy, model, dist_class, train_batch): + logits, state = model.from_batch(train_batch) + action_dist = dist_class(logits, model) + + mask = None + if state: + max_seq_len = tf.reduce_max(train_batch["seq_lens"]) + mask = tf.sequence_mask(train_batch["seq_lens"], max_seq_len) + mask = tf.reshape(mask, [-1]) + + policy.loss_obj = PPOLoss( + dist_class, + model, + train_batch[Postprocessing.VALUE_TARGETS], + train_batch[Postprocessing.ADVANTAGES], + train_batch[SampleBatch.ACTIONS], + train_batch[BEHAVIOUR_LOGITS], + train_batch[ACTION_LOGP], + train_batch[SampleBatch.VF_PREDS], + action_dist, + model.value_function(), + policy.kl_coeff, + mask, + entropy_coeff=policy.entropy_coeff, + clip_param=policy.config["clip_param"], + vf_clip_param=policy.config["vf_clip_param"], + vf_loss_coeff=policy.config["vf_loss_coeff"], + use_gae=policy.config["use_gae"], + ) + + return policy.loss_obj.loss + + +def kl_and_loss_stats(policy, train_batch): + return { + "cur_kl_coeff": tf.cast(policy.kl_coeff, tf.float64), + "cur_lr": tf.cast(policy.cur_lr, tf.float64), + "total_loss": policy.loss_obj.loss, + "policy_loss": policy.loss_obj.mean_policy_loss, + "vf_loss": policy.loss_obj.mean_vf_loss, + "vf_preds": policy.model.value_function(), + "vf_targets": train_batch[Postprocessing.VALUE_TARGETS], + "vf_explained_var": explained_variance( + train_batch[Postprocessing.VALUE_TARGETS], + policy.model.value_function()), + "kl": policy.loss_obj.mean_kl, + "entropy": policy.loss_obj.mean_entropy, + "entropy_coeff": tf.cast(policy.entropy_coeff, tf.float64), + } + + +def vf_preds_and_logits_fetches(policy): + """Adds value function and logits outputs to experience train_batches.""" + return { + SampleBatch.VF_PREDS: policy.model.value_function(), + BEHAVIOUR_LOGITS: policy.model.last_output(), + } + + +def postprocess_ppo_gae(policy, + sample_batch, + other_agent_batches=None, + episode=None): + """Adds the policy logits, VF preds, and advantages to the trajectory.""" + + completed = sample_batch["dones"][-1] + if completed: + last_r = 0.0 + else: + next_state = [] + for i in range(policy.num_state_tensors()): + next_state.append([sample_batch["state_out_{}".format(i)][-1]]) + last_r = policy._value(sample_batch[SampleBatch.NEXT_OBS][-1], + sample_batch[SampleBatch.ACTIONS][-1], + sample_batch[SampleBatch.REWARDS][-1], + *next_state) + batch = compute_advantages( + sample_batch, + last_r, + policy.config["gamma"], + 
policy.config["lambda"], + use_gae=policy.config["use_gae"]) + return batch + + +def clip_gradients(policy, optimizer, loss): + variables = policy.model.trainable_variables() + if policy.config["grad_clip"] is not None: + grads_and_vars = optimizer.compute_gradients(loss, variables) + grads = [g for (g, v) in grads_and_vars] + policy.grads, _ = tf.clip_by_global_norm(grads, + policy.config["grad_clip"]) + clipped_grads = list(zip(policy.grads, variables)) + return clipped_grads + else: + return optimizer.compute_gradients(loss, variables) + + +class KLCoeffMixin: + def __init__(self, config): + # KL Coefficient + self.kl_coeff_val = config["kl_coeff"] + self.kl_target = config["kl_target"] + self.kl_coeff = tf.get_variable( + initializer=tf.constant_initializer(self.kl_coeff_val), + name="kl_coeff", + shape=(), + trainable=False, + dtype=tf.float32) + + def update_kl(self, sampled_kl): + if sampled_kl > 2.0 * self.kl_target: + self.kl_coeff_val *= 1.5 + elif sampled_kl < 0.5 * self.kl_target: + self.kl_coeff_val *= 0.5 + self.kl_coeff.load(self.kl_coeff_val, session=self.get_session()) + return self.kl_coeff_val + + +class ValueNetworkMixin: + def __init__(self, obs_space, action_space, config): + if config["use_gae"]: + + @make_tf_callable(self.get_session()) + def value(ob, prev_action, prev_reward, *state): + model_out, _ = self.model({ + SampleBatch.CUR_OBS: tf.convert_to_tensor([ob]), + SampleBatch.PREV_ACTIONS: tf.convert_to_tensor( + [prev_action]), + SampleBatch.PREV_REWARDS: tf.convert_to_tensor( + [prev_reward]), + "is_training": tf.convert_to_tensor(False), + }, [tf.convert_to_tensor([s]) for s in state], + tf.convert_to_tensor([1])) + return self.model.value_function()[0] + + else: + + @make_tf_callable(self.get_session()) + def value(ob, prev_action, prev_reward, *state): + return tf.constant(0.0) + + self._value = value + + +def setup_config(policy, obs_space, action_space, config): + # auto set the model option for layer sharing + config["model"]["vf_share_layers"] = config["vf_share_layers"] + + +def setup_mixins(policy, obs_space, action_space, config): + ValueNetworkMixin.__init__(policy, obs_space, action_space, config) + KLCoeffMixin.__init__(policy, config) + EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], + config["entropy_coeff_schedule"]) + LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) + + +CustomPPOTFPolicy = build_tf_policy( + name="PPOTFPolicy", + get_default_config=lambda: ray.rllib.agents.ppo.ppo.DEFAULT_CONFIG, + loss_fn=ppo_surrogate_loss, + stats_fn=kl_and_loss_stats, + extra_action_fetches_fn=vf_preds_and_logits_fetches, + postprocess_fn=postprocess_ppo_gae, + gradients_fn=clip_gradients, + before_init=setup_config, + before_loss_init=setup_mixins, + mixins=[ + LearningRateSchedule, EntropyCoeffSchedule, KLCoeffMixin, + ValueNetworkMixin + ]) diff --git a/flow/algorithms/imitation_learning/custom_trainable.py b/flow/algorithms/imitation_learning/custom_trainable.py new file mode 100644 index 000000000..993113607 --- /dev/null +++ b/flow/algorithms/imitation_learning/custom_trainable.py @@ -0,0 +1,65 @@ +from ray import tune +try: + from ray.rllib.agents.agent import get_agent_class +except ImportError: + from ray.rllib.agents.registry import get_agent_class +import flow.algorithms.imitation_learning.custom_ppo as custom_ppo + +class Imitation_PPO_Trainable(tune.Trainable): + """ + Class to train PPO with imitation, with Tune. Extends Trainable. + """ + + def _setup(self, config): + """ + Sets up trainable. 
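+        Builds a CustomPPOTrainer for config['env'] and imports pretrained
+        policy weights from the h5 file at
+        config['model']['custom_options']['h5_load_path'].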
See superclass definition.
+        """
+        env_name = config['env']
+        self.trainer = custom_ppo.CustomPPOTrainer(env=env_name, config=config)
+        # the trainer does not expose a default policy id directly, so grab
+        # the first policy id from its weights dict
+        policy_id = list(self.trainer.get_weights().keys())[0]
+        self.trainer.import_model(config['model']['custom_options']['h5_load_path'], policy_id=policy_id)
+
+    def _train(self):
+        """
+        Executes one training iteration on trainer. See superclass definition.
+        """
+        return self.trainer.train()
+
+    def _save(self, tmp_checkpoint_dir):
+        """
+        Saves trainer. See superclass definition.
+        """
+        return self.trainer._save(tmp_checkpoint_dir)
+
+    def _restore(self, checkpoint):
+        """
+        Restores trainer from checkpoint. See superclass definition.
+        """
+        self.trainer.restore(checkpoint)
+
+    def _log_result(self, result):
+        """
+        Logs results of trainer. See superclass definition.
+        """
+        self.trainer._log_result(result)
+
+    def _stop(self):
+        """
+        Stops trainer. See superclass definition.
+        """
+        self.trainer.stop()
+
+    def _export_model(self, export_formats, export_dir):
+        """
+        Exports trainer model. See superclass definition.
+        """
+        return self.trainer.export_model(export_formats, export_dir=export_dir)
diff --git a/flow/algorithms/imitation_learning/imitating_controller.py b/flow/algorithms/imitation_learning/imitating_controller.py
new file mode 100644
index 000000000..115930744
--- /dev/null
+++ b/flow/algorithms/imitation_learning/imitating_controller.py
@@ -0,0 +1,69 @@
+from flow.controllers.base_controller import BaseController
+
+
+class ImitatingController(BaseController):
+    """
+    Controller which uses a given neural net to imitate an expert. Subclasses BaseController.
+    """
+
+    # Implementation in Tensorflow Keras
+
+    def __init__(self, veh_id, action_network, multiagent, car_following_params=None, time_delay=0.0, noise=0, fail_safe=None):
+        """
+        Parameters
+        ----------
+        veh_id: String
+            ID of vehicle to control
+        action_network: ImitatingNetwork
+            Instance of imitating_network class; neural net that gives action given state
+        multiagent: bool
+            boolean indicating if env is multiagent or singleagent
+        """
+        BaseController.__init__(self, veh_id, car_following_params, delay=time_delay, fail_safe=fail_safe, noise=noise)
+        self.action_network = action_network
+        self.multiagent = multiagent
+        self.veh_id = veh_id
+
+    def get_accel(self, env):
+        """
+        Get acceleration for vehicle in the environment. Overrides superclass method.
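+
+        Returns None whenever the vehicle is not in a controlled edge or not
+        observed, which defers control of the vehicle back to SUMO.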
+ Parameters + __________ + env: Gym Env + instance of environment being used + """ + # observation is a dictionary for multiagent envs, list for singleagent envs + + if self.multiagent: + # if vehicle is in non-control edge, it will not be in observation, so return None to default control to Sumo + if self.veh_id not in env.get_state().keys(): + return None + observation = env.get_state()[self.veh_id] + else: + observation = env.get_state() + + # get action from neural net + action = self.action_network.get_accel_from_observation(observation)[0] + + # handles singleagent case in which there are multiple RL vehicles sharing common state + # if action space is multidimensional, obtain the corresponding action for the vehicle + if not self.multiagent and self.action_network.action_dim > 1: + + # get_sorted_rl_ids used for singleagent_straight_road; use get_rl_ids if method does not exist + if hasattr(env, 'get_sorted_rl_ids'): + rl_ids = env.get_sorted_rl_ids() + else: + rl_ids = env.get_rl_ids() + + if not (self.veh_id in rl_ids): + # vehicle in non-control edge, so return None to default control to Sumo + return None + + # return the action taken by the vehicle + ind = rl_ids.index(self.veh_id) + return action[ind] + + # in other cases, acceleration is the output of the network + return action diff --git a/flow/algorithms/imitation_learning/imitating_network.py b/flow/algorithms/imitation_learning/imitating_network.py new file mode 100644 index 000000000..6e9e9c3c7 --- /dev/null +++ b/flow/algorithms/imitation_learning/imitating_network.py @@ -0,0 +1,255 @@ +import numpy as np +import tensorflow as tf +from flow.algorithms.imitation_learning.keras_utils import build_neural_net_deterministic, build_neural_net_stochastic, get_loss, negative_log_likelihood_loss +from flow.algorithms.imitation_learning.replay_buffer import ReplayBuffer + + +class ImitatingNetwork(): + """ + Class containing neural network which learns to imitate a given expert controller. + """ + + def __init__(self, sess, action_dim, obs_dim, fcnet_hiddens, replay_buffer_size, learning_rate, stochastic=False, variance_regularizer = 0, load_model=False, load_path='', tensorboard_path=''): + + """Initializes and constructs neural network. + Parameters + ---------- + sess : tf.Session + Tensorflow session variable + action_dim : int + action_space dimension + obs_dim : int + dimension of observation space (size of network input) + fcnet_hiddens : list + list of hidden layer sizes for fully connected network (length of list is number of hidden layers) + replay_buffer_size: int + maximum size of replay buffer used to hold data for training + stochastic: bool + indicates if network outputs a stochastic (MV Gaussian) or deterministic policy + variance_regularizer: float + regularization hyperparameter to penalize high variance policies + load_model: bool + if True, load model from path specified in load_path + load_path: String + path to h5 file containing model to load. 
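+        learning_rate: float
+            learning rate passed to the Adam optimizer used to train the
+            network
+        tensorboard_path: String
+            directory to which tensorboard summaries (e.g. the training loss)
+            are written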
+ + """ + + self.sess = sess + self.action_dim = action_dim + self.obs_dim = obs_dim + self.fcnet_hiddens = fcnet_hiddens + self.stochastic=stochastic + self.variance_regularizer = variance_regularizer + self.learning_rate = learning_rate + + self.train_steps = 0 + self.action_steps = 0 + + self.writer = tf.summary.FileWriter(tensorboard_path, tf.get_default_graph()) + + # load network if specified, or construct network + if load_model: + self.load_network(load_path) + else: + self.build_network() + self.compile_network() + + self.replay_buffer = ReplayBuffer(replay_buffer_size) + + def build_network(self): + """ + Defines neural network for choosing actions. Defines placeholders and forward pass + """ + # setup placeholders for network input and labels for training, and hidden layers/output + if self.stochastic: + self.model = build_neural_net_stochastic(self.obs_dim, self.action_dim, self.fcnet_hiddens) + else: + self.model = build_neural_net_deterministic(self.obs_dim, self.action_dim, self.fcnet_hiddens) + + + def compile_network(self): + """ + Compiles Keras network with appropriate loss and optimizer + """ + loss = get_loss(self.stochastic, self.variance_regularizer) + self.model.compile(loss=loss, optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate)) + + + def train(self, observation_batch, action_batch): + """ + Executes one training (gradient) step for the given batch of observation and action data + + Parameters + ---------- + observation_batch : numpy array + numpy array containing batch of observations (inputs) + action_batch : numpy array + numpy array containing batch of actions (labels) + """ + + # reshape action_batch to ensure a shape (batch_size, action_dim) + action_batch = action_batch.reshape(action_batch.shape[0], self.action_dim) + # one gradient step on batch + loss = self.model.train_on_batch(observation_batch, action_batch) + + # tensorboard + summary = tf.Summary(value=[tf.Summary.Value(tag="imitation training loss", simple_value=loss), ]) + self.writer.add_summary(summary, global_step=self.train_steps) + self.train_steps += 1 + + def get_accel_from_observation(self, observation): + """ + Gets the network's acceleration prediction based on given observation/state + + Parameters + ---------- + observation : numpy array + numpy array containing a single observation + + Returns + ------- + numpy array + one element numpy array containing acceleration + """ + + # network expects an array of arrays (matrix); if single observation (no batch), convert to array of arrays + if len(observation.shape)<=1: + observation = observation[None] + # "batch size" is 1, so just get single acceleration/acceleration vector + network_output = self.model.predict(observation) + if self.stochastic: + mean, log_std = network_output[:, :self.action_dim], network_output[:, self.action_dim:] + var = np.exp(2 * log_std) + + # track variance norm on tensorboard + variance_norm = np.linalg.norm(var) + summary = tf.Summary(value=[tf.Summary.Value(tag="Variance norm", simple_value=variance_norm), ]) + self.writer.add_summary(summary, global_step=self.action_steps) + + # var is a 1 x d numpy array, where d is the dimension of the action space, so get the first element and form cov matrix + cov_matrix = np.diag(var[0]) + action = np.random.multivariate_normal(mean[0], cov_matrix) + + self.action_steps += 1 + return action + else: + self.action_steps += 1 + return network_output + + + def get_accel(self, env): + """ + Get network's acceleration prediction(s) based on given env + + 
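+        This is a thin wrapper that calls get_accel_from_observation on the
+        state returned by env.get_state().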
Parameters
+        ----------
+        env :
+            environment object
+
+        Returns
+        -------
+        numpy array
+            one element numpy array containing acceleration
+        """
+        observation = env.get_state()
+        return self.get_accel_from_observation(observation)
+
+    def add_to_replay_buffer(self, rollout_list):
+        """
+        Add data to the replay buffer.
+
+        Parameters
+        ----------
+        rollout_list : list
+            list of rollout dictionaries
+        """
+        self.replay_buffer.add_rollouts(rollout_list)
+
+    def sample_data(self, batch_size):
+        """
+        Sample a batch of data from replay buffer.
+
+        Parameters
+        ----------
+        batch_size : int
+            size of batch to sample
+        """
+        return self.replay_buffer.sample_batch(batch_size)
+
+    def save_network(self, save_path):
+        """
+        Save imitation network as an h5 file at save_path.
+
+        Parameters
+        ----------
+        save_path : String
+            path to h5 file to save to
+        """
+        self.model.save(save_path)
+
+    def load_network(self, load_path):
+        """
+        Load imitation network from an h5 file at load_path.
+
+        Parameters
+        ----------
+        load_path : String
+            path to h5 file containing model to load from
+        """
+        if self.stochastic:
+            self.model = tf.keras.models.load_model(load_path, custom_objects={'nll_loss': negative_log_likelihood_loss(self.variance_regularizer)})
+        else:
+            self.model = tf.keras.models.load_model(load_path)
+
+    def save_network_PPO(self, save_path):
+        """
+        Build a model with the same policy architecture as the imitation network, copy in the imitation-learned weights, and save the result for use as a PPO initialization.
+
+        Parameters
+        ----------
+        save_path : String
+            path to h5 file to save to
+        """
+        inp = tf.keras.layers.Input(self.model.input.shape[1].value)
+        curr_layer = inp
+
+        # number of hidden layers
+        num_layers = len(self.model.layers) - 2
+
+        # build layers for policy
+        for i in range(num_layers):
+            size = self.model.layers[i + 1].output.shape[1].value
+            activation = tf.keras.activations.serialize(self.model.layers[i + 1].activation)
+            curr_layer = tf.keras.layers.Dense(size, activation=activation, name="policy_hidden_layer_{}".format(i + 1))(curr_layer)
+        output_layer_policy = tf.keras.layers.Dense(self.model.output.shape[1].value, activation=None, name="policy_output_layer")(curr_layer)
+
+        # build layers for value function
+        curr_layer = inp
+        for i in range(num_layers):
+            size = self.fcnet_hiddens[i]
+            curr_layer = tf.keras.layers.Dense(size, activation="tanh", name="vf_hidden_layer_{}".format(i + 1))(curr_layer)
+        output_layer_vf = tf.keras.layers.Dense(1, activation=None, name="vf_output_layer")(curr_layer)
+
+        ppo_model = tf.keras.Model(inputs=inp, outputs=[output_layer_policy, output_layer_vf], name="ppo_model")
+
+        # set the policy weights to those learned from imitation
+        for i in range(num_layers):
+            policy_layer = ppo_model.get_layer(name="policy_hidden_layer_{}".format(i + 1))
+            policy_layer.set_weights(self.model.layers[i + 1].get_weights())
+        policy_output = ppo_model.get_layer("policy_output_layer")
+        policy_output.set_weights(self.model.layers[-1].get_weights())
+
+        # save the model (as an h5 file)
+        ppo_model.save(save_path)
diff --git a/flow/algorithms/imitation_learning/keras_utils.py b/flow/algorithms/imitation_learning/keras_utils.py
new file mode 100644
index 000000000..f5d9924b8
--- /dev/null
+++ b/flow/algorithms/imitation_learning/keras_utils.py
@@ -0,0 +1,133 @@
+import tensorflow as tf
+import tensorflow_probability as tfp
+from tensorflow.keras import Input
+from tensorflow.keras.layers import
Dense + +def build_neural_net_deterministic(input_dim, action_dim, fcnet_hiddens): + """Build a keras model to output a deterministic policy. + Parameters + ---------- + input_dim : int + dimension of input layer + action_dim : int + action_space dimension + fcnet_hiddens : list + list containing size of each hidden layer (length of list is number of hidden layers) + + Returns + ------- + Keras model (untrained) + """ + + input_layer = Input(shape=(input_dim, )) + curr_layer = input_layer + + for i in range(len(fcnet_hiddens)): + size = fcnet_hiddens[i] + dense = Dense(size, activation="tanh") + curr_layer = dense(curr_layer) + output_layer = Dense(action_dim, activation=None)(curr_layer) + model = tf.keras.Model(inputs=input_layer, outputs=output_layer, name="policy_network") + + return model + +def build_neural_net_stochastic(input_dim, action_dim, fcnet_hiddens): + """Build a keras model to output a stochastic policy. + Parameters + ---------- + input_dim : int + dimension of input layer + action_dim : int + action_space dimension + fcnet_hiddens : list + list containing size of each hidden layer (length of list is number of hidden layers) + + Returns + ------- + Keras model (untrained) + """ + input_layer = Input(shape=(input_dim, )) + curr_layer = input_layer + + for i in range(len(fcnet_hiddens)): + size = fcnet_hiddens[i] + dense = Dense(size, activation="tanh") + curr_layer = dense(curr_layer) + + out = Dense(2 * action_dim, activation=None)(curr_layer) + model = tf.keras.Model(inputs=input_layer, outputs=out, name="policy_network") + + return model + +def get_loss(stochastic, variance_regularizer): + """Get appropriate loss function for training. + Parameters + ---------- + stochastic : bool + determines if policy to be learned is deterministic or stochastic + variance_regularizer : float + regularization hyperparameter to penalize high variance policies + + Returns + ------- + Keras loss function to use for imitation learning. + """ + if stochastic: + return negative_log_likelihood_loss(variance_regularizer) + else: + return tf.keras.losses.mean_squared_error + +def negative_log_likelihood_loss(variance_regularizer): + """Negative log likelihood loss for learning stochastic policies. + + Parameters + ---------- + variance_regularizer : float + regularization hyperparameter to penalize high variance policies + Returns + ------- + Negative log likelihood loss function with variance regularization. 
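+
+    Concretely, for a network output split into (mu, log_sigma), the returned
+    loss is
+        -mean(log N(y; mu, diag(sigma^2))) + variance_regularizer * ||sigma||
+    where ||.|| is the L2 (Frobenius) norm over the batch of sigmas.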
+ """ + + def nll_loss(y, network_output): + assert network_output.shape[1] % 2 == 0, "Stochastic policies must output vectors of even length" + + action_dim = network_output.shape[1] // 2 + + # first half of network_output is mean, second half is log_std + means, log_stds = tf.split(network_output, 2, axis=1) + stds = tf.math.exp(log_stds) + # variances = tf.math.square(stds) + + # Multivariate Gaussian distribution + dist = tfp.distributions.MultivariateNormalDiag(loc=means, scale_diag=stds) + loss = dist.log_prob(y) + loss = tf.negative(loss) + loss = tf.reduce_mean(loss) + (variance_regularizer * tf.norm(stds)) + return loss + + return nll_loss + +def compare_weights(ppo_model, imitation_path): + imitation_model = tf.keras.models.load_model(imitation_path, custom_objects={'nll_loss': negative_log_likelihood_loss(0.5)}) + + for i in range(len(imitation_model.layers) - 2): + ppo_name = 'policy_hidden_layer_' + str(i + 1) + ppo_layer = ppo_model.get_layer(ppo_name) + im_layer = imitation_model.layers[i + 1] + + ppo_weights = ppo_layer.get_weights() + im_weights = im_layer.get_weights() + for i in range(len(ppo_weights)): + assert (ppo_weights[i] == im_weights[i]).all(), "Weights don't match!" + + ppo_layer = ppo_model.get_layer('policy_output_layer') + im_layer = imitation_model.layers[-1] + ppo_weights = ppo_layer.get_weights() + im_weights = im_layer.get_weights() + for i in range(len(ppo_weights)): + assert (ppo_weights[i] == im_weights[i]).all(), "Weights don't match!" + + print("\n\nWeights properly loaded\n\n") + + diff --git a/flow/algorithms/imitation_learning/ppo_model.py b/flow/algorithms/imitation_learning/ppo_model.py new file mode 100644 index 000000000..47ae61f77 --- /dev/null +++ b/flow/algorithms/imitation_learning/ppo_model.py @@ -0,0 +1,130 @@ + +from ray.rllib.models.tf.tf_modelv2 import TFModelV2 +from flow.algorithms.imitation_learning.keras_utils import * + + +class PPONetwork(TFModelV2): + """ + Custom RLLib PPOModel (using tensorflow keras) to load weights from a pre-trained policy model (e.g. from imitation learning) and start RL training with loaded weights. + Subclass of TFModelV2. See https://docs.ray.io/en/master/rllib-models.html. + """ + + def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs): + """ + Parameters + __________ + obs_space: gym.Space + observation space of gym environment + action_space: gym.Space + action_space of gym environment + num_outputs: int + number of outputs for policy network. For deterministic policies, this is dimension of the action space. 
For continuous stochastic policies, this is 2 * the dimension of the action space.
+        model_config: dict
+            configuration of the model
+        name: str
+            name of the model
+        """
+        super(PPONetwork, self).__init__(obs_space, action_space, num_outputs, model_config, name)
+
+        h5_path = kwargs.get("h5_load_path", "")
+
+        # set up the model with weights loaded in from the model at the h5 path
+        self.setup_model(obs_space, action_space, model_config, num_outputs, h5_path)
+        self.register_variables(self.base_model.variables)
+
+    def setup_model(self, obs_space, action_space, model_config, num_outputs, imitation_h5_path):
+        """Load/build the model for both the policy and the value function.
+
+        Parameters
+        ----------
+        obs_space: gym.Space
+            observation space of env
+        action_space: gym.Space
+            action space of env
+        model_config: dict
+            configuration parameters for model
+        num_outputs: int
+            number of outputs expected for policy
+        imitation_h5_path: str
+            path to h5 file containing weights of a pretrained network (empty string if no such file)
+        """
+        activation = model_config.get("fcnet_activation")
+        hiddens = model_config.get("fcnet_hiddens", [])
+        vf_share_layers = model_config.get("vf_share_layers")
+
+        # set up model
+        inp_layer = tf.keras.layers.Input(shape=obs_space.shape, name="input_layer")
+        curr_layer = inp_layer
+
+        # hidden layers and output for policy
+        i = 1
+        for size in hiddens:
+            curr_layer = tf.keras.layers.Dense(size, name="policy_hidden_layer_{}".format(i),
+                                               activation=activation)(curr_layer)
+            i += 1
+
+        output_layer_policy = tf.keras.layers.Dense(num_outputs, name="policy_output_layer", activation=None)(
+            curr_layer)
+
+        # set up value function
+        if not vf_share_layers:
+            curr_layer = inp_layer
+            i = 1
+            for size in hiddens:
+                curr_layer = tf.keras.layers.Dense(size, name="vf_hidden_layer_{}".format(i),
+                                                   activation=activation)(curr_layer)
+                i += 1
+
+        output_layer_vf = tf.keras.layers.Dense(1, name="vf_output_layer", activation=None)(curr_layer)
+
+        # build model from layers
+        self.base_model = tf.keras.Model(inp_layer, [output_layer_policy, output_layer_vf])
+
+    def forward(self, input_dict, state, seq_lens):
+        """Overrides the parent class's method. Used to pass an input through the model and get the policy/vf output.
+
+        Parameters
+        ----------
+        input_dict: dict
+            dictionary of input tensors, including "obs", "obs_flat", "prev_action", "prev_reward", "is_training"
+        state: list
+            list of state tensors with sizes matching those returned by get_initial_state + the batch dimension
+        seq_lens: tensor
+            1d tensor holding input sequence lengths
+
+        Returns
+        -------
+        (outputs, state)
+            tuple, first element is the policy output, second element is the state
+        """
+        policy_out, value_out = self.base_model(input_dict["obs_flat"])
+        self.value_out = value_out
+        return policy_out, state
+
+    def value_function(self):
+        """Return the value function output for the most recent forward pass.
+
+        Returns
+        -------
+        tensor
+            value estimate tensor of shape [BATCH]
+        """
+        return tf.reshape(self.value_out, [-1])
+
+    def import_from_h5(self, import_file):
+        """Overrides the parent class method. Import base_model from the h5 import_file.
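+
+        In RLlib training this is typically reached by registering the
+        custom model and pointing it at a saved checkpoint, roughly (a
+        sketch; the registered name is illustrative, and how the
+        h5_load_path kwarg is forwarded depends on the trainer setup):
+
+        >>> from ray.rllib.models import ModelCatalog
+        >>> ModelCatalog.register_custom_model("PPO_loaded_weights", PPONetwork)
+        >>> config["model"]["custom_model"] = "PPO_loaded_weights"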
+        Parameters
+        ----------
+        import_file: str
+            filepath to the h5 file
+        """
+        self.base_model.load_weights(import_file)
diff --git a/flow/algorithms/imitation_learning/replay_buffer.py b/flow/algorithms/imitation_learning/replay_buffer.py
new file mode 100644
index 000000000..47ebebaa6
--- /dev/null
+++ b/flow/algorithms/imitation_learning/replay_buffer.py
@@ -0,0 +1,106 @@
+import numpy as np
+
+
+class ReplayBuffer(object):
+    """Replay buffer class to store (state, action, expert_action, reward, next_state, terminal) tuples."""
+
+    def __init__(self, max_size=100000):
+        """
+        Parameters
+        ----------
+        max_size: int
+            maximum size of replay buffer
+        """
+        # max size of buffer
+        self.max_size = max_size
+
+        # store each rollout
+        self.rollouts = []
+
+        # store component arrays from each rollout
+        self.observations = None
+        self.actions = None
+        self.expert_actions = None
+        self.rewards = None
+        self.next_observations = None
+        self.terminals = None
+
+    def add_rollouts(self, rollouts_list):
+        """Add a list of rollouts to the replay buffer.
+
+        Parameters
+        ----------
+        rollouts_list: list
+            list of rollout dictionaries
+        """
+        self.rollouts.extend(rollouts_list)
+
+        observations, actions, expert_actions, rewards, next_observations, terminals = self.unpack_rollouts(rollouts_list)
+
+        assert (not np.any(np.isnan(expert_actions))), "Invalid actions added to replay buffer"
+
+        # only keep max_size tuples in buffer
+        if self.observations is None:
+            self.observations = observations[-self.max_size:]
+            self.actions = actions[-self.max_size:]
+            self.expert_actions = expert_actions[-self.max_size:]
+            self.rewards = rewards[-self.max_size:]
+            self.next_observations = next_observations[-self.max_size:]
+            self.terminals = terminals[-self.max_size:]
+        else:
+            self.observations = np.concatenate([self.observations, observations])[-self.max_size:]
+            self.actions = np.concatenate([self.actions, actions])[-self.max_size:]
+            self.expert_actions = np.concatenate([self.expert_actions, expert_actions])[-self.max_size:]
+            self.rewards = np.concatenate([self.rewards, rewards])[-self.max_size:]
+            self.next_observations = np.concatenate([self.next_observations, next_observations])[-self.max_size:]
+            self.terminals = np.concatenate([self.terminals, terminals])[-self.max_size:]
+
+    def sample_batch(self, batch_size):
+        """Sample a batch of data (with size batch_size) from the replay buffer.
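+
+        For instance (contents illustrative), after one rollout dictionary
+        of aligned numpy arrays has been added, a batch can be drawn as:
+
+        >>> buf = ReplayBuffer(max_size=100000)
+        >>> buf.add_rollouts([rollout_dict])
+        >>> obs, acts, expert_acts = buf.sample_batch(32)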
+
+        Parameters
+        ----------
+        batch_size: int
+            size of batch to sample
+
+        Returns
+        -------
+        Data in separate numpy arrays of observations, actions, and expert actions
+        """
+        assert self.observations is not None and self.actions is not None and self.expert_actions is not None
+
+        size = len(self.observations)
+        rand_inds = np.random.randint(0, size, batch_size)
+        return self.observations[rand_inds], self.actions[rand_inds], self.expert_actions[rand_inds]
+
+    def unpack_rollouts(self, rollouts_list):
+        """Convert a list of rollout dictionaries into separate observation, action, expert action, reward, next observation, and terminal arrays.
+
+        Parameters
+        ----------
+        rollouts_list: list
+            list of rollout dictionaries
+
+        Returns
+        -------
+        separate numpy arrays of observations, actions, expert_actions, rewards, next_observations, and is_terminals
+        """
+        observations = np.concatenate([rollout["observations"] for rollout in rollouts_list])
+        actions = np.concatenate([rollout["actions"] for rollout in rollouts_list])
+        expert_actions = np.concatenate([rollout["expert_actions"] for rollout in rollouts_list])
+        rewards = np.concatenate([rollout["rewards"] for rollout in rollouts_list])
+        next_observations = np.concatenate([rollout["next_observations"] for rollout in rollouts_list])
+        terminals = np.concatenate([rollout["terminals"] for rollout in rollouts_list])
+
+        return observations, actions, expert_actions, rewards, next_observations, terminals
diff --git a/flow/algorithms/imitation_learning/run.py b/flow/algorithms/imitation_learning/run.py
new file mode 100644
index 000000000..ed8717a5a
--- /dev/null
+++ b/flow/algorithms/imitation_learning/run.py
@@ -0,0 +1,126 @@
+"""
+Runner file for imitation learning. This script performs imitation learning using DAgger and also configures the trained
+model to conduct further training with reinforcement learning (see train_with_imitation.py).
+
+Usage:
+    python run.py EXP_CONFIG
+"""
+from flow.algorithms.imitation_learning.trainer import Trainer
+
+
+class Runner(object):
+    """Class to run imitation learning (training and evaluation)."""
+
+    def __init__(self, params):
+        """
+        Parameters
+        ----------
+        params: dict
+            dictionary of parameters relevant to running imitation learning.
+        """
+        # initialize trainer class instance and params
+        self.params = params
+
+        # import the appropriate exp_config module
+        if self.params['multiagent']:
+            module = __import__("examples.exp_configs.rl.multiagent", fromlist=[self.params['exp_config']])
+        else:
+            module = __import__("examples.exp_configs.rl.singleagent", fromlist=[self.params['exp_config']])
+
+        submodule = getattr(module, self.params['exp_config'])
+        self.trainer = Trainer(params, submodule)
+
+    def run_training_loop(self):
+        """Run imitation learning training for the number of iterations specified in params."""
+        self.trainer.run_training_loop()
+
+    def evaluate(self):
+        """Evaluate a trained controller over a specified number of trajectories; compares average action per step and average reward per trajectory between imitator and expert."""
+        self.trainer.evaluate_controller()
+
+    def save_controller_network(self):
+        """Save the tensorflow keras model of the imitation policy to an h5 file, whose path is specified in params."""
+        self.trainer.save_controller_network()
+
+    def save_controller_for_PPO(self):
+        """Create and save (in h5 file format) a new tensorflow keras model for running PPO with weights loaded from imitation learning. This model encapsulates both a policy network and a value function network.
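+
+        Examples
+        --------
+        >>> runner = Runner(params)
+        >>> runner.run_training_loop()
+        >>> runner.save_controller_for_PPO()  # writes params['PPO_save_path']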
+        """
+        self.trainer.save_controller_for_PPO()
+
+
+def main():
+    """Parse args, run training, and evaluate."""
+    import argparse
+    parser = argparse.ArgumentParser()
+
+    # required input parameters
+    parser.add_argument(
+        'exp_config', type=str,
+        help='Name of the experiment configuration file, as located in '
+             'exp_configs/rl/singleagent or exp_configs/rl/multiagent.')
+
+    # rollout collection params
+    parser.add_argument('--ep_len', type=int, default=5000, help='Max length of episodes for rollouts.')
+    parser.add_argument('--num_agent_train_steps_per_iter', type=int, default=1000, help='Number of gradient steps for training policy.')
+    parser.add_argument('--n_iter', type=int, default=3, help='Number of DAgger iterations to run (1st iteration is behavioral cloning).')
+    parser.add_argument('--multiagent', type=bool, default=False, help='If true, env is multiagent.')
+    parser.add_argument('--v_des', type=float, default=15, help='Desired velocity for follower-stopper.')
+    parser.add_argument('--num_eval_episodes', type=int, default=0, help='Number of episodes on which to evaluate the imitation model.')
+
+    # imitation training params
+    parser.add_argument('--batch_size', type=int, default=1000, help='Number of environment steps to collect in each iteration of DAgger.')
+    parser.add_argument('--init_batch_size', type=int, default=2000, help='Number of environment steps to collect on the 1st iteration of DAgger (behavioral cloning iteration).')
+    parser.add_argument('--vf_batch_size', type=int, default=2000, help='Number of environment steps to collect to learn the value function for a policy.')
+    parser.add_argument('--num_vf_iters', type=int, default=100, help='Number of iterations to run value function training.')
+    parser.add_argument('--train_batch_size', type=int, default=100, help='Batch size for SGD.')
+    parser.add_argument('--stochastic', type=bool, default=False, help='If true, learn a stochastic policy (MV Gaussian).')
+    parser.add_argument('--variance_regularizer', type=float, default=0.5, help='Regularization hyperparameter to penalize variance in the imitation learning loss, for stochastic policies.')
+    parser.add_argument('--replay_buffer_size', type=int, default=1000000, help='Max size of replay buffer.')
+    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate for imitation learning and value function learning.')
+
+    parser.add_argument('--load_imitation_model', type=bool, default=False, help='Whether to load an existing imitation neural net.')
+    parser.add_argument('--load_imitation_path', type=str, default='', help='Path to h5 file from which to load an existing imitation neural net.')
+    parser.add_argument('--save_model', type=int, default=0, help='If set to 1, save both the imitation model and the PPO model in h5 format.')
+    parser.add_argument('--imitation_save_path', type=str, default='', help='Filepath to h5 file in which the imitation model should be saved.')
+    parser.add_argument('--PPO_save_path', type=str, default='', help='Filepath to h5 file in which the PPO model with copied weights should be saved.')
+
+    # misc params
+    parser.add_argument('--tensorboard_path', type=str, default='/tensorboard/', help='Path to which tensorboard events should be written.')
+
+    args = parser.parse_args()
+
+    # convert args to dictionary
+    params = vars(args)
+
+    # change this to determine the number and size of hidden layers
+    params["fcnet_hiddens"] = [32, 32, 32]
+
+    # run training
+    runner = Runner(params)
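+    # A typical invocation of this script (experiment config name
+    # illustrative):
+    #   python flow/algorithms/imitation_learning/run.py singleagent_ring \
+    #       --n_iter 3 --num_eval_episodes 2 --save_model 1 \
+    #       --imitation_save_path imitation.h5 --PPO_save_path ppo.h5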
runner.run_training_loop() + + # save model after training + if params['save_model'] == 1: + runner.save_controller_network() + runner.save_controller_for_PPO() + + # evaluate controller on difference, compared to expert, in action taken and average reward accumulated per rollout + if params['num_eval_episodes'] > 0: + runner.evaluate() + + +if __name__ == "__main__": + main() diff --git a/flow/algorithms/imitation_learning/train_with_imitation.py b/flow/algorithms/imitation_learning/train_with_imitation.py new file mode 100644 index 000000000..2aae7c2e8 --- /dev/null +++ b/flow/algorithms/imitation_learning/train_with_imitation.py @@ -0,0 +1,171 @@ +from flow.algorithms.imitation_learning.run import * +from examples.train import * + +def parse_args(args): + """Parse training options user can specify in command line. + + Returns + ------- + argparse.Namespace + the output parser object + dict_args + dictionary version of the argparse + """ + + # **** TRAIN.PY ARGS **** + + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description="Parse argument used when running a Flow simulation.", + epilog="python train.py EXP_CONFIG EXP_TITLE") + + # required input parameters + parser.add_argument( + 'exp_config', type=str, + help='Name of the experiment configuration file, as located in ' + 'exp_configs/rl/singleagent or exp_configs/rl/multiagent.') + + + parser.add_argument( + 'exp_title', type=str, + help='Title to give the run.') + + + # optional input parameters + parser.add_argument( + '--rl_trainer', type=str, default="rllib", + help='the RL trainer to use. either rllib or Stable-Baselines') + parser.add_argument( + '--algorithm', type=str, default="PPO", + help='RL algorithm to use. Options are PPO, TD3, MATD3 (MADDPG w/ TD3) right now.' + ) + parser.add_argument( + '--num_cpus', type=int, default=1, + help='How many CPUs to use') + parser.add_argument( + '--num_steps', type=int, default=5000, + help='How many total steps to perform learning over. 
Relevant for stable-baselines')
+    parser.add_argument(
+        '--grid_search', action='store_true', default=False,
+        help='Whether to grid search over hyperparams')
+    parser.add_argument(
+        '--num_iterations', type=int, default=200,
+        help='How many iterations are in a training run.')
+    parser.add_argument(
+        '--checkpoint_freq', type=int, default=20,
+        help='How often to checkpoint.')
+    parser.add_argument(
+        '--num_rollouts', type=int, default=1,
+        help='How many rollouts are in a training batch.')
+    parser.add_argument(
+        '--rollout_size', type=int, default=1000,
+        help='How many steps are in a training batch.')
+    parser.add_argument('--use_s3', action='store_true', help='If true, upload results to s3')
+    parser.add_argument('--local_mode', action='store_true', default=False,
+                        help='If true only 1 CPU will be used')
+    parser.add_argument('--render', action='store_true', default=False,
+                        help='If true, we render the display')
+    parser.add_argument(
+        '--checkpoint_path', type=str, default=None,
+        help='Directory with checkpoint to restore training from.')
+
+    # *** IMITATION LEARNING ARGS ***
+
+    # rollout collection params:
+    parser.add_argument('--ep_len', type=int, default=5000, help='Max length of episodes for rollouts.')
+    parser.add_argument('--num_agent_train_steps_per_iter', type=int, default=1000, help='Number of gradient steps for training the imitation policy.')
+    parser.add_argument('--n_iter', type=int, default=3, help='Number of DAgger iterations to run (1st iteration is behavioral cloning).')
+    parser.add_argument('--multiagent', type=bool, default=False, help='If true, env is multiagent.')
+    parser.add_argument('--v_des', type=float, default=15, help='Desired velocity for follower-stopper.')
+    parser.add_argument('--num_eval_episodes', type=int, default=0, help='Number of episodes on which to evaluate the imitation model.')
+
+    # imitation training params:
+    parser.add_argument('--batch_size', type=int, default=1000, help='Number of environment steps to collect in each iteration of DAgger.')
+    parser.add_argument('--init_batch_size', type=int, default=2000, help='Number of environment steps to collect on the 1st iteration of DAgger (behavioral cloning iteration).')
+    parser.add_argument('--vf_batch_size', type=int, default=2000, help='Number of environment steps to collect to learn the value function for a policy.')
+    parser.add_argument('--num_vf_iters', type=int, default=100, help='Number of iterations to run value function learning, after imitation.')
+    parser.add_argument('--train_batch_size', type=int, default=100, help='Batch size to run SGD on during imitation learning.')
+    parser.add_argument('--variance_regularizer', type=float, default=0.5, help='Regularization hyperparameter to penalize variance in the imitation learning negative log-likelihood loss, for stochastic policies.')
+    parser.add_argument('--stochastic', type=bool, default=True, help='If true, learn a stochastic policy (MV Gaussian). Must be true to continue with PPO training.')
+    parser.add_argument('--replay_buffer_size', type=int, default=1000000, help='Max size of replay buffer.')
+    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate for imitation learning and value function learning.')
+
+    # loading and saving params:
+    parser.add_argument('--load_imitation_model', type=bool, default=False, help='Whether to load an existing imitation neural network.')
+    parser.add_argument('--load_imitation_path', type=str, default='', help='Path to h5 file from which to load an existing imitation neural net.
load_imitation_model must be True') + parser.add_argument('--imitation_save_path', type=str, default='', help='Filepath to h5 file in which imitation model should be saved') + parser.add_argument('--PPO_save_path', type=str, default='', help="Filepath to h5 file in which the ppo model should be saved. Before starting PPO training, weights (for both policy and value function) will be loaded from this model") + + # misc + parser.add_argument('--tensorboard_path', type=str, default='/tensorboard/', help='Path to which tensorboard events should be written.') + + parsed_args = parser.parse_known_args(args)[0] + dict_args = vars(parsed_args) + + return parsed_args, dict_args + + + +def main(args): + + # Parse args, train imitation learning + + flags, params = parse_args(args) + + # depth and size of MLP layers + params["fcnet_hiddens"] = [32, 32, 32] + + # load_weights_path for PPO must be set to same path as PPO_save_path (a result from imitation) + params['load_weights_path'] = params["PPO_save_path"] + + + print("\n\n********** IMITATION LEARNING ************ \n") + # run training + imitation_runner = Runner(params) + imitation_runner.run_training_loop() + + # save imitation network + imitation_runner.save_controller_network() + + # save PPO network (contains policy and value function) + imitation_runner.save_controller_for_PPO() + + # Imitation Done, start RL + print("\n\n********** RL ************ \n") + + # Import relevant information from the exp_config script. + module = __import__( + "examples.exp_configs.rl.singleagent", fromlist=[flags.exp_config]) + module_ma = __import__( + "examples.exp_configs.rl.multiagent", fromlist=[flags.exp_config]) + + # Import the sub-module containing the specified exp_config and determine + # whether the environment is single agent or multi-agent. + if hasattr(module, flags.exp_config): + submodule = getattr(module, flags.exp_config) + multiagent = False + elif hasattr(module_ma, flags.exp_config): + submodule = getattr(module_ma, flags.exp_config) + assert flags.rl_trainer.lower() in ["rllib", "h-baselines"], \ + "Currently, multiagent experiments are only supported through "\ + "RLlib. Try running this experiment using RLlib: " \ + "'python train.py EXP_CONFIG'" + multiagent = True + else: + raise ValueError("Unable to find experiment config.") + + # Perform the training operation. 
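+    # A typical end-to-end invocation (config name and title illustrative):
+    #   python flow/algorithms/imitation_learning/train_with_imitation.py \
+    #       singleagent_ring my_run --n_iter 3 --PPO_save_path ppo.h5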
+ if flags.rl_trainer.lower() == "rllib": + train_rllib(submodule, flags) + elif flags.rl_trainer.lower() == "stable-baselines": + train_stable_baselines(submodule, flags) + elif flags.rl_trainer.lower() == "h-baselines": + flow_params = submodule.flow_params + train_h_baselines(flow_params, args, multiagent) + else: + raise ValueError("rl_trainer should be either 'rllib', 'h-baselines', " + "or 'stable-baselines'.") + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/flow/algorithms/imitation_learning/trainer.py b/flow/algorithms/imitation_learning/trainer.py new file mode 100644 index 000000000..203eee0b1 --- /dev/null +++ b/flow/algorithms/imitation_learning/trainer.py @@ -0,0 +1,320 @@ +from flow.algorithms.imitation_learning.utils import sample_n_trajectories, sample_trajectories +from flow.utils.registry import make_create_env +from flow.algorithms.imitation_learning.imitating_controller import ImitatingController +from flow.algorithms.imitation_learning.imitating_network import ImitatingNetwork +from flow.algorithms.imitation_learning.utils_tensorflow import * +from flow.algorithms.imitation_learning.keras_utils import * +from flow.controllers.velocity_controllers import FollowerStopper +from flow.core.params import SumoCarFollowingParams + +class Trainer(object): + """ + Class to initialize and run training for imitation learning (with DAgger) + """ + + def __init__(self, params, submodule): + """ + Parameters + __________ + params: dict + Dictionary of parameters used to run imitation learning + submodule: Module + Python module for file containing flow_params + """ + + # get flow params + self.flow_params = submodule.flow_params + + # setup parameters for training + self.params = params + self.sess = create_tf_session() + + # environment setup + create_env, _ = make_create_env(self.flow_params) + self.env = create_env() + + # vehicle setup + self.multiagent = self.params['multiagent'] # multiagent or singleagent env + + if not self.multiagent and self.env.action_space.shape[0] > 1: + # use sorted rl ids if the method exists (e.g.. 
singleagent straight road)
+            try:
+                self.vehicle_ids = self.env.get_sorted_rl_ids()
+            except AttributeError:
+                self.vehicle_ids = self.env.k.vehicle.get_rl_ids()
+        else:
+            # use get_rl_ids if sorted_rl_ids doesn't exist
+            self.vehicle_ids = self.env.k.vehicle.get_rl_ids()
+
+        # neural net setup
+        obs_dim = self.env.observation_space.shape[0]
+        action_dim = self.env.action_space.shape[0]
+
+        self.params['action_dim'] = action_dim
+        self.params['obs_dim'] = obs_dim
+
+        # initialize neural network class and tf variables
+        self.action_network = ImitatingNetwork(self.sess, self.params['action_dim'], self.params['obs_dim'], self.params['fcnet_hiddens'], self.params['replay_buffer_size'], self.params['lr'], stochastic=self.params['stochastic'], variance_regularizer=self.params['variance_regularizer'], load_model=self.params['load_imitation_model'], load_path=self.params['load_imitation_path'], tensorboard_path=self.params['tensorboard_path'])
+
+        # controllers setup
+        v_des = self.params['v_des']  # for FollowerStopper
+        car_following_params = SumoCarFollowingParams()
+        self.controllers = dict()
+        # initialize controllers: save in a dictionary to avoid re-initializing a controller for a vehicle
+        for vehicle_id in self.vehicle_ids:
+            expert = FollowerStopper(vehicle_id, car_following_params=car_following_params, v_des=v_des)
+            imitator = ImitatingController(vehicle_id, self.action_network, self.multiagent, car_following_params=car_following_params)
+            self.controllers[vehicle_id] = (imitator, expert)
+
+    def run_training_loop(self):
+        """Train the imitator for self.params['n_iter'] iterations (each iteration collects new trajectories to put in the replay buffer)."""
+        # number of imitation learning iterations (1st iteration is behavioral cloning)
+        n_iter = self.params['n_iter']
+
+        # number of environment steps taken throughout training
+        self.total_envsteps = 0
+
+        for itr in range(n_iter):
+            print("\n\n********** Iteration %i ************" % itr)
+
+            # collect trajectories, to be used for training
+            if itr == 0:
+                # first iteration is behavioral cloning
+                training_returns = self.collect_training_trajectories(itr, self.params['init_batch_size'])
+            else:
+                # other iterations use DAgger (trajectories collected by running imitator policy)
+                training_returns = self.collect_training_trajectories(itr, self.params['batch_size'])
+
+            paths, envsteps_this_batch = training_returns
+            self.total_envsteps += envsteps_this_batch
+
+            # add collected data to the replay buffer in the neural network class
+            self.action_network.add_to_replay_buffer(paths)
+
+            # train controller
+            self.train_controller()
+
+    def collect_training_trajectories(self, itr, batch_size):
+        """Collect (state, action, reward, next_state, terminal) tuples for training.
+
+        Parameters
+        ----------
+        itr: int
+            iteration of training during which function is called.
Used to determine whether to run behavioral cloning or DAgger + batch_size: int + number of tuples to collect + Returns + _______ + paths: list + list of trajectories + envsteps_this_batch: int + the sum over the numbers of environment steps in paths (total number of env transitions in trajectories collected) + """ + + print("\nCollecting data to be used for training...") + max_decel = self.flow_params['env'].additional_params['max_decel'] + trajectories, envsteps_this_batch = sample_trajectories(self.env, self.controllers, self.action_network, batch_size, self.params['ep_len'], self.multiagent, use_expert=itr==0, v_des=self.params['v_des'], max_decel=max_decel) + + return trajectories, envsteps_this_batch + + def train_controller(self): + """ + Trains controller for specified number of steps, using data sampled from replay buffer; each step involves running optimizer (i.e. Adam) once + """ + + print("Training controller using sampled data from replay buffer...") + for train_step in range(self.params['num_agent_train_steps_per_iter']): + # sample data from replay buffer + ob_batch, ac_batch, expert_ac_batch = self.action_network.sample_data(self.params['train_batch_size']) + # train network on sampled data + self.action_network.train(ob_batch, expert_ac_batch) + + def evaluate_controller(self): + """ + Evaluates a trained imitation controller on similarity with expert with respect to action taken and total reward per rollout. + """ + + print("\n\n********** Evaluation ************ \n") + + + # number of trajectories to evaluate performance on + num_trajs = self.params['num_eval_episodes'] + + # collect imitator driven trajectories (along with corresponding expert actions) + trajectories = sample_n_trajectories(self.env, self.controllers, self.action_network, num_trajs, self.params['ep_len'], self.multiagent, False, v_des=self.params['v_des']) + + # initialize metrics + total_imitator_steps = 0 # total number of environment steps taken across the n trajectories + average_imitator_reward_per_rollout = 0 # average reward per rollout achieved by imitator + + action_errors = np.array([]) # difference in action (acceleration) taken between expert and imitator + average_action_expert = 0 # average action taken, across all timesteps, by expert (used to compute % average) + average_action_imitator = 0 # average action taken, across all timesteps, by imitator (used to compute % average) + + # compare actions taken in each step of trajectories (trajectories are controlled by imitator) + for traj_tuple in trajectories: + traj = traj_tuple[0] + traj_len = traj_tuple[1] + + imitator_actions = traj['actions'] + expert_actions = traj['expert_actions'] + + average_action_expert += np.sum(expert_actions) + average_action_imitator += np.sum(imitator_actions) + + # use RMSE as action error metric + action_error = (np.linalg.norm(imitator_actions - expert_actions)) / len(imitator_actions) + action_errors = np.append(action_errors, action_error) + + total_imitator_steps += traj_len + average_imitator_reward_per_rollout += np.sum(traj['rewards']) + + # compute averages for metrics + average_imitator_reward_per_rollout = average_imitator_reward_per_rollout / len(trajectories) + + average_action_expert = average_action_expert / total_imitator_steps + + # collect expert driven trajectories (these trajectories are only used to compare average reward per rollout) + expert_trajectories = sample_n_trajectories(self.env, self.controllers, self.action_network, num_trajs, self.params['ep_len'], self.multiagent, True, 
v_des=self.params['v_des']) + + # initialize metrics + total_expert_steps = 0 + average_expert_reward_per_rollout = 0 + + # compare reward accumulated in trajectories collected via expert vs. via imitator + for traj_tuple in expert_trajectories: + traj = traj_tuple[0] + traj_len = traj_tuple[1] + total_expert_steps += traj_len + average_expert_reward_per_rollout += np.sum(traj['rewards']) + + average_expert_reward_per_rollout = average_expert_reward_per_rollout / len(expert_trajectories) + + # compute percent errors (using expert values as 'ground truth') + percent_error_average_reward = (np.abs(average_expert_reward_per_rollout - average_imitator_reward_per_rollout) / average_expert_reward_per_rollout) * 100 + + percent_error_average_action = (np.abs(np.mean(action_errors)) / np.abs(average_action_expert)) * 100 + + # Print results + print("\nAverage reward per rollout, expert: ", average_expert_reward_per_rollout) + print("Average reward per rollout, imitator: ", average_imitator_reward_per_rollout) + print("% Difference, average reward per rollout: ", percent_error_average_reward, "\n") + + + print(" Average RMSE action error per rollout: ", np.mean(action_errors)) + print("Average Action Taken by Expert: ", average_action_expert) + print("% Action Error: ", percent_error_average_action, "\n") + print("Total imitator steps: ", total_imitator_steps) + print("Total expert steps: ", total_expert_steps) + + def learn_value_function(self, num_samples, num_iterations, num_grad_steps): + """ + Learn the value function under imitation policy. + Parameters + __________ + num_samples: number of environment transition samples to collect to learn from + num_iterations: number of iterations to relabel data, and train + num_grad_steps: number of gradient steps per training iteration + + Returns + _______ + Value function neural net + """ + + print("\n\n********** Learning value function of imitation policy ************ \n") + # init value function neural net + vf_net = build_neural_net_deterministic(self.params['obs_dim'], 1, self.params['fcnet_hiddens']) + vf_net.compile(loss='mean_squared_error', optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['lr'])) + + max_decel = self.flow_params['env'].additional_params['max_decel'] + # collect trajectory samples to train on + trajectories, envsteps_this_batch = sample_trajectories(self.env, self.controllers, self.action_network, + num_samples, self.params['ep_len'], self.multiagent, + use_expert=False, v_des=self.params['v_des'], + max_decel=max_decel) + + # combine trajectories into one + observations = np.concatenate([traj['observations'] for traj in trajectories]) + rewards = np.concatenate([traj['rewards'] for traj in trajectories]) + next_observations = np.concatenate([traj['next_observations'] for traj in trajectories]) + + # iterate over data multiple times (labels change every iteration) + for i in range(num_iterations): + # form labels + next_state_value_preds = vf_net.predict(next_observations).flatten() + next_state_value_preds[np.isnan(next_state_value_preds)] = 0 + labels = rewards + next_state_value_preds + vf_net.fit(observations, labels, verbose=0) + + return vf_net + + + + def save_controller_for_PPO(self): + """ + Build a model, with same policy architecture as imitation network, to run PPO, copy weights from imitation, and save this model. 
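+
+        The copy is name-based: the new model rebuilds layers named
+        "policy_hidden_layer_{i}"/"policy_output_layer" and
+        "vf_hidden_layer_{i}"/"vf_output_layer" (the names PPONetwork
+        expects), then transfers weights pairwise (a sketch):
+
+        >>> dst = ppo_model.get_layer("policy_hidden_layer_1")
+        >>> dst.set_weights(imitation_model.layers[1].get_weights())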
+        """
+        # filepath to h5 file in which the keras model will be saved
+        PPO_save_path = self.params['PPO_save_path']
+
+        vf_net = self.learn_value_function(self.params['vf_batch_size'], self.params['num_vf_iters'], self.params['num_agent_train_steps_per_iter'])
+
+        inp = tf.keras.layers.Input(self.action_network.model.input.shape[1].value)
+        curr_layer = inp
+
+        # number of hidden layers
+        num_layers = len(self.action_network.model.layers) - 2
+
+        # build layers for policy
+        for i in range(num_layers):
+            size = self.action_network.model.layers[i + 1].output.shape[1].value
+            activation = tf.keras.activations.serialize(self.action_network.model.layers[i + 1].activation)
+            curr_layer = tf.keras.layers.Dense(size, activation=activation, name="policy_hidden_layer_{}".format(i + 1))(curr_layer)
+        output_layer_policy = tf.keras.layers.Dense(self.action_network.model.output.shape[1].value, activation=None, name="policy_output_layer")(curr_layer)
+
+        # build layers for value function
+        curr_layer = inp
+        for i in range(num_layers):
+            size = self.params['fcnet_hiddens'][i]
+            curr_layer = tf.keras.layers.Dense(size, activation="tanh", name="vf_hidden_layer_{}".format(i + 1))(curr_layer)
+        output_layer_vf = tf.keras.layers.Dense(1, activation=None, name="vf_output_layer")(curr_layer)
+
+        ppo_model = tf.keras.Model(inputs=inp, outputs=[output_layer_policy, output_layer_vf], name="ppo_model")
+
+        # set the policy weights to those learned from imitation
+        for i in range(num_layers):
+            policy_layer = ppo_model.get_layer(name="policy_hidden_layer_{}".format(i + 1))
+            policy_layer.set_weights(self.action_network.model.layers[i + 1].get_weights())
+        policy_output = ppo_model.get_layer("policy_output_layer")
+        policy_output.set_weights(self.action_network.model.layers[-1].get_weights())
+
+        # set value function weights to those learned
+        num_vf_layers = len(vf_net.layers) - 2
+        for i in range(num_vf_layers):
+            vf_layer = ppo_model.get_layer('vf_hidden_layer_{}'.format(i + 1))
+            vf_layer.set_weights(vf_net.layers[i + 1].get_weights())
+        vf_output = ppo_model.get_layer("vf_output_layer")
+        vf_output.set_weights(vf_net.layers[-1].get_weights())
+
+        # save the model (as an h5 file)
+        ppo_model.save(PPO_save_path)
+
+    def save_controller_network(self):
+        """Save a keras tensorflow model to the path specified in the command line params. The path must end with .h5."""
+        imitation_save_path = self.params['imitation_save_path']
+        print("Saving tensorflow model to: ", imitation_save_path)
+        self.action_network.save_network(imitation_save_path)
diff --git a/flow/algorithms/imitation_learning/utils.py b/flow/algorithms/imitation_learning/utils.py
new file mode 100644
index 000000000..cb75ccc19
--- /dev/null
+++ b/flow/algorithms/imitation_learning/utils.py
@@ -0,0 +1,369 @@
+import tensorflow as tf
+import os
+import numpy as np
+import math
+from flow.core.params import SumoCarFollowingParams
+from flow.algorithms.imitation_learning.imitating_controller import ImitatingController
+from flow.algorithms.imitation_learning.imitating_network import ImitatingNetwork
+from flow.controllers.car_following_models import IDMController
+from flow.controllers.velocity_controllers import FollowerStopper
+from flow.core.rewards import *
+
+""" Class agnostic helper functions """
+
+def sample_trajectory_singleagent(env, controllers, action_network, max_trajectory_length, use_expert, v_des, max_decel):
+    """Sample a single trajectory from a singleagent environment.
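+
+    When the environment exposes fewer RL vehicles than the action space
+    expects, the remaining action and label entries are padded with the
+    dummy value -2 * max_decel (e.g. -9.0 for max_decel = 4.5) so every
+    sample keeps a fixed width.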
+
+    Parameters
+    ----------
+    env: gym.Env
+        environment
+    controllers: dict
+        Dictionary of 2-tuples (Imitating_Controller, Expert_Controller), with keys of vehicle_ids
+    action_network: ImitatingNetwork
+        ImitatingNetwork class containing neural net for action prediction
+    max_trajectory_length: int
+        maximum steps in a trajectory
+    use_expert: bool
+        if True, trajectory is collected using expert policy (for behavioral cloning)
+    v_des: float
+        v_des parameter for follower-stopper
+    max_decel: float
+        maximum deceleration of environment. Used to determine dummy values to put as labels when the environment has fewer vehicles than the maximum amount.
+
+    Returns
+    -------
+    dict
+        Dictionary of numpy arrays, where matching indices across arrays give (state, action, expert_action, reward, next_state, terminal) tuples
+    """
+    # reset and initialize arrays to store trajectory
+    observation = env.reset()
+
+    observations, actions, expert_actions, rewards, next_observations, terminals = [], [], [], [], [], []
+    traj_length = 0
+
+    while True:
+        # update vehicle ids: if multidimensional action space, check if env has a sorted_rl_ids method
+        if env.action_space.shape[0] > 1:
+            try:
+                vehicle_ids = env.get_sorted_rl_ids()
+            except AttributeError:
+                vehicle_ids = env.k.vehicle.get_rl_ids()
+        else:
+            vehicle_ids = env.k.vehicle.get_rl_ids()
+
+        # no RL actions if no RL vehicles
+        if len(vehicle_ids) == 0:
+            observation, reward, done, _ = env.step(None)
+            if done:
+                break
+            continue
+
+        # init controllers if any of the vehicle ids are new
+        # there could be multiple vehicle ids if they all share one state but have different actions
+        car_following_params = SumoCarFollowingParams()
+
+        for vehicle_id in vehicle_ids:
+            if vehicle_id not in set(controllers.keys()):
+                expert = FollowerStopper(vehicle_id, car_following_params=car_following_params, v_des=v_des)
+                imitator = ImitatingController(vehicle_id, action_network, False, car_following_params=car_following_params)
+                controllers[vehicle_id] = (imitator, expert)
+
+        # get the actions given by controllers
+        action_dim = env.action_space.shape[0]
+        rl_actions = []
+        actions_expert = []
+
+        invalid_expert_action = False
+        for i in range(action_dim):
+            # if the max number of RL vehicles is not reached, insert dummy values
+            if i >= len(vehicle_ids):
+                # dummy value is -2 * max_decel
+                ignore_accel = -2 * max_decel
+                rl_actions.append(ignore_accel)
+                actions_expert.append(ignore_accel)
+            else:
+                imitator = controllers[vehicle_ids[i]][0]
+                expert = controllers[vehicle_ids[i]][1]
+
+                expert_action = expert.get_action(env)
+                # catch invalid expert actions
+                if (expert_action is None or math.isnan(expert_action)):
+                    invalid_expert_action = True
+
+                actions_expert.append(expert_action)
+
+                if use_expert:
+                    if traj_length == 0 and i == 0:
+                        print("Controller collecting trajectory: ", type(expert))
+                    rl_actions.append(expert_action)
+                else:
+                    if traj_length == 0 and i == 0:
+                        print("Controller collecting trajectory: ", type(imitator))
+                    imitator_action = imitator.get_action(env)
+                    rl_actions.append(imitator_action)
+
+        # invalid action in rl_actions; default to Sumo, ignore sample
+        if None in rl_actions or np.nan in rl_actions:
+            observation, reward, done, _ = env.step(None)
+            terminate_rollout = traj_length == max_trajectory_length or done
+            if terminate_rollout:
+                break
+            continue
+        # invalid expert action (if rl_actions were expert actions, this would have been caught above)
+        if not use_expert and invalid_expert_action:
+            # throw away the sample, but step according to rl_actions
observation, reward, done, _ = env.step(rl_actions)
+            terminate_rollout = traj_length == max_trajectory_length or done
+            if terminate_rollout:
+                break
+            continue
+
+        # update collected data
+        observations.append(observation)
+        actions.append(rl_actions)
+        expert_actions.append(actions_expert)
+        observation, reward, done, _ = env.step(rl_actions)
+
+        traj_length += 1
+        next_observations.append(observation)
+        rewards.append(reward)
+        terminate_rollout = (traj_length == max_trajectory_length) or done
+        terminals.append(terminate_rollout)
+
+        if terminate_rollout:
+            break
+
+    return traj_dict(observations, actions, expert_actions, rewards, next_observations, terminals), traj_length
+
+
+def sample_trajectory_multiagent(env, controllers, action_network, max_trajectory_length, use_expert, v_des):
+    """Sample a single trajectory from a multiagent environment.
+
+    Parameters
+    ----------
+    env: gym.Env
+        environment
+    controllers: dict
+        Dictionary of 2-tuples (Imitating_Controller, Expert_Controller), with keys of vehicle_ids
+    action_network: ImitatingNetwork
+        ImitatingNetwork class containing neural net for action prediction
+    max_trajectory_length: int
+        maximum steps in a trajectory
+    use_expert: bool
+        if True, trajectory is collected using expert policy (for behavioral cloning)
+    v_des: float
+        v_des parameter for follower-stopper
+
+    Returns
+    -------
+    dict
+        Dictionary of numpy arrays, where matching indices across arrays give (state, action, expert_action, reward, next_state, terminal) tuples
+    """
+    observation_dict = env.reset()
+
+    observations, actions, expert_actions, rewards, next_observations, terminals = [], [], [], [], [], []
+    traj_length = 0
+
+    while True:
+        vehicle_ids = list(observation_dict.keys())
+        # add nothing to replay buffer if no vehicles
+        if len(vehicle_ids) == 0:
+            observation_dict, reward, done, _ = env.step(None)
+            if done['__all__']:
+                break
+            continue
+
+        # actions taken by the collecting controller
+        rl_actions = dict()
+        invalid_expert_action = False
+        # actions taken by the expert
+        expert_action_dict = dict()
+
+        for i in range(len(vehicle_ids)):
+            vehicle_id = vehicle_ids[i]
+
+            if vehicle_id not in set(controllers.keys()):
+                car_following_params = SumoCarFollowingParams()
+
+                expert = FollowerStopper(vehicle_id, car_following_params=car_following_params, v_des=v_des)
+                imitator = ImitatingController(vehicle_id, action_network, True, car_following_params=car_following_params)
+                controllers[vehicle_id] = (imitator, expert)
+
+            expert_controller = controllers[vehicle_id][1]
+            if use_expert:
+                controller = expert_controller
+            else:
+                controller = controllers[vehicle_id][0]
+
+            if traj_length == 0 and i == 0:
+                print("Controller collecting trajectory: ", controller)
+
+            action = controller.get_action(env)
+
+            # action should be a scalar acceleration
+            if type(action) == np.ndarray:
+                action = action.flatten()[0]
+
+            expert_action = expert_controller.get_action(env)
+            expert_action_dict[vehicle_id] = expert_action
+
+            if (expert_action is None or math.isnan(expert_action)):
+                invalid_expert_action = True
+
+            rl_actions[vehicle_id] = action
+
+        if invalid_expert_action:
+            # invalid expert action: default control to SUMO and ignore the sample
+            observation_dict, reward_dict, done_dict, _ = env.step(None)
+            terminate_rollout = traj_length == max_trajectory_length or done_dict['__all__']
+            if terminate_rollout:
+                break
+            continue
+
+        for vehicle_id in vehicle_ids:
+            observations.append(observation_dict[vehicle_id])
+            actions.append(rl_actions[vehicle_id])
expert_actions.append(expert_action_dict[vehicle_id])
+
+        observation_dict, reward_dict, done_dict, _ = env.step(rl_actions)
+        terminate_rollout = done_dict['__all__'] or (traj_length == max_trajectory_length)
+
+        for vehicle_id in vehicle_ids:
+            # default the next observation to nans
+            next_observations.append(observation_dict.get(vehicle_id, np.full((env.observation_space.shape[0], ), np.nan)))
+            rewards.append(reward_dict.get(vehicle_id, 0))
+            terminals.append(terminate_rollout)
+
+        traj_length += 1
+
+        if terminate_rollout:
+            break
+
+    return traj_dict(observations, actions, expert_actions, rewards, next_observations, terminals), traj_length
+
+
+def sample_trajectories(env, controllers, action_network, min_batch_timesteps, max_trajectory_length, multiagent, use_expert, v_des=15, max_decel=4.5):
+    """Sample trajectories from the environment until at least min_batch_timesteps transitions have been collected.
+
+    Parameters
+    ----------
+    env: gym.Env
+        environment
+    controllers: dict
+        Dictionary of 2-tuples (Imitating_Controller, Expert_Controller), with keys of vehicle_ids
+    action_network: ImitatingNetwork
+        ImitatingNetwork class containing neural net for action prediction
+    min_batch_timesteps: int
+        minimum number of env transitions to collect
+    max_trajectory_length: int
+        maximum steps in a trajectory
+    multiagent: bool
+        if True, env is a multiagent env
+    use_expert: bool
+        if True, trajectories are collected using the expert policy (for behavioral cloning)
+    v_des: float
+        v_des parameter for follower-stopper
+    max_decel: float
+        maximum deceleration of environment. Used to determine dummy values to put as labels when the environment has fewer vehicles than the maximum amount.
+
+    Returns
+    -------
+    dict, int
+        Dictionary of trajectory numpy arrays, where matching indices across arrays give (state, action, expert_action, reward, next_state, terminal) tuples
+        Total number of env transitions seen over trajectories
+    """
+    total_envsteps = 0
+    trajectories = []
+
+    while total_envsteps < min_batch_timesteps:
+        if multiagent:
+            trajectory, traj_length = sample_trajectory_multiagent(env, controllers, action_network, max_trajectory_length, use_expert, v_des)
+        else:
+            trajectory, traj_length = sample_trajectory_singleagent(env, controllers, action_network, max_trajectory_length, use_expert, v_des, max_decel)
+
+        trajectories.append(trajectory)
+
+        total_envsteps += traj_length
+
+    return trajectories, total_envsteps
+
+def sample_n_trajectories(env, controllers, action_network, n, max_trajectory_length, multiagent, use_expert, v_des=15, max_decel=4.5):
+    """Sample n trajectories from the environment.
+
+    Parameters
+    ----------
+    env: gym.Env
+        environment
+    controllers: dict
+        Dictionary of 2-tuples (Imitating_Controller, Expert_Controller), with keys of vehicle_ids
+    action_network: ImitatingNetwork
+        ImitatingNetwork class containing neural net for action prediction
+    n: int
+        number of trajectories to collect
+    max_trajectory_length: int
+        maximum steps in a trajectory
+    multiagent: bool
+        if True, env is a multiagent env
+    use_expert: bool
+        if True, trajectories are collected using the expert policy (for behavioral cloning)
+    v_des: float
+        v_des parameter for follower-stopper
+    max_decel: float
+        maximum deceleration of environment. Used to determine dummy values to put as labels when the environment has fewer vehicles than the maximum amount.
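+
+    For example, evaluation collects a fixed number of imitator-driven
+    rollouts (a sketch; arguments as documented above):
+
+    >>> trajs = sample_n_trajectories(
+    ...     env, controllers, action_network, n=2,
+    ...     max_trajectory_length=5000, multiagent=False, use_expert=False)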
+
+    Returns
+    -------
+    list
+        List of (trajectory dict, trajectory length) tuples, where matching indices across the trajectory arrays give (state, action, expert_action, reward, next_state, terminal) tuples
+    """
+    trajectories = []
+    for _ in range(n):
+        if multiagent:
+            trajectory, length = sample_trajectory_multiagent(env, controllers, action_network, max_trajectory_length, use_expert, v_des)
+        else:
+            trajectory, length = sample_trajectory_singleagent(env, controllers, action_network, max_trajectory_length, use_expert, v_des, max_decel)
+
+        trajectories.append((trajectory, length))
+
+    return trajectories
+
+
+def traj_dict(observations, actions, expert_actions, rewards, next_observations, terminals):
+    """Collect the observation, action, expert_action, reward, next observation, and terminal lists (gathered over a rollout) into a single rollout dictionary.
+
+    Parameters
+    ----------
+    observations: list
+        list of observations; ith entry is ith observation
+    actions: list
+        list of actions; ith entry is action taken at ith timestep
+    expert_actions: list
+        list of expert actions; ith entry is the action the expert would have taken at the ith timestep
+    rewards: list
+        list of rewards; ith entry is reward received at ith timestep
+    next_observations: list
+        list of next observations; ith entry is the observation transitioned to due to state and action at ith timestep
+    terminals: list
+        list of booleans indicating if rollout ended at that timestep
+
+    Returns
+    -------
+    dict
+        dictionary containing above lists in numpy array form.
+    """
+    return {"observations": np.array(observations),
+            "actions": np.array(actions),
+            "expert_actions": np.array(expert_actions),
+            "rewards": np.array(rewards),
+            "next_observations": np.array(next_observations),
+            "terminals": np.array(terminals)}
diff --git a/flow/algorithms/imitation_learning/utils_tensorflow.py b/flow/algorithms/imitation_learning/utils_tensorflow.py
new file mode 100644
index 000000000..cbbfa633d
--- /dev/null
+++ b/flow/algorithms/imitation_learning/utils_tensorflow.py
@@ -0,0 +1,49 @@
+import numpy as np
+import tensorflow as tf
+
+
+""" Class agnostic helper functions related to tensorflow """
+
+def build_neural_net(input_placeholder, output_size, scope, n_layers, size, activation=tf.tanh, output_activation=None):
+    """Build a feedforward neural network for action prediction.
+
+    Parameters
+    ----------
+    input_placeholder: tensor
+        placeholder variable for the state (batch_size, input_size)
+    output_size: int
+        size of the output layer
+    scope: str
+        variable scope of the network
+    n_layers: int
+        number of hidden layers
+    size: int
+        dimension of each hidden layer
+    activation: callable
+        activation function of each hidden layer
+    output_activation: callable
+        activation function of the output layer
+
+    Returns
+    -------
+    output_placeholder: tensor
+        the result of a pass through the neural network
+    """
+    output_placeholder = input_placeholder
+    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
+        for _ in range(n_layers):
+            output_placeholder = tf.layers.dense(output_placeholder, size, activation=activation)
+        output_placeholder = tf.layers.dense(output_placeholder, output_size, activation=output_activation, name='Output_Layer')
+    return output_placeholder
+
+def create_tf_session():
+    """Create a tf session.
+
+    Returns
+    -------
+    tf.Session
+        new tensorflow session
+    """
+    config = tf.compat.v1.ConfigProto(device_count={'GPU': 0})
+    sess = tf.compat.v1.Session(config=config)
+    return sess
diff --git a/flow/controllers/base_controller.py b/flow/controllers/base_controller.py
index cef92d573..a657bf87c 100755
--- a/flow/controllers/base_controller.py
+++ b/flow/controllers/base_controller.py
@@ -34,8 +34,12 @@ class BaseController(metaclass=ABCMeta):
         specified to in this model are as desired.
     delay : int
         delay in applying the action (time)
-    fail_safe : str
-        Should be either "instantaneous" or "safe_velocity"
+    fail_safe : list of str or str
+        List of failsafes which can be "instantaneous", "safe_velocity",
+        "feasible_accel", or "obey_speed_limit". The order of applying the
+        failsafes will be based on the order in the list.
+    display_warnings : bool
+        Flag for toggling on/off printing failsafe warnings to screen.
     noise : double
         variance of the gaussian from which to sample a noisy acceleration
     """
@@ -45,6 +49,7 @@ def __init__(self,
                  car_following_params,
                  delay=0,
                  fail_safe=None,
+                 display_warnings=False,
                  noise=0):
         """Instantiate the base class for acceleration behavior."""
         self.veh_id = veh_id
@@ -56,7 +61,28 @@ def __init__(self,
         self.delay = delay
 
         # longitudinal failsafe used by the vehicle
-        self.fail_safe = fail_safe
+        if isinstance(fail_safe, str):
+            failsafe_list = [fail_safe]
+        elif isinstance(fail_safe, list) or fail_safe is None:
+            failsafe_list = fail_safe
+        else:
+            raise ValueError("fail_safe should be a string or a list of strings.")
+
+        failsafe_map = {
+            'instantaneous': self.get_safe_action_instantaneous,
+            'safe_velocity': self.get_safe_velocity_action,
+            'feasible_accel': lambda _, accel: self.get_feasible_action(accel),
+            'obey_speed_limit': self.get_obey_speed_limit_action
+        }
+        self.failsafes = []
+        if failsafe_list:
+            for check in failsafe_list:
+                if check in failsafe_map:
+                    self.failsafes.append(failsafe_map.get(check))
+                else:
+                    raise ValueError('{} is not a valid failsafe.'.format(check))
+
+        self.display_warnings = display_warnings
 
         self.max_accel = car_following_params.controller_params['accel']
         # max deaccel should always be a positive
@@ -77,8 +103,8 @@ def get_action(self, env):
         time step.
 
         This method also augments the controller with the desired level of
-        stochastic noise, and utlizes the "instantaneous" or "safe_velocity"
-        failsafes if requested.
+        stochastic noise, and utilizes the "instantaneous", "safe_velocity",
+        "feasible_accel", and/or "obey_speed_limit" failsafes if requested.
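
        For example, a vehicle can chain several failsafes, applied in list
        order (a sketch using flow's VehicleParams):

        >>> vehicles.add(
        ...     "human",
        ...     acceleration_controller=(IDMController, {
        ...         "fail_safe": ["obey_speed_limit", "feasible_accel"],
        ...         "display_warnings": True}),
        ...     num_vehicles=1)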
Parameters ---------- @@ -90,6 +116,12 @@ def get_action(self, env): float the modified form of the acceleration """ + # clear the current stored accels of this vehicle to None + env.k.vehicle.update_accel(self.veh_id, None, noise=False, failsafe=False) + env.k.vehicle.update_accel(self.veh_id, None, noise=False, failsafe=True) + env.k.vehicle.update_accel(self.veh_id, None, noise=True, failsafe=False) + env.k.vehicle.update_accel(self.veh_id, None, noise=True, failsafe=True) + # this is to avoid abrupt decelerations when a vehicle has just entered # a network and it's data is still not subscribed if len(env.k.vehicle.get_edge(self.veh_id)) == 0: @@ -107,16 +139,26 @@ def get_action(self, env): if accel is None: return None + # store the acceleration without noise to each vehicle + # run fail safe if requested + env.k.vehicle.update_accel(self.veh_id, accel, noise=False, failsafe=False) + accel_no_noise_with_failsafe = accel + + for failsafe in self.failsafes: + accel_no_noise_with_failsafe = failsafe(env, accel_no_noise_with_failsafe) + + env.k.vehicle.update_accel(self.veh_id, accel_no_noise_with_failsafe, noise=False, failsafe=True) + # add noise to the accelerations, if requested if self.accel_noise > 0: - accel += np.random.normal(0, self.accel_noise) + accel += np.sqrt(env.sim_step) * np.random.normal(0, self.accel_noise) + env.k.vehicle.update_accel(self.veh_id, accel, noise=True, failsafe=False) - # run the failsafes, if requested - if self.fail_safe == 'instantaneous': - accel = self.get_safe_action_instantaneous(env, accel) - elif self.fail_safe == 'safe_velocity': - accel = self.get_safe_velocity_action(env, accel) + # run the fail-safes, if requested + for failsafe in self.failsafes: + accel = failsafe(env, accel) + env.k.vehicle.update_accel(self.veh_id, accel, noise=True, failsafe=True) return accel def get_safe_action_instantaneous(self, env, action): @@ -162,6 +204,13 @@ def get_safe_action_instantaneous(self, env, action): # if the vehicle will crash into the vehicle ahead of it in the # next time step (assuming the vehicle ahead of it is not # moving), then stop immediately + if self.display_warnings: + print( + "=====================================\n" + "Vehicle {} is about to crash. Instantaneous acceleration " + "clipping applied.\n" + "=====================================".format(self.veh_id)) + return -this_vel / sim_step else: # if the vehicle is not in danger of crashing, continue with @@ -223,8 +272,8 @@ def safe_velocity(self, env): Returns ------- float - maximum safe velocity given a maximum deceleration and delay in - performing the breaking action + maximum safe velocity given a maximum deceleration, delay in + performing the breaking action, and speed limit """ lead_id = env.k.vehicle.get_leader(self.veh_id) lead_vel = env.k.vehicle.get_speed(lead_id) @@ -235,4 +284,97 @@ def safe_velocity(self, env): v_safe = 2 * h / env.sim_step + dv - this_vel * (2 * self.delay) + # check for speed limit FIXME: this is not called + # this_edge = env.k.vehicle.get_edge(self.veh_id) + # edge_speed_limit = env.k.network.speed_limit(this_edge) + + if this_vel > v_safe: + if self.display_warnings: + print( + "=====================================\n" + "Speed of vehicle {} is greater than safe speed. Safe velocity " + "clipping applied.\n" + "=====================================".format(self.veh_id)) + return v_safe + + def get_obey_speed_limit_action(self, env, action): + """Perform the "obey_speed_limit" failsafe action. 
+ + Checks if the computed acceleration would put us above edge speed limit. + If it would, output the acceleration that would put at the speed limit + velocity. + + Parameters + ---------- + env : flow.envs.Env + current environment, which contains information of the state of the + network at the current time step + action : float + requested acceleration action + + Returns + ------- + float + the requested action clipped by the speed limit + """ + # check for speed limit + this_edge = env.k.vehicle.get_edge(self.veh_id) + edge_speed_limit = env.k.network.speed_limit(this_edge) + + this_vel = env.k.vehicle.get_speed(self.veh_id) + sim_step = env.sim_step + + if this_vel + action * sim_step > edge_speed_limit: + if edge_speed_limit > 0: + if self.display_warnings: + print( + "=====================================\n" + "Speed of vehicle {} is greater than speed limit. Obey " + "speed limit clipping applied.\n" + "=====================================".format(self.veh_id)) + return (edge_speed_limit - this_vel) / sim_step + else: + return -this_vel / sim_step + else: + return action + + def get_feasible_action(self, action): + """Perform the "feasible_accel" failsafe action. + + Checks if the computed acceleration would put us above maximum + acceleration or deceleration. If it would, output the acceleration + equal to maximum acceleration or deceleration. + + Parameters + ---------- + action : float + requested acceleration action + + Returns + ------- + float + the requested action clipped by the feasible acceleration or + deceleration. + """ + if action > self.max_accel: + action = self.max_accel + + if self.display_warnings: + print( + "=====================================\n" + "Acceleration of vehicle {} is greater than the max " + "acceleration. Feasible acceleration clipping applied.\n" + "=====================================".format(self.veh_id)) + + if action < -self.max_deaccel: + action = -self.max_deaccel + + if self.display_warnings: + print( + "=====================================\n" + "Deceleration of vehicle {} is greater than the max " + "deceleration. 
diff --git a/flow/controllers/car_following_models.py b/flow/controllers/car_following_models.py
index 42c9b2a9b..2840e291e 100755
--- a/flow/controllers/car_following_models.py
+++ b/flow/controllers/car_following_models.py
@@ -56,7 +56,8 @@ def __init__(self,
                  v_des=8,
                  time_delay=0.0,
                  noise=0,
-                 fail_safe=None):
+                 fail_safe=None,
+                 display_warnings=False):
         """Instantiate a CFM controller."""
         BaseController.__init__(
             self,
@@ -64,7 +65,9 @@ def __init__(self,
             car_following_params,
             delay=time_delay,
             fail_safe=fail_safe,
-            noise=noise)
+            noise=noise,
+            display_warnings=display_warnings,
+        )

         self.veh_id = veh_id
         self.k_d = k_d
@@ -132,7 +135,8 @@ def __init__(self,
                  v_des=8,
                  time_delay=0.0,
                  noise=0,
-                 fail_safe=None):
+                 fail_safe=None,
+                 display_warnings=False):
         """Instantiate a Bilateral car-following model controller."""
         BaseController.__init__(
             self,
@@ -140,7 +144,9 @@ def __init__(self,
             car_following_params,
             delay=time_delay,
             fail_safe=fail_safe,
-            noise=noise)
+            noise=noise,
+            display_warnings=display_warnings,
+        )

         self.veh_id = veh_id
         self.k_d = k_d
@@ -212,7 +218,8 @@ def __init__(self,
                  a=0,
                  time_delay=0.0,
                  noise=0,
-                 fail_safe=None):
+                 fail_safe=None,
+                 display_warnings=False):
         """Instantiate a Linear Adaptive Cruise controller."""
         BaseController.__init__(
             self,
@@ -220,7 +227,9 @@ def __init__(self,
             car_following_params,
             delay=time_delay,
             fail_safe=fail_safe,
-            noise=noise)
+            noise=noise,
+            display_warnings=display_warnings,
+        )

         self.veh_id = veh_id
         self.k_1 = k_1
@@ -289,7 +298,8 @@ def __init__(self,
                  v_max=30,
                  time_delay=0,
                  noise=0,
-                 fail_safe=None):
+                 fail_safe=None,
+                 display_warnings=False):
         """Instantiate an Optimal Vehicle Model controller."""
         BaseController.__init__(
             self,
@@ -297,7 +307,9 @@ def __init__(self,
             car_following_params,
             delay=time_delay,
             fail_safe=fail_safe,
-            noise=noise)
+            noise=noise,
+            display_warnings=display_warnings,
+        )
         self.veh_id = veh_id
         self.v_max = v_max
         self.alpha = alpha
@@ -364,7 +376,8 @@ def __init__(self,
                  h_st=5,
                  time_delay=0.0,
                  noise=0,
-                 fail_safe=None):
+                 fail_safe=None,
+                 display_warnings=False):
         """Instantiate a Linear OVM controller."""
         BaseController.__init__(
             self,
@@ -372,7 +385,9 @@ def __init__(self,
             car_following_params,
             delay=time_delay,
             fail_safe=fail_safe,
-            noise=noise)
+            noise=noise,
+            display_warnings=display_warnings,
+        )

         self.veh_id = veh_id
         # 4.8*1.85 for case I, 3.8*1.85 for case II, per Nakayama
         self.v_max = v_max
@@ -445,6 +460,7 @@ def __init__(self,
                  time_delay=0.0,
                  noise=0,
                  fail_safe=None,
+                 display_warnings=False,
                  car_following_params=None):
         """Instantiate an IDM controller."""
         BaseController.__init__(
@@ -453,7 +469,9 @@ def __init__(self,
             car_following_params,
             delay=time_delay,
             fail_safe=fail_safe,
-            noise=noise)
+            noise=noise,
+            display_warnings=display_warnings,
+        )
         self.v0 = v0
         self.T = T
         self.a = a
@@ -546,7 +564,8 @@ def __init__(self,
                  tau=1,
                  delay=0,
                  noise=0,
-                 fail_safe=None):
+                 fail_safe=None,
+                 display_warnings=False):
         """Instantiate a Gipps' controller."""
         BaseController.__init__(
             self,
@@ -554,8 +573,9 @@ def __init__(self,
             car_following_params,
             delay=delay,
             fail_safe=fail_safe,
-            noise=noise
-        )
+            noise=noise,
+            display_warnings=display_warnings,
+        )

         self.v_desired = v0
         self.acc = acc
@@ -627,7 +647,8 @@ def __init__(self,
                  want_max_accel=False,
                  time_delay=0,
                  noise=0,
-                 fail_safe=None):
+                 fail_safe=None,
+                 display_warnings=False):
         """Instantiate a Bando controller."""
BaseController.__init__( self, @@ -636,6 +657,7 @@ def __init__(self, delay=time_delay, fail_safe=fail_safe, noise=noise, + display_warnings=display_warnings, ) self.veh_id = veh_id self.v_max = v_max @@ -647,6 +669,7 @@ def __init__(self, def get_accel(self, env): """See parent class.""" + # without generating waves. lead_id = env.k.vehicle.get_leader(self.veh_id) if not lead_id: # no car ahead if self.want_max_accel: diff --git a/flow/controllers/velocity_controllers.py b/flow/controllers/velocity_controllers.py index 2e4b7c22a..4d8bfec79 100644 --- a/flow/controllers/velocity_controllers.py +++ b/flow/controllers/velocity_controllers.py @@ -26,10 +26,12 @@ def __init__(self, veh_id, car_following_params, v_des=15, - danger_edges=None): + danger_edges=None, + control_length=None, + no_control_edges=None): """Instantiate FollowerStopper.""" BaseController.__init__( - self, veh_id, car_following_params, delay=1.0, + self, veh_id, car_following_params, delay=0.0, fail_safe='safe_velocity') # desired speed of the vehicle @@ -45,7 +47,10 @@ def __init__(self, self.d_1 = 1.5 self.d_2 = 1.0 self.d_3 = 0.5 + self.danger_edges = danger_edges if danger_edges else {} + self.control_length = control_length + self.no_control_edges = no_control_edges def find_intersection_dist(self, env): """Find distance to intersection. @@ -74,46 +79,53 @@ def find_intersection_dist(self, env): def get_accel(self, env): """See parent class.""" - lead_id = env.k.vehicle.get_leader(self.veh_id) - this_vel = env.k.vehicle.get_speed(self.veh_id) - lead_vel = env.k.vehicle.get_speed(lead_id) - - if self.v_des is None: + if env.time_counter < env.env_params.warmup_steps * env.env_params.sims_per_step: return None - - if lead_id is None: - v_cmd = self.v_des else: - dx = env.k.vehicle.get_headway(self.veh_id) - dv_minus = min(lead_vel - this_vel, 0) - - dx_1 = self.dx_1_0 + 1 / (2 * self.d_1) * dv_minus**2 - dx_2 = self.dx_2_0 + 1 / (2 * self.d_2) * dv_minus**2 - dx_3 = self.dx_3_0 + 1 / (2 * self.d_3) * dv_minus**2 - v = min(max(lead_vel, 0), self.v_des) - # compute the desired velocity - if dx <= dx_1: - v_cmd = 0 - elif dx <= dx_2: - v_cmd = v * (dx - dx_1) / (dx_2 - dx_1) - elif dx <= dx_3: - v_cmd = v + (self.v_des - this_vel) * (dx - dx_2) \ - / (dx_3 - dx_2) - else: - v_cmd = self.v_des + lead_id = env.k.vehicle.get_leader(self.veh_id) + this_vel = env.k.vehicle.get_speed(self.veh_id) + lead_vel = env.k.vehicle.get_speed(lead_id) - edge = env.k.vehicle.get_edge(self.veh_id) - - if edge == "": - return None + if self.v_des is None: + return None - if self.find_intersection_dist(env) <= 10 and \ - env.k.vehicle.get_edge(self.veh_id) in self.danger_edges or \ - env.k.vehicle.get_edge(self.veh_id)[0] == ":": - return None - else: - # compute the acceleration from the desired velocity - return (v_cmd - this_vel) / env.sim_step + if lead_id is None: + v_cmd = self.v_des + else: + dx = env.k.vehicle.get_headway(self.veh_id) + dv_minus = min(lead_vel - this_vel, 0) + + dx_1 = self.dx_1_0 + 1 / (2 * self.d_1) * dv_minus**2 + dx_2 = self.dx_2_0 + 1 / (2 * self.d_2) * dv_minus**2 + dx_3 = self.dx_3_0 + 1 / (2 * self.d_3) * dv_minus**2 + v = min(max(lead_vel, 0), self.v_des) + # compute the desired velocity + if dx <= dx_1: + v_cmd = 0 + elif dx <= dx_2: + v_cmd = v * (dx - dx_1) / (dx_2 - dx_1) + elif dx <= dx_3: + v_cmd = v + (self.v_des - this_vel) * (dx - dx_2) \ + / (dx_3 - dx_2) + else: + v_cmd = self.v_des + + edge = env.k.vehicle.get_edge(self.veh_id) + + if edge == "": + return None + + if 
(self.find_intersection_dist(env) <= 10 and
+                    env.k.vehicle.get_edge(self.veh_id) in self.danger_edges) or \
+                    env.k.vehicle.get_edge(self.veh_id)[0] == ":" \
+                    or (self.control_length and (env.k.vehicle.get_x_by_id(self.veh_id) < self.control_length[0]
+                    or env.k.vehicle.get_x_by_id(self.veh_id) > self.control_length[1])) \
+                    or (self.no_control_edges is not None and len(self.no_control_edges) > 0
+                    and edge in self.no_control_edges):
+                return None
+            else:
+                # compute the acceleration from the desired velocity
+                return np.clip((v_cmd - this_vel) / env.sim_step, -np.abs(self.max_deaccel), self.max_accel)


 class NonLocalFollowerStopper(FollowerStopper):
@@ -154,11 +166,6 @@ def get_accel(self, env):

         if edge == "":
             return None
-
-        if self.find_intersection_dist(env) <= 10 and \
-                env.k.vehicle.get_edge(self.veh_id) in self.danger_edges or \
-                env.k.vehicle.get_edge(self.veh_id)[0] == ":":
-            return None
         else:
             # compute the acceleration from the desired velocity
             return (v_cmd - this_vel) / env.sim_step
@@ -184,7 +191,7 @@ class PISaturation(BaseController):

     def __init__(self, veh_id, car_following_params):
         """Instantiate PISaturation."""
-        BaseController.__init__(self, veh_id, car_following_params, delay=1.0)
+        BaseController.__init__(self, veh_id, car_following_params, delay=0.0)

         # maximum achievable acceleration by the vehicle
         self.max_accel = car_following_params.controller_params['accel']
diff --git a/flow/core/experiment.py b/flow/core/experiment.py
index 69a78cb0e..38599b002 100755
--- a/flow/core/experiment.py
+++ b/flow/core/experiment.py
@@ -1,11 +1,19 @@
 """Contains an experiment class for running simulations."""
-from flow.core.util import emission_to_csv
 from flow.utils.registry import make_create_env
-import datetime
+from flow.data_pipeline.data_pipeline import upload_to_s3
+from flow.data_pipeline.data_pipeline import get_configuration
+from flow.data_pipeline.data_pipeline import generate_trajectory_table
+from flow.data_pipeline.data_pipeline import write_dict_to_csv
+from flow.data_pipeline.leaderboard_utils import network_name_translate
+from flow.visualize.time_space_diagram import tsd_main
+from collections import defaultdict
+from datetime import timezone
+from datetime import datetime
 import logging
 import time
-import os
 import numpy as np
+import uuid
+import os


 class Experiment:
@@ -81,11 +89,17 @@ def __init__(self, flow_params, custom_callables=None):
         self.env = create_env()

         logging.info(" Starting experiment {} at {}".format(
-            self.env.network.name, str(datetime.datetime.utcnow())))
+            self.env.network.name, str(datetime.utcnow())))

         logging.info("Initializing environment.")

-    def run(self, num_runs, rl_actions=None, convert_to_csv=False):
+    def run(self,
+            num_runs,
+            rl_actions=None,
+            convert_to_csv=False,
+            to_aws=None,
+            only_query="",
+            is_baseline=False):
         """Run the given network for a set number of runs.

         Parameters
@@ -98,6 +112,16 @@ def run(self, num_runs, rl_actions=None, convert_to_csv=False):
         convert_to_csv : bool
             Specifies whether to convert the emission file created by sumo
             into a csv file
+        to_aws: str
+            Specifies the S3 partition in which to store the output file;
+            this is used later for querying. If None, the output is not
+            uploaded to S3.
+        only_query: str
+            Specifies which queries should be automatically run when the
+            simulation data gets uploaded to S3. If an empty str is passed
+            in, no queries are run.
+        is_baseline: bool
+            Specifies whether this is a baseline run.
        Returns
        -------
@@ -137,6 +161,41 @@ def rl_actions(*_):
         t = time.time()
         times = []

+        if convert_to_csv and self.env.simulator == "traci":
+            # data pipeline
+            source_id = 'flow_{}'.format(uuid.uuid4().hex)
+            metadata = defaultdict(lambda: [])
+
+            # collect current time
+            cur_datetime = datetime.now(timezone.utc)
+            cur_date = cur_datetime.date().isoformat()
+            cur_time = cur_datetime.time().isoformat()
+
+            if to_aws:
+                # collecting information for metadata table
+                metadata['source_id'].append(source_id)
+                metadata['submission_time'].append(cur_time)
+                metadata['network'].append(
+                    network_name_translate(self.env.network.name.split('_20')[0]))
+                metadata['is_baseline'].append(str(is_baseline))
+                name, strategy = get_configuration()
+                metadata['submitter_name'].append(name)
+                metadata['strategy'].append(strategy)
+
+            # emission-specific parameters
+            dir_path = self.env.sim_params.emission_path
+            trajectory_table_path = os.path.join(
+                dir_path, '{}.csv'.format(source_id))
+            metadata_table_path = os.path.join(
+                dir_path, '{}_METADATA.csv'.format(source_id))
+        else:
+            source_id = None
+            trajectory_table_path = None
+            metadata_table_path = None
+            metadata = None
+            cur_date = None
+
+        emission_files = []
         for i in range(num_runs):
             ret = 0
             vel = []
@@ -157,7 +216,7 @@ def rl_actions(*_):
                 for (key, lambda_func) in self.custom_callables.items():
                     custom_vals[key].append(lambda_func(self.env))

-                if done:
+                if (isinstance(done, dict) and done['__all__']) or done is True:
                     break

             # Store the information from the run in info_dict.
@@ -170,6 +229,11 @@

             print("Round {0}, return: {1}".format(i, ret))

+            # Save emission data at the end of every rollout. This is skipped
+            # by the internal method if no emission path was specified.
+            if self.env.simulator == "traci":
+                emission_files.append(self.env.k.simulation.save_emission(run_id=i))
+
         # Print the averages/std for all variables in the info_dict.
         for key in info_dict.keys():
             print("Average, std {}: {}, {}".format(
@@ -179,20 +243,40 @@
         print("steps/second:", np.mean(times))
         self.env.terminate()

-        if convert_to_csv and self.env.simulator == "traci":
-            # wait a short period of time to ensure the xml file is readable
-            time.sleep(0.1)
-
-            # collect the location of the emission file
-            dir_path = self.env.sim_params.emission_path
-            emission_filename = \
-                "{0}-emission.xml".format(self.env.network.name)
-            emission_path = os.path.join(dir_path, emission_filename)
-
-            # convert the emission file into a csv
-            emission_to_csv(emission_path)
-
-            # Delete the .xml version of the emission file.
-            os.remove(emission_path)
+        if to_aws:
+            generate_trajectory_table(emission_files, trajectory_table_path, source_id)
+            write_dict_to_csv(metadata_table_path, metadata, True)
+            tsd_main(
+                trajectory_table_path,
+                {
+                    'network': self.env.network.__class__,
+                    'env': self.env.env_params,
+                    'sim': self.env.sim_params
+                },
+                min_speed=0,
+                max_speed=10
+            )
+            upload_to_s3(
+                'circles.data.pipeline',
+                'metadata_table/date={0}/partition_name={1}_METADATA/'
+                '{1}_METADATA.csv'.format(cur_date, source_id),
+                metadata_table_path
+            )
+            upload_to_s3(
+                'circles.data.pipeline',
+                'fact_vehicle_trace/date={0}/partition_name={1}/'
+                '{1}.csv'.format(cur_date, source_id),
+                trajectory_table_path,
+                {'network': metadata['network'][0],
+                 'is_baseline': metadata['is_baseline'][0]}
+            )
+            upload_to_s3(
+                'circles.data.pipeline',
+                'time_space_diagram/date={0}/partition_name={1}/'
+                '{1}.png'.format(cur_date, source_id),
+                trajectory_table_path.replace('csv', 'png')
+            )
+            os.remove(trajectory_table_path)
+            os.remove(metadata_table_path)

         return info_dict
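The simulation-kernel changes below introduce `stored_data`, a three-level mapping from vehicle id to timestamp to named samples, which `save_emission` later flattens into CSV rows. A minimal self-contained sketch of that accumulate-then-flatten pattern, with the field set abbreviated for brevity:

```python
stored_data = {}

# accumulate one sample per vehicle per simulation step
def record(veh_id, t, speed, headway):
    stored_data.setdefault(veh_id, {})[t] = {
        "speed": speed, "headway": headway}

record("veh_0", 0.1, 4.8, 12.3)
record("veh_0", 0.2, 4.9, 12.1)

# flatten into column lists, one row per (vehicle, time) pair,
# matching the layout expected by csv.writer.writerows(zip(...))
final_data = {"time": [], "id": [], "speed": [], "headway": []}
for veh_id, samples in stored_data.items():
    for t, row in samples.items():
        final_data["time"].append(t)
        final_data["id"].append(veh_id)
        final_data["speed"].append(row["speed"])
        final_data["headway"].append(row["headway"])
```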
diff --git a/flow/core/kernel/simulation/traci.py b/flow/core/kernel/simulation/traci.py
index 8d51b8e25..79fc12eaa 100644
--- a/flow/core/kernel/simulation/traci.py
+++ b/flow/core/kernel/simulation/traci.py
@@ -11,7 +11,7 @@
 import logging
 import subprocess
 import signal
-
+import csv

 # Number of retries on restarting SUMO before giving up
 RETRIES_ON_ERROR = 10
@@ -21,6 +21,32 @@ class TraCISimulation(KernelSimulation):
     """Sumo simulation kernel.

     Extends flow.core.kernel.simulation.KernelSimulation
+
+    Attributes
+    ----------
+    sumo_proc : subprocess.Popen
+        contains the subprocess.Popen instance used to start traci
+    sim_step : float
+        seconds per simulation step
+    emission_path : str or None
+        Path to the folder in which to create the emissions output. Emissions
+        output is not generated if this value is not specified
+    time : float
+        used to internally keep track of the simulation time
+    stored_data : dict <str, dict <float, dict <str, Any>>>
+        a dict object used to store additional data if an emission file is
+        provided. The first key corresponds to the name of the vehicle, the
+        second corresponds to the time the sample was issued, and the final
+        keys represent the additional data stored at every given time for
+        every vehicle, and consist of the following:
+
+        * acceleration (no noise): the accelerations issued to the vehicle,
+          excluding noise
+        * acceleration (requested): the requested acceleration by the vehicle,
+          including noise
+        * acceleration (actual): the actual acceleration of the vehicle,
+          computed as the difference between the vehicle's current and
+          previous speeds, divided by the sim_step term
     """

     def __init__(self, master_kernel):
@@ -33,8 +59,12 @@ def __init__(self, master_kernel):
             sub-kernels)
         """
         KernelSimulation.__init__(self, master_kernel)
-        # contains the subprocess.Popen instance used to start traci
+
         self.sumo_proc = None
+        self.sim_step = None
+        self.emission_path = None
+        self.time = 0
+        self.stored_data = dict()

     def pass_api(self, kernel_api):
         """See parent class.
@@ -62,10 +92,61 @@ def simulation_step(self):

     def update(self, reset):
         """See parent class."""
-        pass
+        if reset:
+            self.time = 0
+        else:
+            self.time += self.sim_step
+
+        # Collect the additional data to store in the emission file.
+        if self.emission_path is not None:
+            kv = self.master_kernel.vehicle
+            for veh_id in self.master_kernel.vehicle.get_ids():
+                t = round(self.time, 2)
+
+                # some miscellaneous pre-processing
+                position = kv.get_2d_position(veh_id)
+
+                # Make sure dictionaries corresponding to the vehicle and
+                # time are available.
+                if veh_id not in self.stored_data.keys():
+                    self.stored_data[veh_id] = dict()
+                if t not in self.stored_data[veh_id].keys():
+                    self.stored_data[veh_id][t] = dict()
+
+                # Add the speed, position, and lane data.
+                self.stored_data[veh_id][t].update({
+                    "speed": kv.get_speed(veh_id),
+                    "lane_number": kv.get_lane(veh_id),
+                    "edge_id": kv.get_edge(veh_id),
+                    "relative_position": kv.get_position(veh_id),
+                    "x": position[0],
+                    "y": position[1],
+                    "headway": kv.get_headway(veh_id),
+                    "leader_id": kv.get_leader(veh_id),
+                    "follower_id": kv.get_follower(veh_id),
+                    "leader_rel_speed":
+                        kv.get_speed(kv.get_leader(veh_id))
+                        - kv.get_speed(veh_id),
+                    "target_accel_with_noise_with_failsafe":
+                        kv.get_accel(veh_id, noise=True, failsafe=True),
+                    "target_accel_no_noise_no_failsafe":
+                        kv.get_accel(veh_id, noise=False, failsafe=False),
+                    "target_accel_with_noise_no_failsafe":
+                        kv.get_accel(veh_id, noise=True, failsafe=False),
+                    "target_accel_no_noise_with_failsafe":
+                        kv.get_accel(veh_id, noise=False, failsafe=True),
+                    "realized_accel":
+                        kv.get_realized_accel(veh_id),
+                    "road_grade": kv.get_road_grade(veh_id),
+                    "distance": kv.get_distance(veh_id),
+                })

     def close(self):
         """See parent class."""
+        # Save the emission data to a csv.
+        if self.emission_path is not None:
+            self.save_emission()
+
         self.kernel_api.close()

     def check_collision(self):
@@ -75,10 +156,24 @@ def check_collision(self):
     def start_simulation(self, network, sim_params):
         """Start a sumo simulation instance.

-        This method uses the configuration files created by the network class
-        to initialize a sumo instance. Also initializes a traci connection to
-        interface with sumo from Python.
+        This method performs the following operations:
+
+        1. It collects the simulation step size and the emission path
+           information. If an emission path is specified, it ensures that the
+           path exists.
+        2. It also uses the configuration files created by the network class
+           to initialize a sumo instance.
+        3. Finally, it initializes a traci connection to interface with sumo
+           from Python and returns the connection.
         """
+        # Save the simulation step size (for later use).
+        self.sim_step = sim_params.sim_step
+
+        # Update the emission path term.
+        self.emission_path = sim_params.emission_path
+        if self.emission_path is not None:
+            ensure_dir(self.emission_path)
+
         error = None
         for _ in range(RETRIES_ON_ERROR):
             try:
@@ -96,6 +191,11 @@
                 "--step-length", str(sim_params.sim_step)
             ]

+            # disable all collisions and teleporting in the simulation.
+            if sim_params.disable_collisions:
+                sumo_call.extend(["--collision.mingap-factor", str(0),
+                                  "--collision.action", str("none")])
+
             # use a ballistic integration step (if requested)
             if sim_params.use_ballistic:
                 sumo_call.append("--step-method.ballistic")
@@ -109,17 +209,6 @@
                 sumo_call.append("--lateral-resolution")
                 sumo_call.append(str(sim_params.lateral_resolution))

-            # add the emission path to the sumo command (if requested)
-            if sim_params.emission_path is not None:
-                ensure_dir(sim_params.emission_path)
-                emission_out = os.path.join(
-                    sim_params.emission_path,
-                    "{0}-emission.xml".format(network.name))
-                sumo_call.append("--emission-output")
-                sumo_call.append(emission_out)
-            else:
-                emission_out = None
-
             if sim_params.overtake_right:
                 sumo_call.append("--lanechange.overtake-right")
                 sumo_call.append("true")
@@ -146,7 +235,7 @@
             if sim_params.num_clients > 1:
                 logging.info(" Num clients are" + str(sim_params.num_clients))

-            logging.debug(" Emission file: " + str(emission_out))
+            logging.debug(" Emission file: " + str(self.emission_path))
             logging.debug(" Step length: " + str(sim_params.sim_step))

             # Opening the I/O thread to SUMO
@@ -180,3 +269,73 @@ def teardown_sumo(self):
             os.killpg(self.sumo_proc.pid, signal.SIGTERM)
         except Exception as e:
             print("Error during teardown: {}".format(e))
+
+    def save_emission(self, run_id=0):
+        """Save any collected emission data to a csv file.
+
+        If no data was collected, nothing happens. Moreover, any internally
+        stored data by this class is cleared whenever data is stored.
+
+        Parameters
+        ----------
+        run_id : int
+            the rollout number, appended to the name of the emission file. Used
+            to store emission files from multiple rollouts run sequentially.
+
+        Returns
+        -------
+        emission_file_path : str
+            the relative path of the emission file
+        """
+        # If there is no stored data, ignore this operation. This is to ensure
+        # that data isn't deleted if the operation is called twice.
+        if len(self.stored_data) == 0:
+            return
+
+        # Get a csv name for the emission file.
+        name = "{}-{}_emission.csv".format(
+            self.master_kernel.network.network.name, run_id)
+
+        # The name of all stored data-points (excluding id and time)
+        stored_ids = [
+            "x",
+            "y",
+            "speed",
+            "headway",
+            "leader_id",
+            "follower_id",
+            "leader_rel_speed",
+            "target_accel_with_noise_with_failsafe",
+            "target_accel_no_noise_no_failsafe",
+            "target_accel_with_noise_no_failsafe",
+            "target_accel_no_noise_with_failsafe",
+            "realized_accel",
+            "road_grade",
+            "edge_id",
+            "lane_number",
+            "distance",
+            "relative_position",
+        ]
+
+        # Update the stored data to push to the csv file.
+        final_data = {"time": [], "id": []}
+        final_data.update({key: [] for key in stored_ids})
+
+        for veh_id in self.stored_data.keys():
+            for t in self.stored_data[veh_id].keys():
+                final_data['time'].append(t)
+                final_data['id'].append(veh_id)
+                for key in stored_ids:
+                    final_data[key].append(self.stored_data[veh_id][t][key])
+
+        emission_file_path = os.path.join(self.emission_path, name)
+        with open(emission_file_path, "w") as f:
+            writer = csv.writer(f, delimiter=',')
+            writer.writerow(final_data.keys())
+            writer.writerows(zip(*final_data.values()))
+
+        # Clear all memory from the stored data. This is useful if this
+        # function is called in between resets.
+ self.stored_data.clear() + + return emission_file_path diff --git a/flow/core/kernel/vehicle/aimsun.py b/flow/core/kernel/vehicle/aimsun.py index ce0d026e5..16c94558a 100644 --- a/flow/core/kernel/vehicle/aimsun.py +++ b/flow/core/kernel/vehicle/aimsun.py @@ -65,6 +65,7 @@ def __init__(self, # number of vehicles to exit the network for every time-step self._num_arrived = [] self._arrived_ids = [] + self._arrived_rl_ids = [] # contains conversion from Flow-ID to Aimsun-ID self._id_aimsun2flow = {} @@ -174,11 +175,17 @@ def update(self, reset): added_vehicles = self.kernel_api.get_entered_ids() exited_vehicles = self.kernel_api.get_exited_ids() + # keep track of arrived rl vehicles + arrived_rl_ids = [] + # add the new vehicles if they should be tracked for aimsun_id in added_vehicles: veh_type = self.kernel_api.get_vehicle_type_name(aimsun_id) if veh_type in self.tracked_vehicle_types: self._add_departed(aimsun_id) + if aimsun_id in self.get_rl_ids(): + arrived_rl_ids.append(aimsun_id) + self._arrived_rl_ids.append(arrived_rl_ids) # remove the exited vehicles if they were tracked if not reset: @@ -639,6 +646,16 @@ def get_arrived_ids(self): """See parent class.""" raise NotImplementedError + def get_arrived_rl_ids(self, k=1): + """See parent class.""" + if len(self._arrived_rl_ids) > 0: + arrived = [] + for arr in self._arrived_rl_ids[-k:]: + arrived.extend(arr) + return arrived + else: + return 0 + def get_departed_ids(self): """See parent class.""" raise NotImplementedError diff --git a/flow/core/kernel/vehicle/base.py b/flow/core/kernel/vehicle/base.py index d97ade984..226528259 100644 --- a/flow/core/kernel/vehicle/base.py +++ b/flow/core/kernel/vehicle/base.py @@ -128,15 +128,22 @@ def remove(self, veh_id): pass @abstractmethod - def apply_acceleration(self, veh_id, acc): + def apply_acceleration(self, veh_id, acc, smooth_duration=0): """Apply the acceleration requested by a vehicle in the simulator. + In SUMO, this function applies setSpeed for smooth_duration=0, otherwise + the slowDown method applies acceleration smoothly over the smooth_duration + time (in seconds). For more information, see: + https://sumo.dlr.de/pydoc/traci._vehicle.html#VehicleDomain-slowDown + Parameters ---------- veh_id : str or list of str list of vehicle identifiers acc : float or array_like requested accelerations from the vehicles + smooth_duration : float + duration in seconds over which acceleration should be smoothly applied, default: 0 """ pass @@ -346,6 +353,23 @@ def get_fuel_consumption(self, veh_id, error=-1001): """ pass + @abstractmethod + def get_energy_model(self, veh_id, error=""): + """Return the energy model class object of the specified vehicle. + + Parameters + ---------- + veh_id : str or list of str + vehicle id, or list of vehicle ids + error : str + value that is returned if the vehicle is not found + + Returns + ------- + subclass of BaseEnergyModel + """ + pass + @abstractmethod def get_speed(self, veh_id, error=-1001): """Return the speed of the specified vehicle. 
@@ -741,3 +765,32 @@ def get_max_speed(self, veh_id, error):
         float
         """
         pass
+
+    ###########################################################################
+    #                        Methods for the data pipeline                    #
+    ###########################################################################
+
+    @abstractmethod
+    def get_accel(self, veh_id, noise=True, failsafe=True):
+        """Return the acceleration of vehicle with veh_id."""
+        pass
+
+    @abstractmethod
+    def update_accel(self, veh_id, accel, noise=True, failsafe=True):
+        """Update the stored acceleration of vehicle with veh_id."""
+        pass
+
+    @abstractmethod
+    def get_2d_position(self, veh_id, error=-1001):
+        """Return (x, y) position of vehicle with veh_id."""
+        pass
+
+    @abstractmethod
+    def get_realized_accel(self, veh_id):
+        """Return the acceleration that the vehicle actually realized."""
+        pass
+
+    @abstractmethod
+    def get_road_grade(self, veh_id):
+        """Return the road grade of the vehicle with veh_id."""
+        pass
diff --git a/flow/core/kernel/vehicle/traci.py b/flow/core/kernel/vehicle/traci.py
index 134bac49f..8f76b40d0 100644
--- a/flow/core/kernel/vehicle/traci.py
+++ b/flow/core/kernel/vehicle/traci.py
@@ -87,6 +87,8 @@ def __init__(self,

         # old speeds used to compute accelerations
         self.previous_speeds = {}
+        # the time at which the previous speed was recorded; used to compute
+        # realized_accel
+        self.previous_time = 0

     def initialize(self, vehicles):
         """Initialize vehicle state information.
@@ -290,6 +292,10 @@ def _add_departed(self, veh_id, veh_type):
         # specify the type
         self.__vehicles[veh_id]["type"] = veh_type

+        # specify the energy model
+        self.__vehicles[veh_id]["energy_model"] = self.type_parameters[
+            veh_type]["energy_model"]()
+
         car_following_params = \
             self.type_parameters[veh_type]["car_following_params"]
@@ -336,7 +342,8 @@
             tc.VAR_POSITION,
             tc.VAR_ANGLE,
             tc.VAR_SPEED_WITHOUT_TRACI,
-            tc.VAR_FUELCONSUMPTION
+            tc.VAR_FUELCONSUMPTION,
+            tc.VAR_DISTANCE
         ])
         self.kernel_api.vehicle.subscribeLeader(veh_id, 2000)
@@ -521,10 +528,13 @@ def get_arrived_ids(self):
         """See parent class."""
         return self._arrived_ids

-    def get_arrived_rl_ids(self):
+    def get_arrived_rl_ids(self, k=1):
         """See parent class."""
         if len(self._arrived_rl_ids) > 0:
-            return self._arrived_rl_ids[-1]
+            arrived = []
+            for arr in self._arrived_rl_ids[-k:]:
+                arrived.extend(arr)
+            return arrived
         else:
             return 0
@@ -543,6 +553,16 @@ def get_fuel_consumption(self, veh_id, error=-1001):
             return [self.get_fuel_consumption(vehID, error) for vehID in veh_id]
         return self.__sumo_obs.get(veh_id, {}).get(tc.VAR_FUELCONSUMPTION, error) * ml_to_gallons

+    def get_energy_model(self, veh_id, error=""):
+        """See parent class."""
+        if isinstance(veh_id, (list, np.ndarray)):
+            return [self.get_energy_model(vehID) for vehID in veh_id]
+        try:
+            return self.__vehicles.get(veh_id, {'energy_model': error})['energy_model']
+        except KeyError:
+            print("Energy model not specified for vehicle {}".format(veh_id))
+            raise
+
     def get_previous_speed(self, veh_id, error=-1001):
         """See parent class."""
         if isinstance(veh_id, (list, np.ndarray)):
@@ -747,7 +767,7 @@ def _multi_lane_headways(self):
             for lane in range(max_lanes):
                 edge_dict[edge][lane].sort(key=lambda x: x[1])

-        for veh_id in self.get_rl_ids():
+        for veh_id in self.get_ids():
             # collect the lane leaders, followers, headways, and tailways for
             # each vehicle
             edge = self.get_edge(veh_id)
@@ -949,18 +969,36 @@ def _prev_edge_followers(self, veh_id, edge_dict, lane, num_edges):

         return tailway, follower

-    def apply_acceleration(self, veh_ids, acc):
+    def apply_acceleration(self,
+                           veh_ids, acc, smooth_duration=0):
         """See parent class."""
-        # to hand the case of a single vehicle
+        # to handle the case of a single vehicle
         if type(veh_ids) == str:
             veh_ids = [veh_ids]
             acc = [acc]

         for i, vid in enumerate(veh_ids):
             if acc[i] is not None and vid in self.get_ids():
+                self.__vehicles[vid]["accel"] = acc[i]
                 this_vel = self.get_speed(vid)
                 next_vel = max([this_vel + acc[i] * self.sim_step, 0])
-                self.kernel_api.vehicle.slowDown(vid, next_vel, 1e-3)
+                if smooth_duration:
+                    self.kernel_api.vehicle.slowDown(vid, next_vel, smooth_duration)
+                else:
+                    self.kernel_api.vehicle.setSpeed(vid, next_vel)
+
+    def apply_acceleration_not_smooth(self, veh_ids, acc):
+        """See parent class."""
+        # to handle the case of a single vehicle
+        if type(veh_ids) == str:
+            veh_ids = [veh_ids]
+            acc = [acc]
+
+        for i, vid in enumerate(veh_ids):
+            if acc[i] is not None and vid in self.get_ids():
+                self.__vehicles[vid]["accel"] = acc[i]
+                this_vel = self.get_speed(vid)
+                next_vel = max([this_vel + acc[i] * self.sim_step, 0])
+                self.kernel_api.vehicle.setSpeed(vid, next_vel)

     def apply_lane_change(self, veh_ids, direction):
         """See parent class."""
@@ -990,7 +1028,7 @@ def apply_lane_change(self, veh_ids, direction):
             # perform the requested lane action in TraCI
             if target_lane != this_lane:
                 self.kernel_api.vehicle.changeLane(
-                    veh_id, int(target_lane), 100000)
+                    veh_id, int(target_lane), self.sim_step)

                 if veh_id in self.get_rl_ids():
                     self.prev_last_lc[veh_id] = \
@@ -1010,6 +1048,8 @@ def choose_routes(self, veh_ids, route_choices):

     def get_x_by_id(self, veh_id):
         """See parent class."""
+        if isinstance(veh_id, (list, np.ndarray)):
+            return [self.get_x_by_id(vehID) for vehID in veh_id]
         if self.get_edge(veh_id) == '':
             # occurs when a vehicle has crashed or is teleported for some
             # other reason
             return 0.
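The `setSpeed`/`slowDown` dispatch above is the core behavioral change here: `setSpeed` takes effect within one simulation step, while a nonzero `smooth_duration` ramps the speed via `slowDown`. A standalone sketch of the logic, with `api` standing in for a `traci.vehicle`-like handle (`command_acceleration` is a hypothetical helper, not part of the kernel):

```python
def command_acceleration(api, vid, cur_speed, accel, sim_step,
                         smooth_duration=0.0):
    """Translate a requested acceleration into a TraCI speed command."""
    # integrate one step forward; commanded speeds may not be negative
    next_vel = max(cur_speed + accel * sim_step, 0.0)
    if smooth_duration:
        # ramp linearly to next_vel over smooth_duration seconds
        api.slowDown(vid, next_vel, smooth_duration)
    else:
        # take effect at the next simulation step
        api.setSpeed(vid, next_vel)
```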
@@ -1118,3 +1158,56 @@ def get_max_speed(self, veh_id, error=-1001):
     def set_max_speed(self, veh_id, max_speed):
         """See parent class."""
         self.kernel_api.vehicle.setMaxSpeed(veh_id, max_speed)
+
+    def get_accel(self, veh_id, noise=True, failsafe=True):
+        """See parent class."""
+        metric_name = 'accel'
+        if noise:
+            metric_name += '_with_noise'
+        else:
+            metric_name += '_no_noise'
+        if failsafe:
+            metric_name += '_with_failsafe'
+        else:
+            metric_name += '_no_failsafe'
+
+        accel = self.__vehicles[veh_id].get(metric_name, None)
+        # fall back on the realized acceleration if no acceleration of this
+        # form was stored (an explicit None check, so that a stored
+        # acceleration of 0 is not discarded)
+        if accel is None:
+            accel = self.get_realized_accel(veh_id)
+        return accel
+
+    def update_accel(self, veh_id, accel, noise=True, failsafe=True):
+        """See parent class."""
+        metric_name = 'accel'
+        if noise:
+            metric_name += '_with_noise'
+        else:
+            metric_name += '_no_noise'
+        if failsafe:
+            metric_name += '_with_failsafe'
+        else:
+            metric_name += '_no_failsafe'
+
+        self.__vehicles[veh_id][metric_name] = accel
+
+    def get_realized_accel(self, veh_id):
+        """See parent class."""
+        if self.get_distance(veh_id) == 0:
+            return 0
+        return (self.get_speed(veh_id) - self.get_previous_speed(veh_id)) / self.sim_step
+
+    def get_2d_position(self, veh_id, error=-1001):
+        """See parent class."""
+        return self.__sumo_obs.get(veh_id, {}).get(tc.VAR_POSITION, error)
+
+    def get_distance(self, veh_id, error=-1001):
+        """See parent class."""
+        return self.__sumo_obs.get(veh_id, {}).get(tc.VAR_DISTANCE, error)
+
+    def get_road_grade(self, veh_id):
+        """See parent class."""
+        # TODO : Brent
+        return 0
diff --git a/flow/core/params.py b/flow/core/params.py
index 79ad8d689..6f3ec2fbc 100755
--- a/flow/core/params.py
+++ b/flow/core/params.py
@@ -7,6 +7,10 @@
 from flow.controllers.car_following_models import SimCarFollowingController
 from flow.controllers.rlcontroller import RLController
 from flow.controllers.lane_change_controllers import SimLaneChangeController
+from flow.energy_models.toyota_energy import PriusEnergy
+from flow.energy_models.toyota_energy import TacomaEnergy
+from flow.energy_models.power_demand import PDMCombustionEngine
+from flow.energy_models.power_demand import PDMElectric


 SPEED_MODES = {
@@ -39,6 +43,9 @@
     "only_right_drive_safe": 576
 }

+ENERGY_MODELS = set([PriusEnergy, TacomaEnergy, PDMCombustionEngine, PDMElectric])
+DEFAULT_ENERGY_MODEL = PDMCombustionEngine
+
 # Traffic light defaults
 PROGRAM_ID = 1
 MAX_GAP = 3.0
@@ -262,6 +269,7 @@ def add(self,
             num_vehicles=0,
             car_following_params=None,
             lane_change_params=None,
+            energy_model=DEFAULT_ENERGY_MODEL,
             color=None):
         """Add a sequence of vehicles to the list of vehicles in the network.

@@ -298,6 +306,12 @@ def add(self,
             # FIXME: depends on simulator
             lane_change_params = SumoLaneChangeParams()

+        if energy_model not in ENERGY_MODELS:
+            print('{} for vehicle {} is not a valid energy model. '
+                  'Defaulting to {}\n'.format(
+                      energy_model, veh_id, DEFAULT_ENERGY_MODEL))
+            energy_model = DEFAULT_ENERGY_MODEL
+
         type_params = {}
         type_params.update(car_following_params.controller_params)
         type_params.update(lane_change_params.controller_params)
@@ -311,7 +325,8 @@ def add(self,
             "routing_controller": routing_controller,
             "initial_speed": initial_speed,
             "car_following_params": car_following_params,
-            "lane_change_params": lane_change_params}
+            "lane_change_params": lane_change_params,
+            "energy_model": energy_model}

         if color:
             type_params['color'] = color
@@ -334,7 +349,9 @@ def add(self,
                 "car_following_params":
                     car_following_params,
                 "lane_change_params":
-                    lane_change_params
+                    lane_change_params,
+                "energy_model":
+                    energy_model
             })

         # This is used to return the actual headways from the vehicles class.
@@ -588,6 +605,8 @@ class SumoParams(SimParams):
         current time step
     use_ballistic: bool, optional
         If true, use a ballistic integration step instead of an euler step
+    disable_collisions: bool, optional
+        If true, disables explicit collision checking and teleporting in SUMO
     """

     def __init__(self,
@@ -609,7 +628,8 @@ def __init__(self,
                  teleport_time=-1,
                  num_clients=1,
                  color_by_speed=False,
-                 use_ballistic=False):
+                 use_ballistic=False,
+                 disable_collisions=False):
         """Instantiate SumoParams."""
         super(SumoParams, self).__init__(
             sim_step, render, restart_instance, emission_path, save_render,
@@ -624,6 +644,7 @@ def __init__(self,
         self.num_clients = num_clients
         self.color_by_speed = color_by_speed
         self.use_ballistic = use_ballistic
+        self.disable_collisions = disable_collisions


 class EnvParams:
@@ -657,6 +678,9 @@ class EnvParams:
         specifies whether to clip actions from the policy by their range when
         they are inputted to the reward function. Note that the actions are
         still clipped before they are provided to `apply_rl_actions`.
+    done_at_exit : bool, optional
+        If true, done is returned as True when the vehicle exits. This is only
+        applied to multi-agent environments.
     """

     def __init__(self,
@@ -665,7 +689,8 @@ def __init__(self,
                  warmup_steps=0,
                  sims_per_step=1,
                  evaluate=False,
-                 clip_actions=True):
+                 clip_actions=True,
+                 done_at_exit=True):
         """Instantiate EnvParams."""
         self.additional_params = \
             additional_params if additional_params is not None else {}
@@ -674,6 +699,7 @@ def __init__(self,
         self.sims_per_step = sims_per_step
         self.evaluate = evaluate
         self.clip_actions = clip_actions
+        self.done_at_exit = done_at_exit

     def get_additional_param(self, key):
         """Return a variable from additional_params."""
diff --git a/flow/core/rewards.py b/flow/core/rewards.py
index 3cca916f5..20ed1c6a7 100755
--- a/flow/core/rewards.py
+++ b/flow/core/rewards.py
@@ -307,58 +307,26 @@ def punish_rl_lane_changes(env, penalty=1):


 def energy_consumption(env, gain=.001):
-    """Calculate power consumption of a vehicle.
+    """Calculate power consumption for all vehicles.

     Assumes vehicle is an average sized vehicle.
     The power calculated here is the lower bound of the actual power consumed
     by a vehicle.
- """ - power = 0 - - M = 1200 # mass of average sized vehicle (kg) - g = 9.81 # gravitational acceleration (m/s^2) - Cr = 0.005 # rolling resistance coefficient - Ca = 0.3 # aerodynamic drag coefficient - rho = 1.225 # air density (kg/m^3) - A = 2.6 # vehicle cross sectional area (m^2) - for veh_id in env.k.vehicle.get_ids(): - speed = env.k.vehicle.get_speed(veh_id) - prev_speed = env.k.vehicle.get_previous_speed(veh_id) - - accel = abs(speed - prev_speed) / env.sim_step - power += M * speed * accel + M * g * Cr * speed + 0.5 * rho * A * Ca * speed ** 3 - - return -gain * power - - -def veh_energy_consumption(env, veh_id, gain=.001): - """Calculate power consumption of a vehicle. - - Assumes vehicle is an average sized vehicle. - The power calculated here is the lower bound of the actual power consumed - by a vehicle. + Parameters + ---------- + env : flow.envs.Env + the environment variable, which contains information on the current + state of the system. + gain : float + scaling factor for the reward """ - power = 0 - - M = 1200 # mass of average sized vehicle (kg) - g = 9.81 # gravitational acceleration (m/s^2) - Cr = 0.005 # rolling resistance coefficient - Ca = 0.3 # aerodynamic drag coefficient - rho = 1.225 # air density (kg/m^3) - A = 2.6 # vehicle cross sectional area (m^2) - speed = env.k.vehicle.get_speed(veh_id) - prev_speed = env.k.vehicle.get_previous_speed(veh_id) - - accel = abs(speed - prev_speed) / env.sim_step - - power += M * speed * accel + M * g * Cr * speed + 0.5 * rho * A * Ca * speed ** 3 - - return -gain * power + veh_ids = env.k.vehicle.get_ids() + return veh_energy_consumption(env, veh_ids, gain) -def miles_per_megajoule(env, veh_ids=None, gain=.001): - """Calculate miles per mega-joule of either a particular vehicle or the total average of all the vehicles. +def veh_energy_consumption(env, veh_ids=None, gain=.001): + """Calculate power consumption of a vehicle. Assumes vehicle is an average sized vehicle. The power calculated here is the lower bound of the actual power consumed @@ -369,70 +337,64 @@ def miles_per_megajoule(env, veh_ids=None, gain=.001): env : flow.envs.Env the environment variable, which contains information on the current state of the system. 
- veh_ids : [list] - list of veh_ids to compute the reward over + veh_ids : [list] or str + list of veh_ids or single veh_id to compute the reward over gain : float scaling factor for the reward """ - mpj = 0 - counter = 0 if veh_ids is None: veh_ids = env.k.vehicle.get_ids() elif not isinstance(veh_ids, list): veh_ids = [veh_ids] - for veh_id in veh_ids: - speed = env.k.vehicle.get_speed(veh_id) - # convert to be positive since the function called is a penalty - power = -veh_energy_consumption(env, veh_id, gain=1.0) - if power > 0 and speed >= 0.0: - counter += 1 - # meters / joule is (v * \delta t) / (power * \delta t) - mpj += speed / power - if counter > 0: - mpj /= counter - # convert from meters per joule to miles per joule - mpj /= 1609.0 - # convert from miles per joule to miles per megajoule - mpj *= 10**6 + power = 0 + for veh_id in veh_ids: + if veh_id not in env.k.vehicle.previous_speeds: + continue + energy_model = env.k.vehicle.get_energy_model(veh_id) + if energy_model != "": + speed = env.k.vehicle.get_speed(veh_id) + accel = env.k.vehicle.get_accel(veh_id, noise=False, failsafe=True) + grade = env.k.vehicle.get_road_grade(veh_id) + power += energy_model.get_instantaneous_power(accel, speed, grade) - return mpj * gain + return -gain * power -def miles_per_gallon(env, veh_ids=None, gain=.001): - """Calculate mpg of either a particular vehicle or the total average of all the vehicles. - - Assumes vehicle is an average sized vehicle. - The power calculated here is the lower bound of the actual power consumed - by a vehicle. +def instantaneous_mpg(env, veh_ids=None, gain=.001): + """Calculate the instantaneous mpg for every simulation step specific to the vehicle type. Parameters ---------- env : flow.envs.Env the environment variable, which contains information on the current state of the system. 
- veh_ids : [list] - list of veh_ids to compute the reward over + veh_ids : [list] or str + list of veh_ids or single veh_id to compute the reward over gain : float scaling factor for the reward """ - mpg = 0 - counter = 0 if veh_ids is None: veh_ids = env.k.vehicle.get_ids() elif not isinstance(veh_ids, list): veh_ids = [veh_ids] + + cumulative_gallons = 0 + cumulative_distance = 0 for veh_id in veh_ids: - speed = env.k.vehicle.get_speed(veh_id) - gallons_per_s = env.k.vehicle.get_fuel_consumption(veh_id) - if gallons_per_s > 0 and speed >= 0.0: - counter += 1 - # meters / gallon is (v * \delta t) / (gallons_per_s * \delta t) - mpg += speed / gallons_per_s - if counter > 0: - mpg /= counter - - # convert from meters per gallon to miles per gallon - mpg /= 1609.0 + energy_model = env.k.vehicle.get_energy_model(veh_id) + if energy_model != "": + speed = env.k.vehicle.get_speed(veh_id) + accel = env.k.vehicle.get_accel(veh_id, noise=False, failsafe=True) + grade = env.k.vehicle.get_road_grade(veh_id) + gallons_per_hr = energy_model.get_instantaneous_fuel_consumption(accel, speed, grade) + if speed >= 0.0: + cumulative_gallons += gallons_per_hr + cumulative_distance += speed + + cumulative_gallons /= 3600.0 + cumulative_distance /= 1609.34 + # miles / gallon is (distance_dot * \delta t) / (gallons_dot * \delta t) + mpg = cumulative_distance / (cumulative_gallons + 1e-6) return mpg * gain diff --git a/flow/core/util.py b/flow/core/util.py index 1821a76a5..c0c31f811 100755 --- a/flow/core/util.py +++ b/flow/core/util.py @@ -4,7 +4,6 @@ import errno import os from lxml import etree -from xml.etree import ElementTree def makexml(name, nsl): @@ -47,42 +46,39 @@ def emission_to_csv(emission_path, output_path=None): path to the csv file that will be generated, default is the same directory as the emission file, with the same name """ - parser = etree.XMLParser(recover=True) - tree = ElementTree.parse(emission_path, parser=parser) - root = tree.getroot() - - # parse the xml data into a dict + context = etree.iterparse(emission_path, recover=True) out_data = [] - for time in root.findall('timestep'): - t = float(time.attrib['time']) - - for car in time: - out_data.append(dict()) - try: - out_data[-1]['time'] = t - out_data[-1]['CO'] = float(car.attrib['CO']) - out_data[-1]['y'] = float(car.attrib['y']) - out_data[-1]['CO2'] = float(car.attrib['CO2']) - out_data[-1]['electricity'] = float(car.attrib['electricity']) - out_data[-1]['type'] = car.attrib['type'] - out_data[-1]['id'] = car.attrib['id'] - out_data[-1]['eclass'] = car.attrib['eclass'] - out_data[-1]['waiting'] = float(car.attrib['waiting']) - out_data[-1]['NOx'] = float(car.attrib['NOx']) - out_data[-1]['fuel'] = float(car.attrib['fuel']) - out_data[-1]['HC'] = float(car.attrib['HC']) - out_data[-1]['x'] = float(car.attrib['x']) - out_data[-1]['route'] = car.attrib['route'] - out_data[-1]['relative_position'] = float(car.attrib['pos']) - out_data[-1]['noise'] = float(car.attrib['noise']) - out_data[-1]['angle'] = float(car.attrib['angle']) - out_data[-1]['PMx'] = float(car.attrib['PMx']) - out_data[-1]['speed'] = float(car.attrib['speed']) - out_data[-1]['edge_id'] = car.attrib['lane'].rpartition('_')[0] - out_data[-1]['lane_number'] = car.attrib['lane'].\ - rpartition('_')[-1] - except KeyError: - del out_data[-1] + for event, elem in context: + if elem.tag == "timestep": + t = float(elem.attrib['time']) + for car in elem: + out_data.append(dict()) + try: + out_data[-1]['time'] = t + out_data[-1]['CO'] = float(car.attrib['CO']) + 
out_data[-1]['y'] = float(car.attrib['y'])
+                        out_data[-1]['CO2'] = float(car.attrib['CO2'])
+                        out_data[-1]['electricity'] = float(car.attrib['electricity'])
+                        out_data[-1]['type'] = car.attrib['type']
+                        out_data[-1]['id'] = car.attrib['id']
+                        out_data[-1]['eclass'] = car.attrib['eclass']
+                        out_data[-1]['waiting'] = float(car.attrib['waiting'])
+                        out_data[-1]['NOx'] = float(car.attrib['NOx'])
+                        out_data[-1]['fuel'] = float(car.attrib['fuel'])
+                        out_data[-1]['HC'] = float(car.attrib['HC'])
+                        out_data[-1]['x'] = float(car.attrib['x'])
+                        out_data[-1]['route'] = car.attrib['route']
+                        out_data[-1]['relative_position'] = float(car.attrib['pos'])
+                        out_data[-1]['noise'] = float(car.attrib['noise'])
+                        out_data[-1]['angle'] = float(car.attrib['angle'])
+                        out_data[-1]['PMx'] = float(car.attrib['PMx'])
+                        out_data[-1]['speed'] = float(car.attrib['speed'])
+                        out_data[-1]['edge_id'] = car.attrib['lane'].rpartition('_')[0]
+                        out_data[-1]['lane_number'] = car.attrib['lane']. \
+                            rpartition('_')[-1]
+                    except KeyError:
+                        del out_data[-1]
+            elem.clear()

     # sort the elements of the dictionary by the vehicle id
     out_data = sorted(out_data, key=lambda k: k['id'])
diff --git a/flow/data_pipeline/README.md b/flow/data_pipeline/README.md
new file mode 100644
index 000000000..65aeb8d49
--- /dev/null
+++ b/flow/data_pipeline/README.md
@@ -0,0 +1,12 @@
+To run a simulation with output stored locally only:
+
+    `python simulate.py EXP_CONFIG --gen_emission`
+
+To run a simulation and upload output to the pipeline:
+
+    `python simulate.py EXP_CONFIG --to_aws`
+
+To run a simulation, upload output to the pipeline, and mark it as baseline:
+
+    `python simulate.py EXP_CONFIG --to_aws --is_baseline`
+
diff --git a/flow/data_pipeline/__init__.py b/flow/data_pipeline/__init__.py
new file mode 100644
index 000000000..d9d6a6573
--- /dev/null
+++ b/flow/data_pipeline/__init__.py
@@ -0,0 +1 @@
+"""Empty init file to ensure that data_pipeline is recognized as a package."""
diff --git a/flow/data_pipeline/data_pipeline.py b/flow/data_pipeline/data_pipeline.py
new file mode 100644
index 000000000..f0e3637f6
--- /dev/null
+++ b/flow/data_pipeline/data_pipeline.py
@@ -0,0 +1,349 @@
+"""Contains classes and helper functions for the data pipeline."""
+import pandas as pd
+import boto3
+from botocore.exceptions import ClientError
+from flow.data_pipeline.query import QueryStrings, prerequisites
+from time import time
+from datetime import date
+import csv
+from io import StringIO
+import json
+
+
+def generate_trajectory_table(emission_files, trajectory_table_path, source_id):
+    """Generate desired output for the trajectory_table based on SUMO emissions.
+
+    Parameters
+    ----------
+    emission_files : list
+        paths to the SUMO emission files
+    trajectory_table_path : str
+        path to the file for S3 upload only
+    source_id : str
+        a unique id for the simulation that generated these emissions
+    """
+    for i in range(len(emission_files)):
+        emission_output = pd.read_csv(emission_files[i])
+        emission_output['source_id'] = source_id
+        emission_output['run_id'] = "run_{}".format(i)
+        # add a header row to the file only on the first run (when i == 0)
+        emission_output.to_csv(trajectory_table_path, mode='a+', index=False, header=(i == 0))
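Both `generate_trajectory_table` above and `write_dict_to_csv` below rely on pandas' append mode (`mode='a+'`) with the header row emitted only on the first write. A compact, self-contained sketch of that pattern (the file name and data are illustrative):

```python
import pandas as pd

# Two per-run frames appended into a single table; the header row is
# written exactly once, on the first append.
runs = [
    pd.DataFrame({"speed": [5.0, 5.5]}),
    pd.DataFrame({"speed": [6.0, 6.2]}),
]
for i, df in enumerate(runs):
    df["run_id"] = "run_{}".format(i)
    df.to_csv("trajectory.csv", mode="a+", index=False, header=(i == 0))
```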
+
+
+def write_dict_to_csv(data_path, extra_info, include_header=False):
+    """Write extra_info to the CSV file at data_path, creating the file if it does not exist.
+
+    Parameters
+    ----------
+    data_path : str
+        output file path
+    extra_info: dict
+        extra information needed in the trajectory table, collected from flow
+    include_header: bool
+        whether or not to include the header in the output; this should be
+        set to True for the first write to an empty or newly created CSV,
+        and to False for subsequent appends.
+    """
+    extra_info = pd.DataFrame.from_dict(extra_info)
+    extra_info.to_csv(data_path, mode='a+', index=False, header=include_header)
+
+
+def upload_to_s3(bucket_name, bucket_key, file_path, metadata=None):
+    """Upload a file to an S3 bucket.
+
+    Parameters
+    ----------
+    bucket_name : str
+        the bucket to upload to
+    bucket_key: str
+        the key within the bucket for the file
+    file_path: str
+        the path of the file to be uploaded
+    metadata: dict
+        all the metadata that should be attached to this simulation
+    """
+    # avoid a mutable default argument
+    metadata = metadata if metadata is not None else {}
+    s3 = boto3.resource("s3")
+    s3.Bucket(bucket_name).upload_file(file_path, bucket_key,
+                                       ExtraArgs={"Metadata": metadata})
+    return
+
+
+def get_extra_info(veh_kernel, extra_info, veh_ids, source_id, run_id):
+    """Get all the necessary information for the trajectory output from flow."""
+    for vid in veh_ids:
+        extra_info["time_step"].append(veh_kernel.get_timestep(vid) / 1000)
+        extra_info["id"].append(vid)
+        position = veh_kernel.get_2d_position(vid)
+        extra_info["x"].append(position[0])
+        extra_info["y"].append(position[1])
+        extra_info["speed"].append(veh_kernel.get_speed(vid))
+        extra_info["headway"].append(veh_kernel.get_headway(vid))
+        extra_info["leader_id"].append(veh_kernel.get_leader(vid))
+        extra_info["follower_id"].append(veh_kernel.get_follower(vid))
+        extra_info["leader_rel_speed"].append(veh_kernel.get_speed(
+            veh_kernel.get_leader(vid)) - veh_kernel.get_speed(vid))
+        extra_info["target_accel_with_noise_with_failsafe"].append(veh_kernel.get_accel(vid))
+        extra_info["target_accel_no_noise_no_failsafe"].append(
+            veh_kernel.get_accel(vid, noise=False, failsafe=False))
+        extra_info["target_accel_with_noise_no_failsafe"].append(
+            veh_kernel.get_accel(vid, noise=True, failsafe=False))
+        extra_info["target_accel_no_noise_with_failsafe"].append(
+            veh_kernel.get_accel(vid, noise=False, failsafe=True))
+        extra_info["realized_accel"].append(veh_kernel.get_realized_accel(vid))
+        extra_info["road_grade"].append(veh_kernel.get_road_grade(vid))
+        extra_info["edge_id"].append(veh_kernel.get_edge(vid))
+        extra_info["lane_id"].append(veh_kernel.get_lane(vid))
+        extra_info["distance"].append(veh_kernel.get_distance(vid))
+        extra_info["relative_position"].append(veh_kernel.get_position(vid))
+        extra_info["source_id"].append(source_id)
+        extra_info["run_id"].append(run_id)
+
+
+def get_configuration():
+    """Get configuration for the metadata table."""
+    try:
+        config_df = pd.read_csv('./data_pipeline_config')
+    except FileNotFoundError:
+        config_df = pd.DataFrame(data={"submitter_name": [""], "strategy": [""]})
+
+    if not config_df['submitter_name'][0]:
+        name = input("Please enter your name:").strip()
+        while not name:
+            name = input("Please enter a non-empty name:").strip()
+        config_df['submitter_name'] = [name]
+
+    strategy = input(
+        "Please enter strategy name (current: \"{}\"):".format(config_df["strategy"][0])).strip()
+    if strategy:
+        config_df['strategy'] = [strategy]
+
+    config_df.to_csv('./data_pipeline_config', index=False)
+
+    return config_df['submitter_name'][0], config_df['strategy'][0]
+
+
+def delete_obsolete_data(s3, latest_key, table, bucket="circles.data.pipeline"):
+    """Delete the obsolete data on S3."""
+    response = s3.list_objects_v2(Bucket=bucket)
+    keys = [e["Key"] for e in response["Contents"] if e["Key"].find(table) == 0 and e["Key"][-4:] == ".csv"]
+    keys.remove(latest_key)
+    for key in keys:
+        s3.delete_object(Bucket=bucket, Key=key)
+
+
+def update_baseline(s3, baseline_network, baseline_source_id):
+    """Update the baseline table on S3 if a new baseline run is added."""
+    obj = s3.get_object(Bucket='circles.data.pipeline', Key='baseline_table/baselines.csv')['Body']
+    original_str = obj.read().decode()
+    reader = csv.DictReader(StringIO(original_str))
+    new_str = StringIO()
+    writer = csv.DictWriter(new_str, fieldnames=['network', 'source_id'])
+    writer.writeheader()
+    writer.writerow({'network': baseline_network, 'source_id': baseline_source_id})
+    for row in reader:
+        if row['network'] != baseline_network:
+            writer.writerow(row)
+    s3.put_object(Bucket='circles.data.pipeline', Key='baseline_table/baselines.csv',
+                  Body=new_str.getvalue().replace('\r', '').encode())
+
+
+def get_completed_queries(s3, source_id):
+    """Return the deserialized list of completed queries from S3."""
+    try:
+        completed_queries_obj = \
+            s3.get_object(Bucket='circles.data.pipeline', Key='lambda_temp/{}'.format(source_id))['Body']
+        completed_queries = json.loads(completed_queries_obj.read().decode('utf-8'))
+    except ClientError as e:
+        if e.response['Error']['Code'] == 'NoSuchKey':
+            completed_queries = set()
+        else:
+            raise
+    return set(completed_queries)
+
+
+def put_completed_queries(s3, completed_queries):
+    """Put all the completed-queries lists into S3 in serialized JSON format."""
+    for source_id, completed_queries_set in completed_queries.items():
+        completed_queries_list = list(completed_queries_set)
+        completed_queries_json = json.dumps(completed_queries_list)
+        s3.put_object(Bucket='circles.data.pipeline', Key='lambda_temp/{}'.format(source_id),
+                      Body=completed_queries_json.encode('utf-8'))
+
+
+def get_ready_queries(completed_queries, new_query):
+    """Return queries whose prerequisite queries are completed."""
+    readied_queries = []
+    unfinished_queries = prerequisites.keys() - completed_queries
+    updated_completed_queries = completed_queries.copy()
+    updated_completed_queries.add(new_query)
+    for query_name in unfinished_queries:
+        if not prerequisites[query_name][1].issubset(completed_queries):
+            if prerequisites[query_name][1].issubset(updated_completed_queries):
+                readied_queries.append((query_name, prerequisites[query_name][0]))
+    return readied_queries
+
+
+class AthenaQuery:
+    """Class used to run queries.
+
+    Acts as a query engine and maintains an open session with AWS Athena.
+
+    Attributes
+    ----------
+    MAX_WAIT : int
+        maximum number of seconds to wait before declaring a time-out
+    client : boto3.client
+        the athena client that is used to run the query
+    existing_partitions : list
+        a list of partitions already recorded in Athena's data catalog;
+        obtained by querying Athena when an instance of this class is
+        initialized.
+    """
+
+    def __init__(self):
+        """Initialize AthenaQuery instance.
+
+        Initialize a client session with AWS Athena and query Athena to
+        obtain the existing partitions.
+        """
+        self.MAX_WAIT = 60
+        self.client = boto3.client("athena")
+        self.existing_partitions = {}
+
+    def get_existing_partitions(self, table):
+        """Return the existing partitions in the S3 bucket.
+
+        Returns
+        -------
+        partitions: a list of existing partitions on the S3 bucket
+        """
+        response = self.client.start_query_execution(
+            QueryString='SHOW PARTITIONS {}'.format(table),
+            QueryExecutionContext={
+                'Database': 'circles'
+            },
+            WorkGroup='primary'
+        )
+        if self.wait_for_execution(response['QueryExecutionId']):
+            raise RuntimeError("get current partitions timed out")
+        response = self.client.get_query_results(
+            QueryExecutionId=response['QueryExecutionId'],
+            MaxResults=1000
+        )
+        return [data['Data'][0]['VarCharValue'] for data in response['ResultSet']['Rows']]
+
+    def check_status(self, execution_id):
+        """Return the status of the execution with the given id.
+
+        Parameters
+        ----------
+        execution_id : string
+            id of the execution being checked
+        Returns
+        -------
+        status: str
+            QUEUED|RUNNING|SUCCEEDED|FAILED|CANCELLED
+        """
+        response = self.client.get_query_execution(
+            QueryExecutionId=execution_id
+        )
+        return response['QueryExecution']['Status']['State']
+
+    def wait_for_execution(self, execution_id):
+        """Wait for the execution to finish or time out.
+
+        Parameters
+        ----------
+        execution_id : str
+            id of the execution this is waiting for
+        Returns
+        -------
+        time_out: bool
+            True if timed out, False if successful
+        Raises
+        ------
+        RuntimeError: if the execution fails or gets canceled
+        """
+        start = time()
+        while time() - start < self.MAX_WAIT:
+            state = self.check_status(execution_id)
+            if state == 'FAILED' or state == 'CANCELLED':
+                raise RuntimeError("execution failed or was canceled")
+            elif state == 'SUCCEEDED':
+                return False
+        return True
+
+    def update_partition(self, table, submission_date, partition):
+        """Load the given partition to the trajectory_table on Athena.
+
+        Parameters
+        ----------
+        table : str
+            the name of the table to update
+        submission_date : str
+            the new partition date that needs to be loaded
+        partition : str
+            the new partition that needs to be loaded
+        """
+        response = self.client.start_query_execution(
+            QueryString=QueryStrings['UPDATE_PARTITION'].value.format(table=table, date=submission_date,
+                                                                      partition=partition),
+            QueryExecutionContext={
+                'Database': 'circles'
+            },
+            WorkGroup='primary'
+        )
+        if self.wait_for_execution(response['QueryExecutionId']):
+            raise RuntimeError("update partition timed out")
+        self.existing_partitions[table].append("date={}/partition_name={}".format(submission_date, partition))
+        return
+
+    def repair_partition(self, table, submission_date, partition):
+        """Load the missing partitions."""
+        if table not in self.existing_partitions.keys():
+            self.existing_partitions[table] = self.get_existing_partitions(table)
+        if "date={}/partition_name={}".format(submission_date, partition) not in \
+                self.existing_partitions[table]:
+            self.update_partition(table, submission_date, partition)
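A rough usage sketch for the query engine above. It assumes AWS credentials are configured, that the named query exists in the QueryStrings enum, and that any filter kwargs its query template needs would be passed through `**kwargs`; the partition and date values are illustrative:

```python
# Sketch: load a partition and kick off a query without waiting on it.
engine = AthenaQuery()
engine.repair_partition(
    table="fact_vehicle_trace",
    submission_date="2020-06-01",
    partition="flow_abc123")
execution_id = engine.run_query(
    "FACT_VEHICLE_TRACE",
    submission_date="2020-06-01",
    partition="flow_abc123")
print(engine.check_status(execution_id))  # e.g. QUEUED or RUNNING
```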
+    def run_query(self, query_name, result_location="s3://circles.data.pipeline/result/",
+                  submission_date="today", partition="default", **kwargs):
+        """Start the execution of a query; does not wait for it to finish.
+
+        Parameters
+        ----------
+        query_name : str
+            name of the query in the QueryStrings enum that will be run
+        result_location : str, optional
+            location on the S3 bucket where the result will be stored
+        submission_date : str
+            name of the partition date to run this query on
+        partition : str, optional
+            name of the partition to run this query on
+        Returns
+        -------
+        execution_id : str
+            the execution id of the execution started by this method
+        Raises
+        ------
+        ValueError: if the query does not exist in the QueryStrings enum
+        """
+        if query_name not in QueryStrings.__members__:
+            raise ValueError("query does not exist: please add it to query.py")
+
+        if submission_date == "today":
+            submission_date = date.today().isoformat()
+
+        source_id = "flow_{}".format(partition.split('_')[1])
+
+        response = self.client.start_query_execution(
+            QueryString=QueryStrings[query_name].value.format(date=submission_date, partition=source_id, **kwargs),
+            QueryExecutionContext={
+                'Database': 'circles'
+            },
+            ResultConfiguration={
+                'OutputLocation': result_location,
+            },
+            WorkGroup='primary'
+        )
+        return response['QueryExecutionId']
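Taken together, `run_query`, `wait_for_execution`, and `check_status` can also be driven by hand. A hypothetical sketch (it assumes AWS credentials and the `circles` Athena database are configured; the partition name `flow_12345_1` is made up for illustration):

```python
# Hypothetical manual use of AthenaQuery; the partition name and date
# below are illustrative, not real pipeline data.
from flow.data_pipeline.data_pipeline import AthenaQuery

engine = AthenaQuery()
execution_id = engine.run_query(
    "FACT_NETWORK_THROUGHPUT_AGG",
    result_location="s3://circles.data.pipeline/result/",
    submission_date="2020-06-01",
    partition="flow_12345_1",
    loc_filter="x BETWEEN 500 AND 2300",
    start_filter=600.0,
)
# wait_for_execution returns True on time-out, False on success
if engine.wait_for_execution(execution_id):
    raise RuntimeError("query timed out")
print(engine.check_status(execution_id))  # 'SUCCEEDED'
```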
diff --git a/flow/data_pipeline/lambda_function.py b/flow/data_pipeline/lambda_function.py
new file mode 100644
index 000000000..1d813f98b
--- /dev/null
+++ b/flow/data_pipeline/lambda_function.py
@@ -0,0 +1,75 @@
+"""lambda function on AWS Lambda."""
+import boto3
+from urllib.parse import unquote_plus
+from flow.data_pipeline.data_pipeline import AthenaQuery, delete_obsolete_data, update_baseline, \
+    get_ready_queries, get_completed_queries, put_completed_queries
+from flow.data_pipeline.query import tables, network_filters, summary_tables, triggers
+
+s3 = boto3.client('s3')
+queryEngine = AthenaQuery()
+
+
+def lambda_handler(event, context):
+    """Handle the S3 put event on AWS Lambda."""
+    # stores the set of completed queries for each source_id
+    completed = {}
+    records = []
+    # do a pre-sweep to handle tasks other than initializing a query
+    for record in event['Records']:
+        bucket = record['s3']['bucket']['name']
+        key = unquote_plus(record['s3']['object']['key'])
+        table = key.split('/')[0]
+        if table not in tables:
+            continue
+        # delete unwanted metadata files
+        s3.delete_object(Bucket=bucket, Key=(key + '.metadata'))
+        # load the partition for the newly added table
+        query_date = key.split('/')[-3].split('=')[-1]
+        partition = key.split('/')[-2].split('=')[-1]
+        source_id = "flow_{}".format(partition.split('_')[1])
+        if table == "fact_vehicle_trace":
+            query_name = "FACT_VEHICLE_TRACE"
+        else:
+            query_name = partition.replace(source_id, "")[1:]
+        queryEngine.repair_partition(table, query_date, partition)
+        # delete obsolete data
+        if table in summary_tables:
+            delete_obsolete_data(s3, key, table)
+        # add tables that need to start a query to the list
+        if query_name in triggers:
+            records.append((bucket, key, table, query_name, query_date, partition, source_id))
+
+    # initialize the queries
+    for bucket, key, table, query_name, query_date, partition, source_id in records:
+        # retrieve the set of completed queries for this source_id if not already available
+        if source_id not in completed.keys():
+            completed[source_id] = get_completed_queries(s3, source_id)
+        # if the query was already recorded before, skip it;
+        # this tolerates repeated executions of the same event by Lambda
+        if query_name in completed[source_id]:
+            continue
+        # retrieve metadata and use it to determine the right loc_filter
+        metadata_key = "fact_vehicle_trace/date={0}/partition_name={1}/{1}.csv".format(query_date, source_id)
+        response = s3.head_object(Bucket=bucket, Key=metadata_key)
+        if 'network' in response["Metadata"]:
+            network = response["Metadata"]['network']
+            loc_filter = network_filters[network]['loc_filter']
+            start_filter = network_filters[network]['warmup_steps']
+            stop_filter = network_filters[network]['horizon_steps']
+
+            # update baseline if needed
+            if table == 'fact_vehicle_trace' \
+                    and 'is_baseline' in response['Metadata'] and response['Metadata']['is_baseline'] == 'True':
+                update_baseline(s3, network, source_id)
+
+        readied_queries = get_ready_queries(completed[source_id], query_name)
+        completed[source_id].add(query_name)
+        # initialize queries and store them at the appropriate locations
+        for readied_query_name, table_name in readied_queries:
+            result_location = 's3://circles.data.pipeline/{}/date={}/partition_name={}_{}'.format(table_name,
+                                                                                                  query_date,
+                                                                                                  source_id,
+                                                                                                  readied_query_name)
+            queryEngine.run_query(readied_query_name, result_location, query_date, partition, loc_filter=loc_filter,
+                                  start_filter=start_filter, stop_filter=stop_filter)
+    # store all the updated sets of completed queries back to S3
+    put_completed_queries(s3, completed)
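For local testing, the handler can be fed a hand-written event in the standard S3 put-notification shape. A sketch (bucket and key values are illustrative):

```python
# A hand-written event in the standard S3 put-notification shape,
# usable to exercise lambda_handler locally (values are made up).
event = {
    "Records": [{
        "s3": {
            "bucket": {"name": "circles.data.pipeline"},
            "object": {
                "key": "fact_vehicle_trace/date=2020-06-01/"
                       "partition_name=flow_12345_1/flow_12345_1.csv"
            },
        },
    }],
}
# lambda_handler(event, context=None) would then repair the partition,
# drop the stray .metadata object, and start the FACT_VEHICLE_TRACE
# trigger queries for source_id "flow_12345".
```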
diff --git a/flow/data_pipeline/leaderboard_utils.py b/flow/data_pipeline/leaderboard_utils.py
new file mode 100644
index 000000000..dd7055f8b
--- /dev/null
+++ b/flow/data_pipeline/leaderboard_utils.py
@@ -0,0 +1,166 @@
+"""APIs for the leader board front end."""
+import os
+import boto3
+import pandas as pd
+from io import StringIO
+
+
+network_name_map = {"highway": "Single-Lane Straight Road",
+                    "highway_single": "Single-Lane Straight Road",
+                    "ring": "Single-Lane Ring Road",
+                    "I-210_subnetwork": "I-210 without Ramps",
+                    "I_210_subnetwork": "I-210 without Ramps"}
+
+
+def network_name_translate(network_name):
+    """Translate a network name to a human-friendly name for the leaderboard."""
+    return network_name_map.get(network_name, network_name)
+
+
+def key_to_name(key):
+    """Return the standard formatted file name from an object key."""
+    k_list = key.split("/")
+    date = k_list[1].replace("date=", "")
+    name = k_list[2].replace("partition_name=", "")
+    index = name.find("_", 5)
+    source_id = name
+    query_name = ""
+    if index != -1:
+        source_id = name[0:index]
+        query_name = "_" + name[index+1:].replace("_", "-")
+    return "{}_{}{}.csv".format(date, source_id.replace("_", "-"), query_name)
+
+
+def get_table_disk(table_name="fact_vehicle_trace", bucket="circles.data.pipeline"):
+    """Fetch tables from S3 and store them in the ./result directory.
+
+    Parameters
+    ----------
+    table_name : str
+        The name of the table to retrieve from S3. The currently available tables are:
+            fact_vehicle_trace
+            fact_energy_trace
+            fact_network_throughput_agg
+            fact_network_inflows_outflows
+            fact_vehicle_fuel_efficiency_agg
+            fact_network_metrics_by_distance_agg
+            fact_network_metrics_by_time_agg
+            fact_network_fuel_efficiency_agg
+            leaderboard_chart
+            leaderboard_chart_agg
+        Note that leaderboard_chart_agg is a combination of all previous
+        leaderboard_chart entries in one CSV file. It is only used to
+        avoid burdening the web server with extra calculation. The date
+        and source_id in its name always reflect the latest
+        leaderboard_chart entry.
+    bucket : str
+        the S3 bucket that holds these tables
+    """
+    try:
+        os.makedirs("result/{}".format(table_name))
+    except FileExistsError:
+        pass
+    s3 = boto3.client("s3")
+    response = s3.list_objects_v2(Bucket=bucket)
+    keys = [e["Key"] for e in response["Contents"] if e["Key"].find(table_name) == 0 and e["Key"][-4:] == ".csv"]
+    names = [key_to_name(k) for k in keys]
+    existing_results = os.listdir("./result/{}".format(table_name))
+    updated = False
+    for index in range(len(keys)):
+        if names[index] not in existing_results:
+            updated = True
+            s3.download_file(bucket, keys[index], "./result/{}/{}".format(table_name, names[index]))
+    if table_name == "leaderboard_chart_agg" and updated:
+        for p in existing_results:
+            os.remove("./result/{}/{}".format(table_name, p))
+
+
+def get_table_memory(table_name="fact_vehicle_trace", bucket="circles.data.pipeline", existing_results=()):
+    """Fetch tables from S3 and return them as in-memory pandas dataframe objects.
+
+    Parameters
+    ----------
+    bucket : str
+        the S3 bucket that holds the tables
+    table_name : str
+        the name of the table to retrieve from S3; for details see get_table_disk
+    existing_results : list
+        tables that should not be fetched;
+        the names must follow the convention:
+        {date}_{source_id(no run number)}_{query_name}.csv
+
+    Returns
+    -------
+    file_list : dict
+        a dictionary of pandas dataframes, each containing a table from S3,
+        keyed by the file name: {date}_{source_id(no run number)}_{query_name}.csv
+    """
+    s3 = boto3.client("s3")
+    response = s3.list_objects_v2(Bucket=bucket)
+    keys = [e["Key"] for e in response["Contents"] if e["Key"].find(table_name) == 0 and e["Key"][-4:] == ".csv"]
+    names = [key_to_name(k) for k in keys]
+    results = dict()
+    for index in range(len(keys)):
+        if names[index] not in existing_results:
+            obj = s3.get_object(Bucket=bucket, Key=keys[index])["Body"]
+            obj_str = obj.read().decode("utf-8")
+            results[names[index]] = pd.read_csv(StringIO(obj_str))
+    return results
+
+
+def get_table_url(table_name="fact_vehicle_trace", bucket="circles.data.pipeline", existing_results=()):
+    """Fetch tables from S3 and return them as URLs; requires the bucket to have public access.
+
+    Parameters
+    ----------
+    bucket : str
+        the S3 bucket that holds the tables
+    table_name : str
+        the name of the table to retrieve from S3; for details see get_table_disk
+    existing_results : list
+        tables that should not be fetched;
+        the names must follow the convention:
+        {date}_{source_id(no run number)}_{query_name}.csv
+
+    Returns
+    -------
+    file_list : dict
+        a dictionary of URLs, each pointing to a table on S3,
+        keyed by the file name: {date}_{source_id(no run number)}_{query_name}.csv
+    """
+    s3 = boto3.client("s3")
+    response = s3.list_objects_v2(Bucket=bucket)
+    keys = [e["Key"] for e in response["Contents"] if e["Key"].find(table_name) == 0 and e["Key"][-4:] == ".csv"]
+    names = [key_to_name(k) for k in keys]
+    results = dict()
+    for index in range(len(keys)):
+        if names[index] not in existing_results:
+            results[names[index]] = "https://{}.s3.{}.amazonaws.com/{}".format(bucket, "us-west-2", keys[index])
+    return results
+
+
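These fetchers are designed for incremental use: a front end passes in the names it already has so that only new tables are pulled. A sketch (the cached file name below is hypothetical):

```python
# Sketch of the incremental-fetch pattern a front end would use; the
# cached file name is made up but follows the key_to_name convention.
from flow.data_pipeline.leaderboard_utils import get_table_memory

cached = ["2020-06-01_flow-12345_LEADERBOARD-CHART.csv"]
new_tables = get_table_memory(
    table_name="leaderboard_chart",
    bucket="circles.data.pipeline",
    existing_results=cached,
)
for name, df in new_tables.items():
    print(name, df.shape)
```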
+def get_metadata(name, bucket="circles.data.pipeline"):
+    """Get the metadata by name.
+
+    Parameters
+    ----------
+    name : str
+        the name of the table whose metadata will be returned
+    bucket : str
+        the bucket that holds the table
+
+    Returns
+    -------
+    metadata : dict
+        a dictionary of all the metadata; there is no guarantee
+        as to which keys are included
+    """
+    s3 = boto3.client("s3")
+    name_list = name.split('_')
+    source_id = name_list[1].replace('.csv', "").replace('-', '_')
+    response = s3.head_object(Bucket=bucket,
+                              Key="fact_vehicle_trace/date={0}/partition_name={1}/{1}.csv".format(name_list[0],
+                                                                                                  source_id))
+    return response["Metadata"]
diff --git a/flow/data_pipeline/query.py b/flow/data_pipeline/query.py
new file mode 100644
index 000000000..f68dfa321
--- /dev/null
+++ b/flow/data_pipeline/query.py
@@ -0,0 +1,784 @@
+"""stores all the pre-defined query strings."""
+from collections import defaultdict
+from enum import Enum
+
+# tags for different queries
+prerequisites = {
+    "POWER_DEMAND_MODEL": (
+        "fact_energy_trace", {"FACT_VEHICLE_TRACE"}
+    ),
+    "POWER_DEMAND_MODEL_DENOISED_ACCEL": (
+        "fact_energy_trace", {"FACT_VEHICLE_TRACE"}
+    ),
+    "POWER_DEMAND_MODEL_DENOISED_ACCEL_VEL": (
+        "fact_energy_trace", {"FACT_VEHICLE_TRACE"}
+    ),
+    "FACT_SAFETY_METRICS": (
+        "fact_safety_metrics", {"FACT_VEHICLE_TRACE"}
+    ),
+    "FACT_NETWORK_THROUGHPUT_AGG": (
+        "fact_network_throughput_agg", {"FACT_VEHICLE_TRACE"}
+    ),
+    "FACT_NETWORK_INFLOWS_OUTFLOWS": (
+        "fact_network_inflows_outflows", {"FACT_VEHICLE_TRACE"}
+    ),
+    "FACT_VEHICLE_COUNTS_BY_TIME": (
+        "fact_vehicle_counts_by_time", {"FACT_VEHICLE_TRACE"}
+    ),
+    "FACT_VEHICLE_FUEL_EFFICIENCY_AGG": (
+        "fact_vehicle_fuel_efficiency_agg", {"FACT_VEHICLE_TRACE",
+                                             "POWER_DEMAND_MODEL_DENOISED_ACCEL"}
+    ),
+    "FACT_NETWORK_METRICS_BY_DISTANCE_AGG": (
+        "fact_network_metrics_by_distance_agg", {"FACT_VEHICLE_TRACE",
+                                                 "POWER_DEMAND_MODEL_DENOISED_ACCEL"}
+    ),
+    "FACT_NETWORK_METRICS_BY_TIME_AGG": (
+        "fact_network_metrics_by_time_agg", {"FACT_VEHICLE_TRACE",
+                                             "POWER_DEMAND_MODEL_DENOISED_ACCEL"}
+    ),
+    "FACT_VEHICLE_FUEL_EFFICIENCY_BINNED": (
+        "fact_vehicle_fuel_efficiency_binned", {"FACT_VEHICLE_FUEL_EFFICIENCY_AGG"}
+    ),
+    "FACT_NETWORK_FUEL_EFFICIENCY_AGG": (
+        "fact_network_fuel_efficiency_agg", {"FACT_VEHICLE_FUEL_EFFICIENCY_AGG"}
+    ),
+    "FACT_SAFETY_METRICS_AGG": (
+        "fact_safety_metrics_agg", {"FACT_SAFETY_METRICS"}
+    ),
+    "FACT_SAFETY_METRICS_BINNED": (
+        "fact_safety_metrics_binned", {"FACT_SAFETY_METRICS"}
+    ),
+    "LEADERBOARD_CHART": (
+        "leaderboard_chart", {"FACT_NETWORK_THROUGHPUT_AGG",
+                              "FACT_NETWORK_FUEL_EFFICIENCY_AGG",
+                              "FACT_SAFETY_METRICS_AGG"}
+    ),
+    "LEADERBOARD_CHART_AGG": (
+        "leaderboard_chart_agg", {"LEADERBOARD_CHART"}
+    ),
+    "FACT_TOP_SCORES": (
+        "fact_top_scores", {"LEADERBOARD_CHART_AGG"}
+    ),
+}
+
+triggers = [
+    "FACT_VEHICLE_TRACE",
+    "POWER_DEMAND_MODEL_DENOISED_ACCEL",
+    "FACT_VEHICLE_FUEL_EFFICIENCY_AGG",
+    "FACT_SAFETY_METRICS",
+    "FACT_NETWORK_THROUGHPUT_AGG",
+    "FACT_NETWORK_FUEL_EFFICIENCY_AGG",
+    "FACT_SAFETY_METRICS_AGG",
+    "LEADERBOARD_CHART",
+    "LEADERBOARD_CHART_AGG"
+]
+
+tables = [
+    "fact_vehicle_trace",
+    "fact_energy_trace",
+    "fact_vehicle_counts_by_time",
+    "fact_safety_metrics",
+    "fact_safety_metrics_agg",
+    "fact_safety_metrics_binned",
+    "fact_network_throughput_agg",
+    "fact_network_inflows_outflows",
+    "fact_vehicle_fuel_efficiency_agg",
+    "fact_vehicle_fuel_efficiency_binned",
+    "fact_network_metrics_by_distance_agg",
+    "fact_network_metrics_by_time_agg",
+    "fact_network_fuel_efficiency_agg",
+    "leaderboard_chart",
+    "leaderboard_chart_agg",
+    "fact_top_scores",
+    "metadata_table"
+]
+
"metadata_table" +] + +summary_tables = ["leaderboard_chart_agg", "fact_top_scores"] + +network_filters = defaultdict(lambda: { + 'loc_filter': "x BETWEEN 500 AND 2300", + 'warmup_steps': 500 * 3 * 0.4, + 'horizon_steps': 1000 * 3 * 0.4 + }) +network_filters['I-210 without Ramps'] = { + 'loc_filter': "edge_id <> ALL (VALUES 'ghost0', '119257908#3')", + 'warmup_steps': 600 * 3 * 0.4, + 'horizon_steps': 1000 * 3 * 0.4 + } + +VEHICLE_POWER_DEMAND_TACOMA_FINAL_SELECT = """ + SELECT + id, + time_step, + speed, + acceleration, + road_grade, + GREATEST(0, 2041 * acceleration * speed + + 3405.5481762 + + 83.12392997 * speed + + 6.7650718327 * POW(speed,2) + + 0.7041355229 * POW(speed,3) + ) + GREATEST(0, 4598.7155 * accel + 975.12719 * accel * speed) AS power, + \'{1}\' AS energy_model_id, + source_id + FROM {2} + ORDER BY id, time_step + """ + +VEHICLE_POWER_DEMAND_PRIUS_FINAL_SELECT = """ + SELECT + id, + time_step, + speed, + acceleration, + road_grade, + GREATEST(-2.8 * speed, 1663 * speed * (( + CASE + WHEN acceleration > 0 THEN 1 + WHEN acceleration < 0 THEN 0 + ELSE 0.5 + END * (1 - {0}) + {0}) * acceleration + 9.807 * SIN(road_grade) + ) + 1663 * 9.807 * 0.007 * speed + 0.5 * 1.225 * 2.4 * 0.24 * POW(speed,3)) AS power, + \'{1}\' AS energy_model_id, + source_id + FROM {2} + ORDER BY id, time_step + """ + + +class QueryStrings(Enum): + """An enumeration of all the pre-defined query strings.""" + + SAMPLE = """ + SELECT * + FROM trajectory_table + WHERE date = \'{date}\' + AND partition_name=\'{partition}\' + LIMIT 15; + """ + + UPDATE_PARTITION = """ + ALTER TABLE {table} + ADD IF NOT EXISTS PARTITION (date = \'{date}\', partition_name=\'{partition}\'); + """ + + POWER_DEMAND_MODEL = """ + WITH regular_cte AS ( + SELECT + id, + time_step, + speed, + COALESCE (target_accel_with_noise_with_failsafe, realized_accel) AS acceleration, + road_grade, + source_id + FROM fact_vehicle_trace + WHERE 1 = 1 + AND date = \'{{date}}\' + AND partition_name=\'{{partition}}\' + ) + {}""".format(VEHICLE_POWER_DEMAND_TACOMA_FINAL_SELECT.format(1, + 'POWER_DEMAND_MODEL', + 'regular_cte')) + + POWER_DEMAND_MODEL_DENOISED_ACCEL = """ + WITH denoised_accel_cte AS ( + SELECT + id, + time_step, + speed, + COALESCE (target_accel_no_noise_with_failsafe, + target_accel_no_noise_no_failsafe, + realized_accel) AS acceleration, + road_grade, + source_id + FROM fact_vehicle_trace + WHERE 1 = 1 + AND date = \'{{date}}\' + AND partition_name=\'{{partition}}\' + ) + {}""".format(VEHICLE_POWER_DEMAND_TACOMA_FINAL_SELECT.format(1, + 'POWER_DEMAND_MODEL_DENOISED_ACCEL', + 'denoised_accel_cte')) + + POWER_DEMAND_MODEL_DENOISED_ACCEL_VEL = """ + WITH lagged_timestep AS ( + SELECT + id, + time_step, + COALESCE (target_accel_no_noise_with_failsafe, + target_accel_no_noise_no_failsafe, + realized_accel) AS acceleration, + road_grade, + source_id, + speed AS cur_speed, + time_step - LAG(time_step, 1) + OVER (PARTITION BY id ORDER BY time_step ASC ROWS BETWEEN 1 PRECEDING and CURRENT ROW) AS sim_step, + LAG(speed, 1) + OVER (PARTITION BY id ORDER BY time_step ASC ROWS BETWEEN 1 PRECEDING and CURRENT ROW) AS prev_speed + FROM fact_vehicle_trace + WHERE 1 = 1 + AND date = \'{{date}}\' + AND partition_name=\'{{partition}}\' + ), denoised_speed_cte AS ( + SELECT + id, + time_step, + COALESCE (prev_speed + acceleration * sim_step, cur_speed) AS speed, + acceleration, + road_grade, + source_id + FROM lagged_timestep + ) + {}""".format(VEHICLE_POWER_DEMAND_TACOMA_FINAL_SELECT.format(1, + 'POWER_DEMAND_MODEL_DENOISED_ACCEL_VEL', + 
+    POWER_DEMAND_MODEL_DENOISED_ACCEL_VEL = """
+        WITH lagged_timestep AS (
+            SELECT
+                id,
+                time_step,
+                COALESCE (target_accel_no_noise_with_failsafe,
+                          target_accel_no_noise_no_failsafe,
+                          realized_accel) AS acceleration,
+                road_grade,
+                source_id,
+                speed AS cur_speed,
+                time_step - LAG(time_step, 1)
+                    OVER (PARTITION BY id ORDER BY time_step ASC ROWS BETWEEN 1 PRECEDING and CURRENT ROW) AS sim_step,
+                LAG(speed, 1)
+                    OVER (PARTITION BY id ORDER BY time_step ASC ROWS BETWEEN 1 PRECEDING and CURRENT ROW) AS prev_speed
+            FROM fact_vehicle_trace
+            WHERE 1 = 1
+                AND date = \'{{date}}\'
+                AND partition_name=\'{{partition}}\'
+        ), denoised_speed_cte AS (
+            SELECT
+                id,
+                time_step,
+                COALESCE (prev_speed + acceleration * sim_step, cur_speed) AS speed,
+                acceleration,
+                road_grade,
+                source_id
+            FROM lagged_timestep
+        )
+        {}""".format(VEHICLE_POWER_DEMAND_TACOMA_FINAL_SELECT.format(1,
+                                                                     'POWER_DEMAND_MODEL_DENOISED_ACCEL_VEL',
+                                                                     'denoised_speed_cte'))
+
+    FACT_SAFETY_METRICS = """
+        SELECT
+            vt.id,
+            vt.time_step,
+            COALESCE((
+                value_lower_left*(headway_upper-headway)*(rel_speed_upper-leader_rel_speed) +
+                value_lower_right*(headway-headway_lower)*(rel_speed_upper-leader_rel_speed) +
+                value_upper_left*(headway_upper-headway)*(leader_rel_speed-rel_speed_lower) +
+                value_upper_right*(headway-headway_lower)*(leader_rel_speed-rel_speed_lower)
+            ) / ((headway_upper-headway_lower)*(rel_speed_upper-rel_speed_lower)), 200.0) AS safety_value,
+            vt.source_id
+        FROM fact_vehicle_trace vt
+        LEFT OUTER JOIN fact_safety_matrix sm ON 1 = 1
+            AND vt.leader_rel_speed BETWEEN sm.rel_speed_lower AND sm.rel_speed_upper
+            AND vt.headway BETWEEN sm.headway_lower AND sm.headway_upper
+        WHERE 1 = 1
+            AND vt.date = \'{date}\'
+            AND vt.partition_name = \'{partition}\'
+            AND vt.time_step >= {start_filter}
+            AND vt.{loc_filter}
+        ;
+        """
+
+    FACT_SAFETY_METRICS_AGG = """
+        SELECT
+            source_id,
+            SUM(CASE WHEN safety_value < 0 THEN 1.0 ELSE 0.0 END) * 100.0 / COUNT(*) AS safety_rate,
+            MAX(safety_value) AS safety_value_max
+        FROM fact_safety_metrics
+        WHERE 1 = 1
+            AND date = \'{date}\'
+            AND partition_name = \'{partition}_FACT_SAFETY_METRICS\'
+        GROUP BY 1
+        ;
+        """
+
+    FACT_SAFETY_METRICS_BINNED = """
+        WITH unfilter_bins AS (
+            SELECT
+                ROW_NUMBER() OVER() - 51 AS lb,
+                ROW_NUMBER() OVER() - 50 AS ub
+            FROM fact_safety_metrics
+        ), bins AS (
+            SELECT
+                lb,
+                ub
+            FROM unfilter_bins
+            WHERE 1=1
+                AND lb >= -10
+                AND ub <= 10
+        )
+        SELECT
+            CONCAT('[', CAST(bins.lb AS VARCHAR), ', ', CAST(bins.ub AS VARCHAR), ')') AS safety_value_bin,
+            COUNT(*) AS count
+        FROM bins, fact_safety_metrics fsm
+        WHERE 1 = 1
+            AND fsm.date = \'{date}\'
+            AND fsm.partition_name = \'{partition}_FACT_SAFETY_METRICS\'
+            AND fsm.safety_value >= bins.lb
+            AND fsm.safety_value < bins.ub
+        GROUP BY 1
+        ;
+        """
+
+    FACT_NETWORK_THROUGHPUT_AGG = """
+        WITH min_time AS (
+            SELECT
+                source_id,
+                id,
+                MIN(time_step) AS enter_time
+            FROM fact_vehicle_trace
+            WHERE 1 = 1
+                AND date = \'{date}\'
+                AND partition_name = \'{partition}\'
+                AND {loc_filter}
+            GROUP BY 1, 2
+        ), agg AS (
+            SELECT
+                source_id,
+                COUNT(DISTINCT id) AS n_vehicles,
+                MAX(enter_time) - MIN(enter_time) AS total_time_seconds
+            FROM min_time
+            WHERE 1 = 1
+                AND enter_time >= {start_filter}
+            GROUP BY 1
+        )
+        SELECT
+            source_id,
+            n_vehicles * 3600 / total_time_seconds AS throughput_per_hour
+        FROM agg
+        ;"""
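The fuel-efficiency queries that follow convert meters-per-joule into miles-per-gallon with the constant 33554.13. That constant follows directly from the pipeline's 15 kW = 1 gallon/hour convention (see `flow/energy_models/base_energy.py` later in this diff); the short check below is mine:

```python
# Where the 33554.13 in the fuel-efficiency queries comes from: under
# the 15 kW == 1 gallon/hour convention, one gallon corresponds to
# 15e3 W * 3600 s of energy.
JOULES_PER_GALLON = 15e3 * 3600  # 54 MJ
METERS_PER_MILE = 1609.34

print(JOULES_PER_GALLON / METERS_PER_MILE)  # ~33554.13, the SQL constant
```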
+    FACT_VEHICLE_FUEL_EFFICIENCY_AGG = """
+        WITH sub_fact_vehicle_trace AS (
+            SELECT
+                v.id,
+                v.source_id,
+                e.energy_model_id,
+                MAX(distance) - MIN(distance) AS distance_meters,
+                (MAX(e.time_step) - MIN(e.time_step)) / (COUNT(DISTINCT e.time_step) - 1) AS time_step_size_seconds,
+                SUM(e.power) AS power_watts
+            FROM fact_vehicle_trace v
+            JOIN fact_energy_trace AS e ON 1 = 1
+                AND e.id = v.id
+                AND e.time_step = v.time_step
+                AND e.source_id = v.source_id
+                AND e.date = \'{date}\'
+                AND e.partition_name = \'{partition}_POWER_DEMAND_MODEL_DENOISED_ACCEL\'
+                AND e.energy_model_id = 'POWER_DEMAND_MODEL_DENOISED_ACCEL'
+                AND e.time_step >= {start_filter}
+            WHERE 1 = 1
+                AND v.date = \'{date}\'
+                AND v.partition_name = \'{partition}\'
+                AND v.{loc_filter}
+            GROUP BY 1, 2, 3
+            HAVING 1 = 1
+                AND MAX(distance) - MIN(distance) > 10
+                AND COUNT(DISTINCT e.time_step) > 10
+        )
+        SELECT
+            id,
+            source_id,
+            energy_model_id,
+            distance_meters,
+            power_watts * time_step_size_seconds AS energy_joules,
+            distance_meters / (power_watts * time_step_size_seconds) AS efficiency_meters_per_joules,
+            33554.13 * distance_meters / (power_watts * time_step_size_seconds) AS efficiency_miles_per_gallon
+        FROM sub_fact_vehicle_trace
+        WHERE 1 = 1
+            AND power_watts * time_step_size_seconds != 0
+        ;
+        """
+
+    FACT_VEHICLE_FUEL_EFFICIENCY_BINNED = """
+        WITH unfilter_bins AS (
+            SELECT
+                ROW_NUMBER() OVER() - 1 AS lb,
+                ROW_NUMBER() OVER() AS ub
+            FROM fact_safety_metrics
+        ), bins AS (
+            SELECT
+                lb,
+                ub
+            FROM unfilter_bins
+            WHERE 1=1
+                AND lb >= 0
+                AND ub <= 20
+        )
+        SELECT
+            CONCAT('[', CAST(bins.lb AS VARCHAR), ', ', CAST(bins.ub AS VARCHAR), ')') AS fuel_efficiency_bin,
+            COUNT(*) AS count
+        FROM bins, fact_vehicle_fuel_efficiency_agg agg
+        WHERE 1 = 1
+            AND agg.date = \'{date}\'
+            AND agg.partition_name = \'{partition}_FACT_VEHICLE_FUEL_EFFICIENCY_AGG\'
+            AND agg.energy_model_id = 'POWER_DEMAND_MODEL_DENOISED_ACCEL'
+            AND 1000 * agg.efficiency_meters_per_joules >= bins.lb
+            AND 1000 * agg.efficiency_meters_per_joules < bins.ub
+        GROUP BY 1
+        ;
+        """
+
+    FACT_NETWORK_FUEL_EFFICIENCY_AGG = """
+        SELECT
+            source_id,
+            energy_model_id,
+            SUM(distance_meters) AS distance_meters,
+            SUM(energy_joules) AS energy_joules,
+            SUM(distance_meters) / SUM(energy_joules) AS efficiency_meters_per_joules,
+            33554.13 * SUM(distance_meters) / SUM(energy_joules) AS efficiency_miles_per_gallon
+        FROM fact_vehicle_fuel_efficiency_agg
+        WHERE 1 = 1
+            AND date = \'{date}\'
+            AND partition_name = \'{partition}_FACT_VEHICLE_FUEL_EFFICIENCY_AGG\'
+            AND energy_model_id = 'POWER_DEMAND_MODEL_DENOISED_ACCEL'
+        GROUP BY 1, 2
+        HAVING 1=1
+            AND SUM(energy_joules) != 0
+        ;"""
+
+    LEADERBOARD_CHART = """
+        SELECT
+            t.source_id,
+            e.energy_model_id,
+            e.efficiency_meters_per_joules,
+            33554.13 * e.efficiency_meters_per_joules AS efficiency_miles_per_gallon,
+            t.throughput_per_hour,
+            s.safety_rate,
+            s.safety_value_max
+        FROM fact_network_throughput_agg AS t
+        JOIN fact_network_fuel_efficiency_agg AS e ON 1 = 1
+            AND e.date = \'{date}\'
+            AND e.partition_name = \'{partition}_FACT_NETWORK_FUEL_EFFICIENCY_AGG\'
+            AND t.source_id = e.source_id
+            AND e.energy_model_id = 'POWER_DEMAND_MODEL_DENOISED_ACCEL'
+        JOIN fact_safety_metrics_agg AS s ON 1 = 1
+            AND s.date = \'{date}\'
+            AND s.partition_name = \'{partition}_FACT_SAFETY_METRICS_AGG\'
+            AND t.source_id = s.source_id
+        WHERE 1 = 1
+            AND t.date = \'{date}\'
+            AND t.partition_name = \'{partition}_FACT_NETWORK_THROUGHPUT_AGG\'
+        ;"""
+
+    FACT_NETWORK_INFLOWS_OUTFLOWS = """
+        WITH min_max_time_step AS (
+            SELECT
+                id,
+                source_id,
+                MIN(time_step) AS min_time_step,
+                MAX(time_step) AS max_time_step
+            FROM fact_vehicle_trace
+            WHERE 1 = 1
+                AND date = \'{date}\'
+                AND partition_name = \'{partition}\'
+                AND {loc_filter}
+            GROUP BY 1, 2
+        ), inflows AS (
+            SELECT
+                CAST(min_time_step / 60 AS INTEGER) * 60 AS time_step,
+                source_id,
+                60 * COUNT(DISTINCT id) AS inflow_rate
+            FROM min_max_time_step
+            WHERE 1 = 1
+                AND min_time_step >= {start_filter}
+                AND min_time_step < {stop_filter}
+            GROUP BY 1, 2
+        ), outflows AS (
+            SELECT
+                CAST(max_time_step / 60 AS INTEGER) * 60 AS time_step,
+                source_id,
+                60 * COUNT(DISTINCT id) AS outflow_rate
+            FROM min_max_time_step
+            WHERE 1 = 1
+                AND max_time_step >= {start_filter}
+                AND max_time_step < {stop_filter}
+            GROUP BY 1, 2
+        )
+        SELECT
+            COALESCE(i.time_step, o.time_step) - MIN(COALESCE(i.time_step, o.time_step))
+                OVER (PARTITION BY COALESCE(i.source_id, o.source_id)
+                      ORDER BY COALESCE(i.time_step, o.time_step) ASC) AS time_step,
+            COALESCE(i.source_id, o.source_id) AS source_id,
+            COALESCE(i.inflow_rate, 0) AS inflow_rate,
+            COALESCE(o.outflow_rate, 0) AS outflow_rate
+        FROM inflows i
+        FULL OUTER JOIN outflows o ON 1 = 1
+            AND i.time_step = o.time_step
+            AND i.source_id = o.source_id
+        ORDER BY time_step
+        ;"""
+    FACT_NETWORK_METRICS_BY_DISTANCE_AGG = """
+        WITH joined_trace AS (
+            SELECT
+                vt.id,
+                vt.source_id,
+                vt.time_step,
+                vt.distance - FIRST_VALUE(vt.distance)
+                    OVER (PARTITION BY vt.id, vt.source_id ORDER BY vt.time_step ASC) AS distance_meters,
+                energy_model_id,
+                et.speed,
+                et.acceleration,
+                vt.time_step - LAG(vt.time_step, 1)
+                    OVER (PARTITION BY vt.id, vt.source_id ORDER BY vt.time_step ASC) AS sim_step,
+                SUM(power)
+                    OVER (PARTITION BY vt.id, vt.source_id ORDER BY vt.time_step ASC
+                          ROWS BETWEEN UNBOUNDED PRECEDING and CURRENT ROW) AS cumulative_power
+            FROM fact_vehicle_trace vt
+            JOIN fact_energy_trace et ON 1 = 1
+                AND et.date = \'{date}\'
+                AND et.partition_name = \'{partition}_POWER_DEMAND_MODEL_DENOISED_ACCEL\'
+                AND vt.id = et.id
+                AND vt.source_id = et.source_id
+                AND vt.time_step = et.time_step
+                AND et.energy_model_id = 'POWER_DEMAND_MODEL_DENOISED_ACCEL'
+            WHERE 1 = 1
+                AND vt.date = \'{date}\'
+                AND vt.partition_name = \'{partition}\'
+                AND vt.{loc_filter}
+                AND vt.time_step >= {start_filter}
+        ), cumulative_energy AS (
+            SELECT
+                id,
+                source_id,
+                time_step,
+                distance_meters,
+                energy_model_id,
+                speed,
+                acceleration,
+                cumulative_power * sim_step AS energy_joules
+            FROM joined_trace
+        ), binned_cumulative_energy AS (
+            SELECT
+                source_id,
+                CAST(distance_meters/10 AS INTEGER) * 10 AS distance_meters_bin,
+                AVG(speed) AS speed_avg,
+                AVG(speed) + STDDEV(speed) AS speed_upper_bound,
+                AVG(speed) - STDDEV(speed) AS speed_lower_bound,
+                AVG(acceleration) AS accel_avg,
+                AVG(acceleration) + STDDEV(acceleration) AS accel_upper_bound,
+                AVG(acceleration) - STDDEV(acceleration) AS accel_lower_bound,
+                AVG(energy_joules) AS cumulative_energy_avg,
+                AVG(energy_joules) + STDDEV(energy_joules) AS cumulative_energy_upper_bound,
+                AVG(energy_joules) - STDDEV(energy_joules) AS cumulative_energy_lower_bound
+            FROM cumulative_energy
+            GROUP BY 1, 2
+            HAVING 1 = 1
+                AND COUNT(DISTINCT time_step) > 1
+        ), binned_energy_start_end AS (
+            SELECT DISTINCT
+                source_id,
+                id,
+                CAST(distance_meters/10 AS INTEGER) * 10 AS distance_meters_bin,
+                FIRST_VALUE(energy_joules)
+                    OVER (PARTITION BY id, CAST(distance_meters/10 AS INTEGER) * 10
+                          ORDER BY time_step ASC) AS energy_start,
+                LAST_VALUE(energy_joules)
+                    OVER (PARTITION BY id, CAST(distance_meters/10 AS INTEGER) * 10
+                          ORDER BY time_step ASC
+                          ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS energy_end
+            FROM cumulative_energy
+        ), binned_energy AS (
+            SELECT
+                source_id,
+                distance_meters_bin,
+                AVG(energy_end - energy_start) AS instantaneous_energy_avg,
+                AVG(energy_end - energy_start) + STDDEV(energy_end - energy_start) AS instantaneous_energy_upper_bound,
+                AVG(energy_end - energy_start) - STDDEV(energy_end - energy_start) AS instantaneous_energy_lower_bound
+            FROM binned_energy_start_end
+            GROUP BY 1, 2
+        )
+        SELECT
+            bce.source_id AS source_id,
+            bce.distance_meters_bin AS distance_meters_bin,
+            bce.cumulative_energy_avg,
+            bce.cumulative_energy_lower_bound,
+            bce.cumulative_energy_upper_bound,
+            bce.speed_avg,
+            bce.speed_upper_bound,
+            bce.speed_lower_bound,
+            bce.accel_avg,
+            bce.accel_upper_bound,
+            bce.accel_lower_bound,
+            COALESCE(be.instantaneous_energy_avg, 0) AS instantaneous_energy_avg,
+            COALESCE(be.instantaneous_energy_upper_bound, 0) AS instantaneous_energy_upper_bound,
+            COALESCE(be.instantaneous_energy_lower_bound, 0) AS instantaneous_energy_lower_bound
+        FROM binned_cumulative_energy bce
+        JOIN binned_energy be ON 1 = 1
+            AND bce.source_id = be.source_id
+            AND bce.distance_meters_bin = be.distance_meters_bin
+        ORDER BY distance_meters_bin ASC
+        ;"""
+    FACT_NETWORK_METRICS_BY_TIME_AGG = """
+        WITH joined_trace AS (
+            SELECT
+                vt.id,
+                vt.source_id,
+                vt.time_step - FIRST_VALUE(vt.time_step)
+                    OVER (PARTITION BY vt.id, vt.source_id ORDER BY vt.time_step ASC) AS time_step,
+                energy_model_id,
+                et.speed,
+                et.acceleration,
+                vt.time_step - LAG(vt.time_step, 1)
+                    OVER (PARTITION BY vt.id, vt.source_id ORDER BY vt.time_step ASC) AS sim_step,
+                SUM(power)
+                    OVER (PARTITION BY vt.id, vt.source_id ORDER BY vt.time_step ASC
+                          ROWS BETWEEN UNBOUNDED PRECEDING and CURRENT ROW) AS cumulative_power
+            FROM fact_vehicle_trace vt
+            JOIN fact_energy_trace et ON 1 = 1
+                AND et.date = \'{date}\'
+                AND et.partition_name = \'{partition}_POWER_DEMAND_MODEL_DENOISED_ACCEL\'
+                AND vt.id = et.id
+                AND vt.source_id = et.source_id
+                AND vt.time_step = et.time_step
+                AND et.energy_model_id = 'POWER_DEMAND_MODEL_DENOISED_ACCEL'
+            WHERE 1 = 1
+                AND vt.date = \'{date}\'
+                AND vt.partition_name = \'{partition}\'
+                AND vt.{loc_filter}
+                AND vt.time_step >= {start_filter}
+        ), cumulative_energy AS (
+            SELECT
+                id,
+                source_id,
+                time_step,
+                energy_model_id,
+                speed,
+                acceleration,
+                cumulative_power * sim_step AS energy_joules
+            FROM joined_trace
+        ), binned_cumulative_energy AS (
+            SELECT
+                source_id,
+                CAST(time_step/10 AS INTEGER) * 10 AS time_seconds_bin,
+                AVG(speed) AS speed_avg,
+                AVG(speed) + STDDEV(speed) AS speed_upper_bound,
+                AVG(speed) - STDDEV(speed) AS speed_lower_bound,
+                AVG(acceleration) AS accel_avg,
+                AVG(acceleration) + STDDEV(acceleration) AS accel_upper_bound,
+                AVG(acceleration) - STDDEV(acceleration) AS accel_lower_bound,
+                AVG(energy_joules) AS cumulative_energy_avg,
+                AVG(energy_joules) + STDDEV(energy_joules) AS cumulative_energy_upper_bound,
+                AVG(energy_joules) - STDDEV(energy_joules) AS cumulative_energy_lower_bound
+            FROM cumulative_energy
+            GROUP BY 1, 2
+            HAVING 1 = 1
+                AND COUNT(DISTINCT time_step) > 1
+        ), binned_energy_start_end AS (
+            SELECT DISTINCT
+                source_id,
+                id,
+                CAST(time_step/10 AS INTEGER) * 10 AS time_seconds_bin,
+                FIRST_VALUE(energy_joules)
+                    OVER (PARTITION BY id, CAST(time_step/10 AS INTEGER) * 10
+                          ORDER BY time_step ASC) AS energy_start,
+                LAST_VALUE(energy_joules)
+                    OVER (PARTITION BY id, CAST(time_step/10 AS INTEGER) * 10
+                          ORDER BY time_step ASC
+                          ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS energy_end
+            FROM cumulative_energy
+        ), binned_energy AS (
+            SELECT
+                source_id,
+                time_seconds_bin,
+                AVG(energy_end - energy_start) AS instantaneous_energy_avg,
+                AVG(energy_end - energy_start) + STDDEV(energy_end - energy_start) AS instantaneous_energy_upper_bound,
+                AVG(energy_end - energy_start) - STDDEV(energy_end - energy_start) AS instantaneous_energy_lower_bound
+            FROM binned_energy_start_end
+            GROUP BY 1, 2
+        )
+        SELECT
+            bce.source_id AS source_id,
+            bce.time_seconds_bin AS time_seconds_bin,
+            bce.cumulative_energy_avg,
+            bce.cumulative_energy_lower_bound,
+            bce.cumulative_energy_upper_bound,
+            bce.speed_avg,
+            bce.speed_upper_bound,
+            bce.speed_lower_bound,
+            bce.accel_avg,
+            bce.accel_upper_bound,
+            bce.accel_lower_bound,
+            COALESCE(be.instantaneous_energy_avg, 0) AS instantaneous_energy_avg,
+            COALESCE(be.instantaneous_energy_upper_bound, 0) AS instantaneous_energy_upper_bound,
+            COALESCE(be.instantaneous_energy_lower_bound, 0) AS instantaneous_energy_lower_bound
+        FROM binned_cumulative_energy bce
+        JOIN binned_energy be ON 1 = 1
+            AND bce.source_id = be.source_id
+            AND bce.time_seconds_bin = be.time_seconds_bin
+        ORDER BY time_seconds_bin ASC
+        ;"""
+    FACT_VEHICLE_COUNTS_BY_TIME = """
+        WITH counts AS (
+            SELECT
+                vt.source_id,
+                vt.time_step,
+                COUNT(DISTINCT vt.id) AS vehicle_count
+            FROM fact_vehicle_trace vt
+            WHERE 1 = 1
+                AND vt.date = \'{date}\'
+                AND vt.partition_name = \'{partition}\'
+                AND vt.{loc_filter}
+                AND vt.time_step >= {start_filter}
+            GROUP BY 1, 2
+        )
+        SELECT
+            source_id,
+            time_step - FIRST_VALUE(time_step)
+                OVER (PARTITION BY source_id ORDER BY time_step ASC) AS time_step,
+            vehicle_count
+        FROM counts
+        ;
+        """
+
+    LEADERBOARD_CHART_AGG = """
+        WITH agg AS (
+            SELECT
+                l.date AS submission_date,
+                m.submission_time,
+                l.source_id,
+                m.submitter_name,
+                m.strategy,
+                m.network,
+                m.is_baseline,
+                l.energy_model_id,
+                l.efficiency_meters_per_joules,
+                l.efficiency_miles_per_gallon,
+                l.throughput_per_hour,
+                l.safety_rate,
+                l.safety_value_max,
+                b.source_id AS baseline_source_id
+            FROM leaderboard_chart AS l, metadata_table AS m, baseline_table as b
+            WHERE 1 = 1
+                AND l.source_id = m.source_id
+                AND m.network = b.network
+                AND (m.is_baseline='False'
+                     OR (m.is_baseline='True'
+                         AND m.source_id = b.source_id))
+        )
+        SELECT
+            agg.submission_date,
+            agg.source_id,
+            agg.submitter_name,
+            agg.strategy,
+            agg.network,
+            agg.is_baseline,
+            agg.energy_model_id,
+            agg.efficiency_meters_per_joules,
+            agg.efficiency_miles_per_gallon,
+            100 * (1 - baseline.efficiency_miles_per_gallon / agg.efficiency_miles_per_gallon) AS percent_improvement,
+            agg.throughput_per_hour,
+            agg.safety_rate,
+            agg.safety_value_max
+        FROM agg
+        JOIN agg AS baseline ON 1 = 1
+            AND agg.network = baseline.network
+            AND baseline.is_baseline = 'True'
+            AND agg.baseline_source_id = baseline.source_id
+        ORDER BY agg.submission_date, agg.submission_time ASC
+        ;"""
+
+    FACT_TOP_SCORES = """
+        WITH curr_max AS (
+            SELECT
+                network,
+                submission_date,
+                1000 * MAX(efficiency_meters_per_joules)
+                    OVER (PARTITION BY network ORDER BY submission_date ASC
+                          ROWS BETWEEN UNBOUNDED PRECEDING and CURRENT ROW) AS max_score
+            FROM leaderboard_chart_agg
+            WHERE 1 = 1
+                AND is_baseline = 'False'
+        ), prev_max AS (
+            SELECT
+                network,
+                submission_date,
+                LAG(max_score, 1) IGNORE NULLS
+                    OVER (PARTITION BY network ORDER BY submission_date ASC) AS max_score
+            FROM curr_max
+        ), unioned AS (
+            SELECT * FROM curr_max
+            UNION ALL
+            SELECT * FROM prev_max
+        )
+        SELECT DISTINCT *
+        FROM unioned
+        ORDER BY 1, 2, 3
+        ;"""
diff --git a/flow/data_pipeline/run_query.py b/flow/data_pipeline/run_query.py
new file mode 100644
index 000000000..1eb802205
--- /dev/null
+++ b/flow/data_pipeline/run_query.py
@@ -0,0 +1,35 @@
+"""runner script for invoking queries manually."""
+import argparse
+from flow.data_pipeline.data_pipeline import AthenaQuery
+from flow.data_pipeline.query import QueryStrings
+
+parser = argparse.ArgumentParser(prog="run_query", description="runs a query on AWS Athena and stores the "
+                                                               "result at an S3 location")
+parser.add_argument("--run", type=str, nargs="+")
+parser.add_argument("--result_location", type=str, nargs='?', default="s3://circles.data.pipeline/query-result/")
+parser.add_argument("--partition", type=str, nargs='?', default="default")
+parser.add_argument("--list_partitions", action="store_true")
+parser.add_argument("--check_status", type=str, nargs='+')
+parser.add_argument("--list_queries", action="store_true")
+parser.add_argument("--test_query", nargs=1)
+
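Hypothetical invocations of this runner (the partition name is made up); the parser itself can also be exercised without touching AWS:

```python
# Example command lines (illustrative):
#   python flow/data_pipeline/run_query.py --list_queries
#   python flow/data_pipeline/run_query.py --check_status <execution_id>
#   python flow/data_pipeline/run_query.py --run FACT_SAFETY_METRICS \
#       --partition flow_12345_1
# --run accepts several query names at once (nargs="+"). Dry parse:
args = parser.parse_args(["--run", "FACT_SAFETY_METRICS", "--partition", "flow_12345_1"])
print(args.run, args.partition)  # ['FACT_SAFETY_METRICS'] flow_12345_1
```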
+if __name__ == "__main__":
+    args = parser.parse_args()
+    queryEngine = AthenaQuery()
+
+    if args.run:
+        execution_ids = []
+        for query_name in args.run:
+            execution_ids.append(queryEngine.run_query(query_name, args.result_location, partition=args.partition))
+        print(execution_ids)
+    if args.list_partitions:
+        print(queryEngine.existing_partitions)
+    if args.check_status:
+        status = dict()
+        for execution_id in args.check_status:
+            status[execution_id] = queryEngine.check_status(execution_id)
+        print(status)
+    if args.list_queries:
+        for q in QueryStrings:
+            print(q)
diff --git a/flow/energy_models/base_energy.py b/flow/energy_models/base_energy.py
new file mode 100644
index 000000000..ba5da5080
--- /dev/null
+++ b/flow/energy_models/base_energy.py
@@ -0,0 +1,62 @@
+"""Script containing the base vehicle energy class."""
+from abc import ABCMeta, abstractmethod
+
+
+class BaseEnergyModel(metaclass=ABCMeta):
+    """Base energy model class.
+
+    Calculates the instantaneous power consumption of a vehicle in the
+    network and returns the power in Watts, regardless of the vehicle
+    type: electric or combustion engine, Toyota Prius or Tacoma, or
+    non-Toyota vehicles. Non-Toyota vehicles default to an average-sized
+    vehicle.
+
+    Note: road grade is included as an input parameter, but the
+    functional dependence on road grade is not yet implemented.
+    """
+
+    def __init__(self):
+        # 15 kilowatts = 1 gallon/hour conversion factor
+        self.conversion = 15e3
+
+    @abstractmethod
+    def get_instantaneous_power(self, accel, speed, grade):
+        """Calculate the instantaneous power consumption of a vehicle.
+
+        Must be implemented by child classes.
+
+        Parameters
+        ----------
+        accel : float
+            Instantaneous acceleration of the vehicle
+        speed : float
+            Instantaneous speed of the vehicle
+        grade : float
+            Instantaneous road grade of the vehicle
+
+        Returns
+        -------
+        float
+        """
+        pass
+
+    def get_instantaneous_fuel_consumption(self, accel, speed, grade):
+        """Calculate the instantaneous fuel consumption of a vehicle.
+
+        Fuel consumption is reported in gallons per hour, with the
+        conversion rate of 15 kW = 1 gallon/hour.
+
+        Parameters
+        ----------
+        accel : float
+            Instantaneous acceleration of the vehicle
+        speed : float
+            Instantaneous speed of the vehicle
+        grade : float
+            Instantaneous road grade of the vehicle
+
+        Returns
+        -------
+        float
+        """
+        return self.get_instantaneous_power(accel, speed, grade) / self.conversion
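An illustrative-only subclass (not part of the diff) shows how the abstract interface and the 15 kW = 1 gallon/hour conversion compose; the class name and coefficients are made up:

```python
# Toy model whose power is just the tractive term m * a * v, clipped at
# zero; it inherits the gallons-per-hour conversion from the base class.
from flow.energy_models.base_energy import BaseEnergyModel


class ToyTractiveModel(BaseEnergyModel):
    """Toy model: P = max(0, mass * accel * speed); grade is ignored."""

    def __init__(self, mass=1500.0):
        super(ToyTractiveModel, self).__init__()
        self.mass = mass

    def get_instantaneous_power(self, accel, speed, grade):
        """See parent class; road grade accepted but unused."""
        return max(0.0, self.mass * accel * speed)


model = ToyTractiveModel()
print(model.get_instantaneous_power(1.0, 10.0, 0.0))             # 15000.0 W
print(model.get_instantaneous_fuel_consumption(1.0, 10.0, 0.0))  # 1.0 gal/hr
```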
diff --git a/flow/energy_models/power_demand.py b/flow/energy_models/power_demand.py
new file mode 100644
index 000000000..d8cd918e0
--- /dev/null
+++ b/flow/energy_models/power_demand.py
@@ -0,0 +1,171 @@
+"""Script containing the vehicle power demand model energy classes."""
+from abc import ABCMeta, abstractmethod
+import math
+import numpy as np
+
+from flow.energy_models.base_energy import BaseEnergyModel
+
+
+class PowerDemandModel(BaseEnergyModel, metaclass=ABCMeta):
+    """Vehicle Power Demand base energy model class.
+
+    Calculates the power consumption of a vehicle based on a physics
+    derivation and assumes some vehicle characteristics. The power
+    calculated here is a lower bound on the actual power consumed by
+    the vehicle, plus a bilinear polynomial function used as a
+    correction factor.
+    """
+
+    def __init__(self,
+                 mass=2041,
+                 area=3.2,
+                 rolling_res_coeff=0.0027,
+                 aerodynamic_drag_coeff=0.4,
+                 p1_correction=4598.7155,
+                 p3_correction=975.12719):
+        super(PowerDemandModel, self).__init__()
+
+        self.g = 9.807
+        self.rho_air = 1.225
+        self.gamma = 1
+        self.mass = mass
+        self.cross_area = area
+        self.rolling_res_coeff = rolling_res_coeff
+        self.aerodynamic_drag_coeff = aerodynamic_drag_coeff
+        self.power_correction_coeffs = np.array([p1_correction, p3_correction])
+
+    def calculate_power_at_the_wheels(self, accel, speed, grade):
+        """Calculate the instantaneous power required.
+
+        Parameters
+        ----------
+        accel : float
+            Instantaneous acceleration of the vehicle
+        speed : float
+            Instantaneous speed of the vehicle
+        grade : float
+            Instantaneous road grade of the vehicle
+        Returns
+        -------
+        float
+        """
+        accel_slope_forces = self.mass * speed * (
+            (np.heaviside(accel, 0.5) * (1 - self.gamma) + self.gamma) * accel + self.g * math.sin(grade))
+        rolling_friction = self.mass * self.g * self.rolling_res_coeff * speed
+        air_drag = 0.5 * self.rho_air * self.cross_area * self.aerodynamic_drag_coeff * speed**3
+        power = accel_slope_forces + rolling_friction + air_drag
+        return power
+
+    @abstractmethod
+    def get_regen_cap(self, accel, speed, grade):
+        """Return the maximum power retainable from regenerative braking.
+
+        A negative regen cap is interpreted as a positive regenerative power.
+
+        Parameters
+        ----------
+        accel : float
+            Instantaneous acceleration of the vehicle
+        speed : float
+            Instantaneous speed of the vehicle
+        grade : float
+            Instantaneous road grade of the vehicle
+
+        Returns
+        -------
+        float
+        """
+        pass
+
+    def get_power_correction_factor(self, accel, speed, grade):
+        """Calculate the instantaneous power correction of a vehicle.
+
+        Parameters
+        ----------
+        accel : float
+            Instantaneous acceleration of the vehicle
+        speed : float
+            Instantaneous speed of the vehicle
+        grade : float
+            Instantaneous road grade of the vehicle
+
+        Returns
+        -------
+        float
+        """
+        state_variables = np.array([accel, accel * speed])
+        return max(0, np.dot(self.power_correction_coeffs, state_variables))
+
+    def get_instantaneous_power(self, accel, speed, grade):
+        """See parent class.
+
+        Apply the regenerative braking cap to the modelled power demand.
+        """
+        regen_cap = self.get_regen_cap(accel, speed, grade)
+        power_at_the_wheels = max(regen_cap, self.calculate_power_at_the_wheels(accel, speed, grade))
+        correction_factor = self.get_power_correction_factor(accel, speed, grade)
+        return power_at_the_wheels + correction_factor
+
+
+class PDMCombustionEngine(PowerDemandModel):
+    """Power Demand Model for a combustion engine vehicle."""
+
+    def __init__(self,
+                 idle_coeff=3405.5481762,
+                 linear_friction_coeff=83.123929917,
+                 quadratic_friction_coeff=6.7650718327,
+                 drag_coeff=0.7041355229,
+                 p1_correction=4598.7155,
+                 p3_correction=975.12719):
+        super(PDMCombustionEngine, self).__init__(p1_correction=p1_correction, p3_correction=p3_correction)
+        self.fuel_consumption_power_coeffs = np.array([idle_coeff,
+                                                       linear_friction_coeff,
+                                                       quadratic_friction_coeff,
+                                                       drag_coeff])
+
+    def get_regen_cap(self, accel, speed, grade):
+        """See parent class."""
+        return 0
+
+    def calculate_fuel_consumption_power(self, accel, speed, grade):
+        """Calculate the instantaneous power from a fitted function to Toyota Tacoma fuel consumption.
+ + Parameters + ---------- + accel : float + Instantaneous acceleration of the vehicle + speed : float + Instantaneous speed of the vehicle + grade : float + Instantaneous road grade of the vehicle + Returns + ------- + float + """ + state_variables = np.array([1, speed, speed**2, speed**3]) + power_0 = np.dot(self.fuel_consumption_power_coeffs, state_variables) + return max(self.mass * accel * speed + power_0, 0) + + def get_instantaneous_power(self, accel, speed, grade): + """See parent class.""" + fuel_consumption_power = self.calculate_fuel_consumption_power(accel, speed, grade) + power_correction_factor = self.get_power_correction_factor(accel, speed, grade) + return fuel_consumption_power + power_correction_factor + + +class PDMElectric(PowerDemandModel): + """Power Demand Model for an electric vehicle.""" + + def __init__(self, + mass=1663, + area=2.4, + rolling_res_coeff=0.007, + aerodynamic_drag_coeff=0.24): + super(PDMElectric, self).__init__(mass=mass, + area=area, + rolling_res_coeff=rolling_res_coeff, + aerodynamic_drag_coeff=aerodynamic_drag_coeff) + + def get_regen_cap(self, accel, speed, grade): + """See parent class.""" + return -2.8 * speed diff --git a/flow/energy_models/toyota_energy.py b/flow/energy_models/toyota_energy.py new file mode 100644 index 000000000..397610089 --- /dev/null +++ b/flow/energy_models/toyota_energy.py @@ -0,0 +1,66 @@ +"""Script containing the Toyota energy classes.""" +from abc import ABCMeta, abstractmethod +import dill as pickle +import boto3 +import os + +from flow.energy_models.base_energy import BaseEnergyModel + + +class ToyotaModel(BaseEnergyModel, metaclass=ABCMeta): + """Base Toyota Energy model class.""" + + def __init__(self, filename): + super(ToyotaModel, self).__init__() + + # download file from s3 bucket + s3 = boto3.client('s3') + s3.download_file('toyota.restricted', filename, 'temp.pkl') + + with open('temp.pkl', 'rb') as file: + try: + self.toyota_energy = pickle.load(file) + # delete pickle file + os.remove('temp.pkl') + except TypeError: + print('Must use Python version 3.6.8 to unpickle') + # delete pickle file + os.remove('temp.pkl') + raise + + @abstractmethod + def get_instantaneous_power(self, accel, speed, grade): + """See parent class.""" + pass + + +class PriusEnergy(ToyotaModel): + """Toyota Prius (EV) energy model class.""" + + def __init__(self, sim_step, soc=0.9): + super(PriusEnergy, self).__init__(filename='prius_ev.pkl') + self.sim_step = sim_step + self.soc = soc + + def get_instantaneous_power(self, accel, speed, grade): + """See parent class.""" + socdot = self.toyota_energy(self.soc, accel, speed, grade) + self.soc -= socdot * self.sim_step + # FIXME (Joy): convert socdot to power + return socdot + + +class TacomaEnergy(ToyotaModel): + """Toyota Tacoma energy model class.""" + + def __init__(self): + super(TacomaEnergy, self).__init__(filename='tacoma.pkl') + + def get_instantaneous_power(self, accel, speed, grade): + """See parent class.""" + return self.get_instantaneous_fuel_consumption(accel, speed, grade) * self.conversion + + def get_instantaneous_fuel_consumption(self, accel, speed, grade): + """See parent class.""" + fc = self.toyota_energy(accel, speed, grade) + return fc * 3600.0 / 3217.25 diff --git a/flow/envs/__init__.py b/flow/envs/__init__.py index 611ed3d9a..8bea3dd4f 100755 --- a/flow/envs/__init__.py +++ b/flow/envs/__init__.py @@ -11,6 +11,7 @@ from flow.envs.ring.wave_attenuation import WaveAttenuationEnv, \ WaveAttenuationPOEnv from flow.envs.merge import MergePOEnv +from 
flow.envs.straightroad_env import SingleStraightRoad
 from flow.envs.test import TestEnv
 
 # deprecated classes whose names have changed
@@ -37,6 +38,7 @@
     'BottleneckDesiredVelocityEnv',
     'TestEnv',
     'BayBridgeEnv',
+    'SingleStraightRoad',
     # deprecated classes
     'BottleNeckAccelEnv',
     'DesiredVelocityEnv',
diff --git a/flow/envs/base.py b/flow/envs/base.py
index c4462e8c8..c8df037b0 100644
--- a/flow/envs/base.py
+++ b/flow/envs/base.py
@@ -26,6 +26,8 @@
 from flow.core.kernel import Kernel
 from flow.utils.exceptions import FatalFlowError
 
+from flow.data_pipeline.data_pipeline import get_extra_info
+
 
 class Env(gym.Env, metaclass=ABCMeta):
     """Base environment class.
@@ -149,6 +151,13 @@ def __init__(self,
         self.state = None
         self.obs_var_labels = []
 
+        # number of training iterations (used by the rllib training procedure)
+        self._num_training_iters = 0
+
+        # track IDs that have ever been observed in the system
+        self._observed_ids = set()
+        self._observed_rl_ids = set()
+
         # simulation step size
         self.sim_step = sim_params.sim_step
 
@@ -323,6 +332,11 @@ def step(self, rl_actions):
             contains other diagnostic information from the previous action
         """
         for _ in range(self.env_params.sims_per_step):
+            # track vehicles that have appeared during the warmup steps
+            if self.time_counter <= self.env_params.sims_per_step * self.env_params.warmup_steps:
+                self._observed_ids.update(self.k.vehicle.get_ids())
+                self._observed_rl_ids.update(self.k.vehicle.get_rl_ids())
+
             self.time_counter += 1
             self.step_counter += 1
 
@@ -397,8 +411,7 @@ def step(self, rl_actions):
         # test if the environment should terminate due to a collision or the
         # time horizon being met
         done = (self.time_counter >= self.env_params.sims_per_step *
-                (self.env_params.warmup_steps + self.env_params.horizon)
-                or crash)
+                (self.env_params.warmup_steps + self.env_params.horizon))
 
         # compute the info for each agent
         infos = {}
@@ -431,6 +444,10 @@ def reset(self):
         # reset the time counter
         self.time_counter = 0
 
+        # reset the observed ids
+        self._observed_ids = set()
+        self._observed_rl_ids = set()
+
         # Now that we've passed the possibly fake init steps some rl libraries
         # do, we can feel free to actually render things
         if self.should_render:
@@ -554,6 +571,14 @@ def reset(self):
         # perform (optional) warm-up steps before training
         for _ in range(self.env_params.warmup_steps):
             observation, _, _, _ = self.step(rl_actions=None)
+            # collect data for the pipeline during the warmup period
+            try:
+                extra_info, source_id, run_id = self.pipeline_params
+                veh_ids = self.k.vehicle.get_ids()
+                get_extra_info(self.k.vehicle, extra_info, veh_ids, source_id, run_id)
+            # in case the attribute `pipeline_params` was not added to this instance
+            except AttributeError:
+                pass
 
         # render a frame
         self.render(reset=True)
@@ -802,3 +827,7 @@ def pyglet_render(self):
                 sight = self.renderer.get_sight(
                     orientation, id)
                 self.sights.append(sight)
+
+    def set_iteration_num(self):
+        """Increment the number of training iterations."""
+        self._num_training_iters += 1
diff --git a/flow/envs/multiagent/__init__.py b/flow/envs/multiagent/__init__.py
index f7889591d..8c5552580 100644
--- a/flow/envs/multiagent/__init__.py
+++ b/flow/envs/multiagent/__init__.py
@@ -10,7 +10,8 @@
 from flow.envs.multiagent.traffic_light_grid import MultiTrafficLightGridPOEnv
 from flow.envs.multiagent.highway import MultiAgentHighwayPOEnv
 from flow.envs.multiagent.merge import MultiAgentMergePOEnv
-from flow.envs.multiagent.i210 import I210MultiEnv
+from flow.envs.multiagent.i210 import I210MultiEnv, MultiStraightRoad
+
 
 __all__ = 
[ 'MultiEnv', @@ -21,5 +22,6 @@ 'MultiAgentAccelPOEnv', 'MultiAgentWaveAttenuationPOEnv', 'MultiAgentMergePOEnv', - 'I210MultiEnv' + 'I210MultiEnv', + 'MultiStraightRoad', ] diff --git a/flow/envs/multiagent/base.py b/flow/envs/multiagent/base.py index ec95474c6..e708a6ce6 100644 --- a/flow/envs/multiagent/base.py +++ b/flow/envs/multiagent/base.py @@ -49,6 +49,10 @@ def step(self, rl_actions): contains other diagnostic information from the previous action """ for _ in range(self.env_params.sims_per_step): + if self.time_counter <= self.env_params.sims_per_step * self.env_params.warmup_steps: + self._observed_ids.update(self.k.vehicle.get_ids()) + self._observed_rl_ids.update(self.k.vehicle.get_rl_ids()) + self.time_counter += 1 self.step_counter += 1 @@ -103,6 +107,7 @@ def step(self, rl_actions): # stop collecting new simulation steps if there is a collision if crash: + print('A CRASH! A CRASH!!!!!! AAAAAAAAAH!!!!!') break states = self.get_state() @@ -122,10 +127,11 @@ def step(self, rl_actions): else: reward = self.compute_reward(rl_actions, fail=crash) - for rl_id in self.k.vehicle.get_arrived_rl_ids(): - done[rl_id] = True - reward[rl_id] = 0 - states[rl_id] = np.zeros(self.observation_space.shape[0]) + if self.env_params.done_at_exit: + for rl_id in self.k.vehicle.get_arrived_rl_ids(self.env_params.sims_per_step): + done[rl_id] = True + reward[rl_id] = 0 + states[rl_id] = -1 * np.ones(self.observation_space.shape[0]) return states, reward, done, infos @@ -148,12 +154,17 @@ def reset(self, new_inflow_rate=None): # reset the time counter self.time_counter = 0 + # reset the observed ids + self._observed_ids = set() + self._observed_rl_ids = set() + # Now that we've passed the possibly fake init steps some rl libraries # do, we can feel free to actually render things if self.should_render: self.sim_params.render = True # got to restart the simulation to make it actually display anything self.restart_simulation(self.sim_params) + self.should_render = False # warn about not using restart_instance when using inflows if len(self.net_params.inflows.get()) > 0 and \ diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 409aeb14f..004208cb4 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -3,19 +3,27 @@ from gym.spaces import Box import numpy as np -from flow.core.rewards import average_velocity +from flow.core.rewards import instantaneous_mpg from flow.envs.multiagent.base import MultiEnv # largest number of lanes on any given edge in the network MAX_LANES = 6 +SPEED_SCALE = 50 +HEADWAY_SCALE = 1000 ADDITIONAL_ENV_PARAMS = { # maximum acceleration for autonomous vehicles, in m/s^2 "max_accel": 1, # maximum deceleration for autonomous vehicles, in m/s^2 "max_decel": 1, - # whether we use an obs space that contains adjacent lane info or just the lead obs + # whether we use an obs space that contains adjacent lane info or just the + # lead obs "lead_obs": True, + # whether the reward should come from local vehicles instead of global + # rewards + "local_reward": True, + # desired velocity + "target_velocity": 25 } @@ -59,6 +67,37 @@ class I210MultiEnv(MultiEnv): def __init__(self, env_params, sim_params, network, simulator='traci'): super().__init__(env_params, sim_params, network, simulator) self.lead_obs = env_params.additional_params.get("lead_obs") + self.reroute_on_exit = env_params.additional_params.get("reroute_on_exit") + self.max_lanes = MAX_LANES + self.num_enter_lanes = 5 + self.entrance_edge = "ghost0" + self.exit_edge = 
"119257908#2" + self.control_range = env_params.additional_params.get('control_range', None) + self.no_control_edges = env_params.additional_params.get('no_control_edges', []) + self.mpg_reward = env_params.additional_params["mpg_reward"] + self.look_back_length = env_params.additional_params["look_back_length"] + + # whether to add a slight reward for opening up a gap that will be annealed out N iterations in + self.headway_curriculum = env_params.additional_params["headway_curriculum"] + # how many timesteps to anneal the headway curriculum over + self.headway_curriculum_iters = env_params.additional_params["headway_curriculum_iters"] + self.headway_reward_gain = env_params.additional_params["headway_reward_gain"] + self.min_time_headway = env_params.additional_params["min_time_headway"] + + # whether to add a slight reward for opening up a gap that will be annealed out N iterations in + self.speed_curriculum = env_params.additional_params["speed_curriculum"] + # how many timesteps to anneal the headway curriculum over + self.speed_curriculum_iters = env_params.additional_params["speed_curriculum_iters"] + self.speed_reward_gain = env_params.additional_params["speed_reward_gain"] + self.leader = [] + + # penalize stops + self.penalize_stops = env_params.additional_params["penalize_stops"] + self.stop_penalty = env_params.additional_params["stop_penalty"] + + # penalize accel + self.penalize_accel = env_params.additional_params.get("penalize_accel", False) + self.accel_penalty = env_params.additional_params["accel_penalty"] @property def observation_space(self): @@ -74,8 +113,8 @@ def observation_space(self): # speed, dist to ego vehicle, binary value which is 1 if the vehicle is # an AV else: - leading_obs = 3 * MAX_LANES - follow_obs = 3 * MAX_LANES + leading_obs = 3 * self.max_lanes + follow_obs = 3 * self.max_lanes # speed and lane self_obs = 2 @@ -99,90 +138,202 @@ def action_space(self): def _apply_rl_actions(self, rl_actions): """See class definition.""" # in the warmup steps, rl_actions is None + id_list = [] + accel_list = [] if rl_actions: for rl_id, actions in rl_actions.items(): accel = actions[0] + id_list.append(rl_id) + accel_list.append(accel) + self.k.vehicle.apply_acceleration(id_list, accel_list) - # lane_change_softmax = np.exp(actions[1:4]) - # lane_change_softmax /= np.sum(lane_change_softmax) - # lane_change_action = np.random.choice([-1, 0, 1], - # p=lane_change_softmax) + def in_control_range(self, veh_id): + """Return if a veh_id is on an edge that is allowed to be controlled. 
- self.k.vehicle.apply_acceleration(rl_id, accel) - # self.k.vehicle.apply_lane_change(rl_id, lane_change_action) + If control range is defined it uses control range, otherwise it searches over a set of edges + """ + return (self.control_range and self.control_range[1] > + self.k.vehicle.get_x_by_id(veh_id) > self.control_range[0]) or \ + (len(self.no_control_edges) > 0 and self.k.vehicle.get_edge(veh_id) not in + self.no_control_edges) def get_state(self): """See class definition.""" + valid_ids = [rl_id for rl_id in self.k.vehicle.get_rl_ids() if self.in_control_range(rl_id)] if self.lead_obs: veh_info = {} - for rl_id in self.k.vehicle.get_rl_ids(): + for rl_id in valid_ids: speed = self.k.vehicle.get_speed(rl_id) - headway = self.k.vehicle.get_headway(rl_id) - lead_speed = self.k.vehicle.get_speed(self.k.vehicle.get_leader(rl_id)) - if lead_speed == -1001: - lead_speed = 0 - veh_info.update({rl_id: np.array([speed / 50.0, headway / 1000.0, lead_speed / 50.0])}) + lead_id = self.k.vehicle.get_leader(rl_id) + if lead_id in ["", None]: + # in case leader is not visible + lead_speed = SPEED_SCALE + headway = HEADWAY_SCALE + else: + lead_speed = self.k.vehicle.get_speed(lead_id) + headway = self.k.vehicle.get_headway(rl_id) + veh_info.update({rl_id: np.array([speed / SPEED_SCALE, headway / HEADWAY_SCALE, + lead_speed / SPEED_SCALE])}) else: veh_info = {rl_id: np.concatenate((self.state_util(rl_id), self.veh_statistics(rl_id))) - for rl_id in self.k.vehicle.get_rl_ids()} + for rl_id in valid_ids} return veh_info def compute_reward(self, rl_actions, **kwargs): - # TODO(@evinitsky) we need something way better than this. Something that adds - # in notions of local reward """See class definition.""" # in the warmup steps if rl_actions is None: return {} rewards = {} - for rl_id in self.k.vehicle.get_rl_ids(): - if self.env_params.evaluate: - # reward is speed of vehicle if we are in evaluation mode - reward = self.k.vehicle.get_speed(rl_id) - elif kwargs['fail']: - # reward is 0 if a collision occurred - reward = 0 - else: - # reward high system-level velocities - cost1 = average_velocity(self, fail=kwargs['fail']) + valid_ids = [rl_id for rl_id in self.k.vehicle.get_rl_ids() if self.in_control_range(rl_id)] + valid_human_ids = [veh_id for veh_id in self.k.vehicle.get_ids() if self.in_control_range(veh_id)] + + if self.env_params.additional_params["local_reward"]: + des_speed = self.env_params.additional_params["target_velocity"] + for rl_id in valid_ids: + rewards[rl_id] = 0 + if self.mpg_reward: + rewards[rl_id] = instantaneous_mpg(self, rl_id, gain=1.0) / 100.0 + follow_id = rl_id + for i in range(self.look_back_length): + follow_id = self.k.vehicle.get_follower(follow_id) + if follow_id not in ["", None]: + rewards[rl_id] += instantaneous_mpg(self, follow_id, gain=1.0) / 100.0 + else: + break + else: + follow_id = rl_id + for i in range(self.look_back_length + 1): + if follow_id not in ["", None]: + follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(follow_id)) + reward = (des_speed - min(np.abs(follow_speed - des_speed), des_speed)) ** 2 + reward /= ((des_speed ** 2) * self.look_back_length) + rewards[rl_id] += reward + else: + break + follow_id = self.k.vehicle.get_follower(follow_id) - # penalize small time headways - cost2 = 0 - t_min = 1 # smallest acceptable time headway - - lead_id = self.k.vehicle.get_leader(rl_id) + else: + if self.mpg_reward: + reward = np.nan_to_num(instantaneous_mpg(self, valid_human_ids, gain=1.0)) / 100.0 + else: + speeds = 
self.k.vehicle.get_speed(valid_human_ids)
+                des_speed = self.env_params.additional_params["target_velocity"]
+                # rescale so the critic can estimate it quickly
+                if self.reroute_on_exit:
+                    reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed))
+                                                    for speed in speeds]) / des_speed)
+                else:
+                    reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2
+                                                    for speed in speeds]) / (des_speed ** 2))
+            rewards = {rl_id: reward for rl_id in valid_ids}
+
+        # curriculum over time-gaps
+        if self.headway_curriculum and self._num_training_iters <= self.headway_curriculum_iters:
+            t_min = self.min_time_headway  # smallest acceptable time headway
+            for veh_id, rew in rewards.items():
+                lead_id = self.k.vehicle.get_leader(veh_id)
+                penalty = 0
                 if lead_id not in ["", None] \
-                        and self.k.vehicle.get_speed(rl_id) > 0:
+                        and self.k.vehicle.get_speed(veh_id) > 0:
                     t_headway = max(
-                        self.k.vehicle.get_headway(rl_id) /
-                        self.k.vehicle.get_speed(rl_id), 0)
-                    cost2 += min((t_headway - t_min) / t_min, 0)
-
-                # weights for cost1, cost2, and cost3, respectively
-                eta1, eta2 = 1.00, 0.10
-
-                reward = max(eta1 * cost1 + eta2 * cost2, 0)
-
-                rewards[rl_id] = reward
+                        self.k.vehicle.get_headway(veh_id) /
+                        self.k.vehicle.get_speed(veh_id), 0)
+                    scaling_factor = max(0, 1 - self._num_training_iters / self.headway_curriculum_iters)
+                    penalty += scaling_factor * self.headway_reward_gain * min((t_headway - t_min) / t_min, 0)
+
+                rewards[veh_id] += penalty
+
+        if self.speed_curriculum and self._num_training_iters <= self.speed_curriculum_iters:
+            des_speed = self.env_params.additional_params["target_velocity"]
+
+            for veh_id, rew in rewards.items():
+                speed = self.k.vehicle.get_speed(veh_id)
+                speed_reward = 0.0
+                follow_id = veh_id
+                for i in range(self.look_back_length):
+                    follow_id = self.k.vehicle.get_follower(follow_id)
+                    if follow_id not in ["", None]:
+                        if self.reroute_on_exit:
+                            speed_reward += (des_speed - np.abs(speed - des_speed)) / des_speed
+                        else:
+                            speed_reward += ((des_speed - np.abs(speed - des_speed)) ** 2) / (des_speed ** 2)
+                    else:
+                        break
+                scaling_factor = max(0, 1 - self._num_training_iters / self.speed_curriculum_iters)
+
+                rewards[veh_id] += speed_reward * scaling_factor * self.speed_reward_gain
+
+        for veh_id in rewards.keys():
+            speed = self.k.vehicle.get_speed(veh_id)
+            if self.penalize_stops:
+                if speed < 1.0:
+                    rewards[veh_id] -= self.stop_penalty
+            if self.penalize_accel and veh_id in self.k.vehicle.previous_speeds:
+                prev_speed = self.k.vehicle.get_previous_speed(veh_id)
+                abs_accel = abs(speed - prev_speed) / self.sim_step
+                rewards[veh_id] -= abs_accel * self.accel_penalty
+
         return rewards
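The stop and acceleration penalties at the end of `compute_reward` are simple subtractive terms. A worked toy computation of their combined effect; all numbers are fabricated, and `sim_step = 0.4` s is an assumption made only for this illustration:

```python
# Toy computation of the stop and acceleration penalties above.
speed, prev_speed = 0.5, 2.1          # m/s (fabricated)
stop_penalty, accel_penalty = 0.05, 0.01
sim_step = 0.4                        # s (assumed for illustration)

reward = 0.0
if speed < 1.0:                       # the vehicle is (nearly) stopped
    reward -= stop_penalty
abs_accel = abs(speed - prev_speed) / sim_step   # 4.0 m/s^2
reward -= abs_accel * accel_penalty
print(reward)                         # -0.05 - 0.04 = -0.09
```

     def additional_command(self):
         """See parent class.

-        Define which vehicles are observed for visualization purposes.
+        Define which vehicles are observed for visualization purposes. Additionally, optionally reroute vehicles
+        back once they have exited.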
""" + super().additional_command() # specify observed vehicles for rl_id in self.k.vehicle.get_rl_ids(): # leader lead_id = self.k.vehicle.get_leader(rl_id) if lead_id: self.k.vehicle.set_observed(lead_id) - # follower - follow_id = self.k.vehicle.get_follower(rl_id) - if follow_id: - self.k.vehicle.set_observed(follow_id) + + if self.reroute_on_exit and self.time_counter >= self.env_params.sims_per_step * self.env_params.warmup_steps \ + and not self.env_params.evaluate: + veh_ids = self.k.vehicle.get_ids() + edges = self.k.vehicle.get_edge(veh_ids) + valid_lanes = list(range(self.num_enter_lanes)) + for veh_id, edge in zip(veh_ids, edges): + if edge == "": + continue + if edge[0] == ":": # center edge + continue + # on the exit edge, near the end, and is the vehicle furthest along + if edge == self.exit_edge and \ + (self.k.vehicle.get_position(veh_id) > self.k.network.edge_length(self.exit_edge) - 100) \ + and self.k.vehicle.get_leader(veh_id) is None: + type_id = self.k.vehicle.get_type(veh_id) + # remove the vehicle + self.k.vehicle.remove(veh_id) + index = np.random.randint(low=0, high=len(valid_lanes)) + lane = valid_lanes[index] + del valid_lanes[index] + # reintroduce it at the start of the network + # Note, the position is 20 so you are not overlapping with the inflow car that is being removed. + # this allows the vehicle to be immediately inserted. + try: + self.k.vehicle.add( + veh_id=veh_id, + edge=self.entrance_edge, + type_id=str(type_id), + lane=str(lane), + pos="20.0", + speed="23.0") + except Exception as e: + print(e) + if len(valid_lanes) == 0: + break + + departed_ids = self.k.vehicle.get_departed_ids() + if isinstance(departed_ids, tuple) and len(departed_ids) > 0: + for veh_id in departed_ids: + if veh_id not in self._observed_ids: + self.k.vehicle.remove(veh_id) def state_util(self, rl_id): """Return an array of headway, tailway, leader speed, follower speed. @@ -223,3 +374,49 @@ def veh_statistics(self, rl_id): speed = self.k.vehicle.get_speed(rl_id) / 100.0 lane = (self.k.vehicle.get_lane(rl_id) + 1) / 10.0 return np.array([speed, lane]) + + def step(self, rl_actions): + """See parent class for more details; add option to reroute vehicles.""" + state, reward, done, info = super().step(rl_actions) + # handle the edge case where a vehicle hasn't been put back when the rollout terminates + if self.reroute_on_exit and done['__all__']: + for rl_id in self.observed_rl_ids: + if rl_id not in state.keys(): + done[rl_id] = True + reward[rl_id] = 0 + state[rl_id] = -1 * np.ones(self.observation_space.shape[0]) + else: + # you have to catch the vehicles on the exit edge, they have not yet + # recieved a done when the env terminates + if done['__all__']: + on_exit_edge = [rl_id for rl_id in self.k.vehicle.get_rl_ids() + if self.k.vehicle.get_edge(rl_id) == self.exit_edge] + for rl_id in on_exit_edge: + done[rl_id] = True + reward[rl_id] = 0 + state[rl_id] = -1 * np.ones(self.observation_space.shape[0]) + + return state, reward, done, info + + +class MultiStraightRoad(I210MultiEnv): + """Partially observable multi-agent environment for a straight road. 
+
+    Look at superclass for more information.
+    """
+
+    def __init__(self, env_params, sim_params, network, simulator):
+        super().__init__(env_params, sim_params, network, simulator)
+        self.num_enter_lanes = 1
+        self.entrance_edge = self.network.routes['highway_0'][0][0][0]
+        self.exit_edge = self.network.routes['highway_0'][0][0][-1]
+
+    def _apply_rl_actions(self, rl_actions):
+        """See class definition."""
+        # in the warmup steps, rl_actions is None
+        if rl_actions:
+            rl_ids = []
+            accels = []
+            for rl_id, actions in rl_actions.items():
+                accels.append(actions[0])
+                rl_ids.append(rl_id)
+
+            self.k.vehicle.apply_acceleration(rl_ids, accels)
diff --git a/flow/envs/straightroad_env.py b/flow/envs/straightroad_env.py
new file mode 100644
index 000000000..92fbb855b
--- /dev/null
+++ b/flow/envs/straightroad_env.py
@@ -0,0 +1,231 @@
+"""Environment for training vehicles to reduce congestion in the I210."""
+
+from gym.spaces import Box
+import numpy as np
+
+from flow.envs.base import Env
+
+# largest number of lanes on any given edge in the network
+MAX_LANES = 6
+MAX_NUM_VEHS = 8
+SPEED_SCALE = 50
+HEADWAY_SCALE = 1000
+
+ADDITIONAL_ENV_PARAMS = {
+    # maximum acceleration for autonomous vehicles, in m/s^2
+    "max_accel": 1,
+    # maximum deceleration for autonomous vehicles, in m/s^2
+    "max_decel": 1,
+    # whether we use an obs space that contains adjacent lane info or just the lead obs
+    "lead_obs": True,
+    # whether the reward should come from local vehicles instead of global rewards
+    "local_reward": True,
+    # if the environment terminates once a wave has occurred
+    "terminate_on_wave": False,
+    # the environment is not allowed to terminate below this horizon length
+    'wave_termination_horizon': 500,
+    # the speed below which we consider a wave to have occurred
+    'wave_termination_speed': 10.0
+}
+
+
+class I210SingleEnv(Env):
+    """Partially observable single-agent environment for the I-210 subnetworks.
+
+    The policy is shared among the agents, so there can be a non-constant
+    number of RL vehicles throughout the simulation.
+
+    Required from env_params:
+
+    * max_accel: maximum acceleration for autonomous vehicles, in m/s^2
+    * max_decel: maximum deceleration for autonomous vehicles, in m/s^2
+
+    The following states, actions and rewards are considered for one
+    autonomous vehicle only, as they will be computed in the same way for
+    each of them.
+
+    States
+        The observation consists of the speeds and bumper-to-bumper headways
+        of the vehicles immediately preceding and following the autonomous
+        vehicles in each lane, a binary value indicating which of these
+        vehicles is autonomous, and the speed of the autonomous vehicle.
+        Missing vehicles are padded with zeros.
+    Actions
+        The action consists of an acceleration, bound according to the
+        environment parameters, as well as three values that will be
+        converted into probabilities via softmax to decide on a lane change
+        (left, none or right). NOTE: lane changing is currently not enabled.
+        It's a TODO.
+    Rewards
+        The reward function encourages proximity of the system-level velocity
+        to a desired velocity specified in the environment parameters, while
+        slightly penalizing small time headways among autonomous vehicles.
+    Termination
+        A rollout is terminated if the time horizon is reached or if two
+        vehicles collide into one another.
+    """
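For the default `lead_obs=True` configuration, each controlled vehicle contributes a three-entry slice to the observation vector, normalized by the `SPEED_SCALE` and `HEADWAY_SCALE` constants above. A minimal sketch with fabricated raw values:

```python
import numpy as np

# Illustration of the normalized per-vehicle observation built by get_state
# below: [speed, headway, leader speed]. The raw values here are made up.
SPEED_SCALE, HEADWAY_SCALE = 50, 1000
speed, headway, lead_speed = 24.0, 60.0, 22.5  # m/s, m, m/s
obs = np.array([speed / SPEED_SCALE, headway / HEADWAY_SCALE, lead_speed / SPEED_SCALE])
print(obs)  # [0.48 0.06 0.45]
```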
+ """ + + def __init__(self, env_params, sim_params, network, simulator='traci'): + super().__init__(env_params, sim_params, network, simulator) + self.lead_obs = env_params.additional_params.get("lead_obs") + self.max_lanes = MAX_LANES + self.total_reward = 0.0 + + @property + def observation_space(self): + """See class definition.""" + # speed, speed of leader, headway + if self.lead_obs: + return Box( + low=-float('inf'), + high=float('inf'), + shape=(3 * MAX_NUM_VEHS,), + dtype=np.float32 + ) + # speed, dist to ego vehicle, binary value which is 1 if the vehicle is + # an AV + else: + leading_obs = 3 * self.max_lanes + follow_obs = 3 * self.max_lanes + + # speed and lane + self_obs = 2 + + return Box( + low=-float('inf'), + high=float('inf'), + shape=(leading_obs + follow_obs + self_obs,), + dtype=np.float32 + ) + + @property + def action_space(self): + """See class definition.""" + return Box( + low=-np.abs(self.env_params.additional_params['max_decel']), + high=self.env_params.additional_params['max_accel'], + shape=(1 * MAX_NUM_VEHS,), # (4,), + dtype=np.float32) + + def _apply_rl_actions(self, rl_actions): + """See class definition.""" + # in the warmup steps, rl_actions is None + if rl_actions is not None: + accels = [] + veh_ids = [] + rl_ids = self.get_sorted_rl_ids() + + for i, rl_id in enumerate(self.rl_id_list): + accels.append(rl_actions[i]) + veh_ids.append(rl_id) + + # lane_change_softmax = np.exp(actions[1:4]) + # lane_change_softmax /= np.sum(lane_change_softmax) + # lane_change_action = np.random.choice([-1, 0, 1], + # p=lane_change_softmax) + + self.k.vehicle.apply_acceleration(rl_ids, accels) + + def get_state(self): + """See class definition.""" + rl_ids = self.get_sorted_rl_ids() + self.rl_id_list = rl_ids + veh_info = np.zeros(self.observation_space.shape[0]) + per_vehicle_obs = 3 + for i, rl_id in enumerate(rl_ids): + speed = self.k.vehicle.get_speed(rl_id) + lead_id = self.k.vehicle.get_leader(rl_id) + if lead_id in ["", None]: + # in case leader is not visible + lead_speed = SPEED_SCALE + headway = HEADWAY_SCALE + else: + lead_speed = self.k.vehicle.get_speed(lead_id) + headway = self.k.vehicle.get_headway(rl_id) + veh_info[i * per_vehicle_obs: (i + 1) * per_vehicle_obs] = [speed / SPEED_SCALE, + headway / HEADWAY_SCALE, + lead_speed / SPEED_SCALE] + return veh_info + + def compute_reward(self, rl_actions, **kwargs): + """See class definition.""" + # in the warmup steps + if rl_actions is None: + return {} + + rl_ids = self.get_sorted_rl_ids() + + des_speed = self.env_params.additional_params["target_velocity"] + rewards = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 + for speed in self.k.vehicle.get_speed(rl_ids)])) / (des_speed ** 2) + return rewards + + def get_sorted_rl_ids(self): + """Return the MAX_NUM_VEHS closest to the exit.""" + rl_ids = self.k.vehicle.get_rl_ids() + rl_ids = sorted(rl_ids, key=lambda veh_id: self.k.vehicle.get_x_by_id(veh_id)) + rl_ids = rl_ids[-MAX_NUM_VEHS:] + return rl_ids + + def additional_command(self): + """Define which vehicles are observed for visualization purposes.""" + # specify observed vehicles + for rl_id in self.k.vehicle.get_rl_ids(): + # leader + lead_id = self.k.vehicle.get_leader(rl_id) + if lead_id: + self.k.vehicle.set_observed(lead_id) + + def state_util(self, rl_id): + """Return an array of headway, tailway, leader speed, follower speed. + + Also return a 1 if leader is rl 0 otherwise, a 1 if follower is rl 0 otherwise. 
+ If there are fewer than MAX_LANES the extra + entries are filled with -1 to disambiguate from zeros. + """ + veh = self.k.vehicle + lane_headways = veh.get_lane_headways(rl_id).copy() + lane_tailways = veh.get_lane_tailways(rl_id).copy() + lane_leader_speed = veh.get_lane_leaders_speed(rl_id).copy() + lane_follower_speed = veh.get_lane_followers_speed(rl_id).copy() + leader_ids = veh.get_lane_leaders(rl_id).copy() + follower_ids = veh.get_lane_followers(rl_id).copy() + rl_ids = self.k.vehicle.get_rl_ids() + is_leader_rl = [1 if l_id in rl_ids else 0 for l_id in leader_ids] + is_follow_rl = [1 if f_id in rl_ids else 0 for f_id in follower_ids] + diff = MAX_LANES - len(is_leader_rl) + if diff > 0: + # the minus 1 disambiguates missing cars from missing lanes + lane_headways += diff * [-1] + lane_tailways += diff * [-1] + lane_leader_speed += diff * [-1] + lane_follower_speed += diff * [-1] + is_leader_rl += diff * [-1] + is_follow_rl += diff * [-1] + lane_headways = np.asarray(lane_headways) / 1000 + lane_tailways = np.asarray(lane_tailways) / 1000 + lane_leader_speed = np.asarray(lane_leader_speed) / 100 + lane_follower_speed = np.asarray(lane_follower_speed) / 100 + return np.concatenate((lane_headways, lane_tailways, lane_leader_speed, + lane_follower_speed, is_leader_rl, + is_follow_rl)) + + def veh_statistics(self, rl_id): + """Return speed, edge information, and x, y about the vehicle itself.""" + speed = self.k.vehicle.get_speed(rl_id) / 100.0 + lane = (self.k.vehicle.get_lane(rl_id) + 1) / 10.0 + return np.array([speed, lane]) + + +class SingleStraightRoad(I210SingleEnv): + """Partially observable multi-agent environment for a straight road. Look at superclass for more information.""" + + def __init__(self, env_params, sim_params, network, simulator): + super().__init__(env_params, sim_params, network, simulator) + self.max_lanes = 1 + + def step(self, rl_actions): + """See parent class.""" + obs, rew, done, info = super().step(rl_actions) + mean_speed = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) + if self.env_params.additional_params['terminate_on_wave'] and \ + mean_speed < self.env_params.additional_params['wave_termination_speed'] \ + and self.time_counter > self.env_params.additional_params['wave_termination_horizon'] \ + and len(self.k.vehicle.get_ids()) > 0: + done = True + + return obs, rew, done, info diff --git a/flow/utils/rllib.py b/flow/utils/rllib.py index 80193c22b..fc3229e52 100644 --- a/flow/utils/rllib.py +++ b/flow/utils/rllib.py @@ -96,6 +96,8 @@ def get_flow_params(config): flow_params = json.loads(config['env_config']['flow_params']) else: flow_params = json.load(open(config, 'r')) + if 'env_config' in flow_params: + flow_params = json.loads(flow_params['env_config']['flow_params']) # reinitialize the vehicles class from stored data veh = VehicleParams() @@ -146,6 +148,13 @@ def get_flow_params(config): if flow_params["net"]["inflows"]: net.inflows.__dict__ = flow_params["net"]["inflows"].copy() + if net.template is not None and len(net.template) > 0: + dirname = os.getcwd() + filename = os.path.join(dirname, '../../examples') + split = net.template.split('examples')[1][1:] + path = os.path.abspath(os.path.join(filename, split)) + net.template = path + env = EnvParams() env.__dict__ = flow_params["env"].copy() diff --git a/flow/visualize/i210_replay.py b/flow/visualize/i210_replay.py new file mode 100644 index 000000000..4c7498413 --- /dev/null +++ b/flow/visualize/i210_replay.py @@ -0,0 +1,575 @@ +"""Transfer and replay for 
i210 environment.""" +import argparse +from datetime import datetime, timezone +from collections import defaultdict +from copy import deepcopy +import numpy as np +import json +import os +import pytz +import subprocess +import time + +import ray + +try: + from ray.rllib.agents.agent import get_agent_class +except ImportError: + from ray.rllib.agents.registry import get_agent_class +from ray.tune.registry import register_env + +from flow.core.util import emission_to_csv, ensure_dir +from flow.core.rewards import veh_energy_consumption +from flow.utils.registry import make_create_env +from flow.utils.rllib import get_flow_params +from flow.utils.rllib import get_rllib_config +from flow.utils.rllib import get_rllib_pkl +from flow.utils.rllib import FlowParamsEncoder + +from flow.visualize.transfer.util import inflows_range +from flow.visualize.plot_custom_callables import plot_trip_distribution + +from examples.exp_configs.rl.multiagent.multiagent_i210 import flow_params as I210_MA_DEFAULT_FLOW_PARAMS +from examples.exp_configs.rl.multiagent.multiagent_i210 import custom_callables + +from flow.data_pipeline.data_pipeline import write_dict_to_csv, upload_to_s3, get_extra_info, get_configuration +from flow.data_pipeline.leaderboard_utils import network_name_translate +import uuid + +EXAMPLE_USAGE = """ +example usage: + python i210_replay.py -r /ray_results/experiment_dir/result_dir -c 1 + python i210_replay.py --controller idm + python i210_replay.py --controller idm --run_transfer + +Here the arguments are: +1 - the path to the simulation results +2 - the number of the checkpoint +""" + + +@ray.remote +def replay(args, flow_params, output_dir=None, transfer_test=None, rllib_config=None, result_dir=None, + max_completed_trips=None, v_des=12): + """Replay or run transfer test (defined by transfer_fn) by modif. 
+ + Arguments: + --------- + args {[Namespace]} -- [args from argparser] + flow_params {[flow_params object, pulled from ]} -- [description] + transfer_fn {[type]} -- [description] + + Keyword Arguments: + ----------------- + rllib_config {[type]} -- [description] (default: {None}) + result_dir {[type]} -- [description] (default: {None}) + """ + assert bool(args.controller) ^ bool(rllib_config), \ + "Need to specify either controller or rllib_config, but not both" + if transfer_test is not None: + if type(transfer_test) == bytes: + transfer_test = ray.cloudpickle.loads(transfer_test) + flow_params = transfer_test.flow_params_modifier_fn(flow_params) + + if args.controller: + test_params = {} + if args.controller == 'idm': + from flow.controllers.car_following_models import IDMController + controller = IDMController + test_params.update({'v0': 1, 'T': 1, 'a': 0.2, 'b': 0.2}) # An example of really obvious changes + elif args.controller == 'default_human': + controller = flow_params['veh'].type_parameters['human']['acceleration_controller'][0] + test_params.update(flow_params['veh'].type_parameters['human']['acceleration_controller'][1]) + elif args.controller == 'follower_stopper': + from flow.controllers.velocity_controllers import FollowerStopper + controller = FollowerStopper + test_params.update({'v_des': v_des}) + # flow_params['veh'].type_parameters['av']['car_following_params'] + elif args.controller == 'sumo': + from flow.controllers.car_following_models import SimCarFollowingController + controller = SimCarFollowingController + + flow_params['veh'].type_parameters['av']['acceleration_controller'] = (controller, test_params) + + for veh_param in flow_params['veh'].initial: + if veh_param['veh_id'] == 'av': + veh_param['acceleration_controller'] = (controller, test_params) + + sim_params = flow_params['sim'] + sim_params.num_clients = 1 + + sim_params.restart_instance = True + dir_path = os.path.dirname(os.path.realpath(__file__)) + emission_path = '{0}/test_time_rollout/'.format(dir_path) + sim_params.emission_path = emission_path if args.gen_emission else None + + # pick your rendering mode + if args.render_mode == 'sumo_web3d': + sim_params.num_clients = 2 + sim_params.render = False + elif args.render_mode == 'drgb': + sim_params.render = 'drgb' + sim_params.pxpm = 4 + elif args.render_mode == 'sumo_gui': + sim_params.render = False # will be set to True below + elif args.render_mode == 'no_render': + sim_params.render = False + if args.save_render: + if args.render_mode != 'sumo_gui': + sim_params.render = 'drgb' + sim_params.pxpm = 4 + sim_params.save_render = True + + # Start the environment with the gui turned on and a path for the + # emission file + env_params = flow_params['env'] + env_params.restart_instance = False + if args.evaluate: + env_params.evaluate = True + + # lower the horizon if testing + if args.horizon: + env_params.horizon = args.horizon + + # Create and register a gym+rllib env + create_env, env_name = make_create_env(params=flow_params, version=0) + env = create_env(env_name) + + if args.render_mode == 'sumo_gui': + env.sim_params.render = True # set to True after initializing agent and env + + # if restart_instance, don't restart here because env.reset will restart later + if not sim_params.restart_instance: + env.restart_simulation(sim_params=sim_params, render=sim_params.render) + + # reroute on exit is a training hack, it should be turned off at test time. 
+ if hasattr(env, "reroute_on_exit"): + env.reroute_on_exit = False + + if rllib_config: + # check if we have a multiagent environment but in a + # backwards compatible way + if rllib_config.get('multiagent', {}).get('policies', None): + multiagent = True + pkl = get_rllib_pkl(result_dir) + rllib_config['multiagent'] = pkl['multiagent'] + else: + multiagent = False + raise NotImplementedError + + # Run on only one cpu for rendering purposes + rllib_config['num_workers'] = 0 + + # lower the horizon if testing + if args.horizon: + rllib_config['horizon'] = args.horizon + + assert 'run' in rllib_config['env_config'], "Was this trained with the latest version of Flow?" + # Determine agent and checkpoint + config_run = rllib_config['env_config']['run'] + + rllib_flow_params = get_flow_params(rllib_config) + agent_create_env, agent_env_name = make_create_env(params=rllib_flow_params, version=0) + register_env(agent_env_name, agent_create_env) + + if rllib_config['env_config']['run'] == "": + from flow.algorithms.centralized_PPO import CCTrainer, CentralizedCriticModel + from ray.rllib.models import ModelCatalog + agent_cls = CCTrainer + ModelCatalog.register_custom_model("cc_model", CentralizedCriticModel) + elif rllib_config['env_config']['run'] == "": + from flow.algorithms.custom_ppo import CustomPPOTrainer + agent_cls = CustomPPOTrainer + elif config_run: + agent_cls = get_agent_class(config_run) + else: + raise Exception('You forgot to store the algorithm type') + + # create the agent that will be used to compute the actions + agent = agent_cls(env=agent_env_name, config=rllib_config) + checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num + checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num + agent.restore(checkpoint) + + if multiagent: + # map the agent id to its policy + policy_map_fn = rllib_config['multiagent']['policy_mapping_fn'] + + if rllib_config['model']['use_lstm']: + use_lstm = True + if multiagent: + # map the agent id to its policy + size = rllib_config['model']['lstm_cell_size'] + lstm_state = defaultdict(lambda: [np.zeros(size, np.float32), + np.zeros(size, np.float32)]) + else: + lstm_state = [ + np.zeros(rllib_config['model']['lstm_cell_size'], np.float32), + np.zeros(rllib_config['model']['lstm_cell_size'], np.float32) + ] + else: + use_lstm = False + + # used to store + info_dict = { + "velocities": [], + "outflows": [], + "avg_trip_energy": [], + "avg_trip_time": [], + "total_completed_trips": [] + } + all_trip_energy_distribution = defaultdict(lambda: []) + all_trip_time_distribution = defaultdict(lambda: []) + + info_dict.update({ + key: [] for key in custom_callables.keys() + }) + + # reroute on exit is a training hack, it should be turned off at test time. 
+    if hasattr(env, "reroute_on_exit"):
+        env.reroute_on_exit = False
+
+    # data pipeline
+    extra_info = defaultdict(lambda: [])
+    source_id = 'flow_{}'.format(uuid.uuid4().hex)
+    metadata = defaultdict(lambda: [])
+    # collect current time
+    cur_datetime = datetime.now(timezone.utc)
+    cur_date = cur_datetime.date().isoformat()
+    cur_time = cur_datetime.time().isoformat()
+    metadata['source_id'].append(source_id)
+    metadata['submission_time'].append(cur_time)
+    metadata['network'].append(network_name_translate(env.network.name.split('_20')[0]))
+    metadata['is_baseline'].append(str(args.is_baseline))
+    if args.to_aws:
+        name, strategy = get_configuration()
+        metadata['submitter_name'].append(name)
+        metadata['strategy'].append(strategy)
+
+    i = 0
+    t = 0
+    while i < args.num_rollouts:
+        print("Rollout iter", i)
+        vel = []
+        per_vehicle_energy_trace = defaultdict(lambda: [])
+        completed_veh_types = {}
+        completed_vehicle_avg_energy = {}
+        completed_vehicle_travel_time = {}
+        custom_vals = {key: [] for key in custom_callables.keys()}
+        run_id = "run_{}".format(i)
+        env.pipeline_params = (extra_info, source_id, run_id)
+        state = env.reset()
+        initial_vehicles = set(env.k.vehicle.get_ids())
+        for t in range(env_params.horizon):
+            if rllib_config:
+                if multiagent:
+                    action = {}
+                    for agent_id in state.keys():
+                        if use_lstm:
+                            action[agent_id], lstm_state[agent_id], _ = \
+                                agent.compute_action(
+                                    state[agent_id], state=lstm_state[agent_id],
+                                    policy_id=policy_map_fn(agent_id))
+                        else:
+                            action[agent_id] = agent.compute_action(
+                                state[agent_id], policy_id=policy_map_fn(agent_id))
+                else:
+                    if use_lstm:
+                        raise NotImplementedError
+                    else:
+                        action = agent.compute_action(state)
+            else:
+                action = None
+
+            state, reward, done, _ = env.step(action)
+
+            # Compute the mean velocity of all vehicles this step.
+            veh_ids = env.k.vehicle.get_ids()
+            vel.append(np.mean(env.k.vehicle.get_speed(veh_ids)))
+
+            # collect additional information for the data pipeline
+            get_extra_info(env.k.vehicle, extra_info, veh_ids, source_id, run_id)
+
+            # Compute the results for the custom callables.
+            for (key, lambda_func) in custom_callables.items():
+                custom_vals[key].append(lambda_func(env))
+
+            for past_veh_id in per_vehicle_energy_trace.keys():
+                if past_veh_id not in veh_ids and past_veh_id not in completed_vehicle_avg_energy:
+                    all_trip_energy_distribution[completed_veh_types[past_veh_id]].append(
+                        np.sum(per_vehicle_energy_trace[past_veh_id]))
+                    all_trip_time_distribution[completed_veh_types[past_veh_id]].append(
+                        len(per_vehicle_energy_trace[past_veh_id]))
+                    completed_vehicle_avg_energy[past_veh_id] = np.sum(per_vehicle_energy_trace[past_veh_id])
+                    completed_vehicle_travel_time[past_veh_id] = len(per_vehicle_energy_trace[past_veh_id])
+
+            for veh_id in veh_ids:
+                if veh_id not in initial_vehicles:
+                    if veh_id not in per_vehicle_energy_trace:
+                        # we have to skip the first step's energy calculation
+                        per_vehicle_energy_trace[veh_id].append(0)
+                        completed_veh_types[veh_id] = env.k.vehicle.get_type(veh_id)
+                    else:
+                        per_vehicle_energy_trace[veh_id].append(-1 * veh_energy_consumption(env, veh_id))
+
+            if (isinstance(done, dict) and done['__all__']) or done is True:
+                break
+            elif max_completed_trips is not None and len(completed_vehicle_avg_energy) > max_completed_trips:
+                break
+
+        if t < env_params.horizon - 1:
+            # Early terminations signify a collision.
+            print("Crash on iter", i)
+        else:
+            # Store the information from the run in info_dict.
+ outflow = env.k.vehicle.get_outflow_rate(int(500)) + info_dict["velocities"].append(np.mean(vel)) + info_dict["outflows"].append(outflow) + info_dict["avg_trip_energy"].append(np.mean(list(completed_vehicle_avg_energy.values()))) + info_dict["avg_trip_time"].append(np.mean(list(completed_vehicle_travel_time.values()))) + info_dict["total_completed_trips"].append(len(list(completed_vehicle_avg_energy.values()))) + for key in custom_vals.keys(): + info_dict[key].append(np.mean(custom_vals[key])) + i += 1 + + print('======== Summary of results ========') + if args.run_transfer: + print("Transfer test: {}".format(transfer_test.transfer_str)) + print("====================================") + + # Print the averages/std for all variables in the info_dict. + for key in info_dict.keys(): + print("Average, std {}: {}, {}".format( + key, np.mean(info_dict[key]), np.std(info_dict[key]))) + + # terminate the environment + env.unwrapped.terminate() + + if output_dir: + ensure_dir(output_dir) + if args.run_transfer: + exp_name = "{}-replay".format(transfer_test.transfer_str) + else: + exp_name = "i210_replay" + replay_out = os.path.join(output_dir, '{}-info.npy'.format(exp_name)) + np.save(replay_out, info_dict) + # if prompted, convert the emission file into a csv file + if args.gen_emission: + emission_filename = '{0}-emission.xml'.format(env.network.name) + time.sleep(0.1) + + emission_path = \ + '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename) + + output_path = os.path.join(output_dir, '{}-emission.csv'.format(exp_name)) + # convert the emission file into a csv file + emission_to_csv(emission_path, output_path=output_path) + + # generate the trajectory output file + trajectory_table_path = os.path.join(dir_path, '{}.csv'.format(source_id)) + write_dict_to_csv(trajectory_table_path, extra_info, True) + metadata_table_path = os.path.join(dir_path, '{}_METADATA.csv'.format(source_id)) + write_dict_to_csv(metadata_table_path, metadata, True) + + # upload to s3 if asked + if args.use_s3: + upload_to_s3('circles.data.pipeline', 'metadata_table/date={0}/partition_name={1}_METADATA/' + '{1}_METADATA.csv'.format(cur_date, source_id), + metadata_table_path) + upload_to_s3('circles.data.pipeline', 'fact_vehicle_trace/date={0}/partition_name={1}/{1}.csv'.format( + cur_date, source_id), + trajectory_table_path, {'network': metadata['network'][0]}) + + # print the location of the emission csv file + print("\nGenerated emission file at " + output_path) + + # delete the .xml version of the emission file + os.remove(emission_path) + + all_trip_energies = os.path.join(output_dir, '{}-all_trip_energies.npy'.format(exp_name)) + np.save(all_trip_energies, dict(all_trip_energy_distribution)) + fig_names, figs = plot_trip_distribution(all_trip_energy_distribution) + + for fig_name, fig in zip(fig_names, figs): + edist_out = os.path.join(output_dir, '{}_energy_distribution.png'.format(fig_name)) + fig.savefig(edist_out) + + # Create the flow_params object + with open(os.path.join(output_dir, exp_name) + '.json', 'w') as outfile: + json.dump(flow_params, outfile, + cls=FlowParamsEncoder, sort_keys=True, indent=4) + + return info_dict + + +def create_parser(): + """Create the parser to capture CLI arguments.""" + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description='[Flow] Evaluates a reinforcement learning agent ' + 'given a checkpoint.', + epilog=EXAMPLE_USAGE) + + parser.add_argument( + '--rllib_result_dir', '-r', required=False, type=str, help='Directory 
containing results')
+    parser.add_argument('--checkpoint_num', '-c', required=False, type=str, help='Checkpoint number.')
+
+    parser.add_argument(
+        '--num_rollouts',
+        type=int,
+        default=1,
+        help='The number of rollouts to visualize.')
+    parser.add_argument(
+        '--gen_emission',
+        action='store_true',
+        help='Specifies whether to generate an emission file from the '
+             'simulation')
+    parser.add_argument(
+        '--evaluate',
+        action='store_true',
+        help='Specifies whether to use the \'evaluate\' reward '
+             'for the environment.')
+    parser.add_argument(
+        '--render_mode', '-rm',
+        type=str,
+        default=None,
+        help='Pick the render mode. Options include sumo_web3d, '
+             'drgb and sumo_gui')
+    parser.add_argument(
+        '--save_render',
+        action='store_true',
+        help='Saves a rendered video to a file. NOTE: Overrides render_mode '
+             'with pyglet rendering.')
+    parser.add_argument(
+        '--horizon',
+        type=int,
+        help='Specifies the horizon.')
+    parser.add_argument(
+        '--local',
+        action='store_true',
+        help='Adjusts run settings to be compatible with limited '
+             'memory capacity'
+    )
+    parser.add_argument(
+        '--controller',
+        type=str,
+        help='Which custom controller to use. Options: idm, default_human, '
+             'follower_stopper, sumo.'
+    )
+    parser.add_argument(
+        '--run_transfer',
+        action='store_true',
+        help='Runs transfer tests if true'
+    )
+    parser.add_argument(
+        '-pr',
+        '--penetration_rate',
+        type=float,
+        help='Specifies percentage of AVs.',
+        required=False)
+    parser.add_argument(
+        '-mct',
+        '--max_completed_trips',
+        type=int,
+        help='Terminate rollout after max_completed_trips vehicles have started and ended.',
+        default=None)
+    parser.add_argument(
+        '--v_des_sweep',
+        action='store_true',
+        help='Runs a sweep over v_des params.')
+    parser.add_argument(
+        '--output_dir',
+        type=str,
+        help='Directory to save results.',
+        default=None
+    )
+    parser.add_argument('--use_s3', action='store_true', help='If true, upload results to s3')
+    # replay() reads args.to_aws when building the metadata table, so expose it here
+    parser.add_argument('--to_aws', action='store_true',
+                        help='If true, collect submitter name and strategy for the data pipeline')
+    parser.add_argument('--num_cpus', type=int, default=1, help='Number of cpus to run experiment with')
+    parser.add_argument('--multi_node', action='store_true', help='Set to true if this will '
+                                                                  'be run in cluster mode')
+    parser.add_argument('--exp_title', type=str, required=False, default=None,
+                        help='Informative experiment title to help distinguish results')
+    parser.add_argument(
+        '--only_query',
+        nargs='*', default="[\'all\']",
+        help='specify which queries should be run by lambda. '
+             'For details, see upload_to_s3 in data_pipeline.py'
+    )
+    parser.add_argument(
+        '--is_baseline',
+        action='store_true',
+        help='specifies whether this is a baseline run'
+    )
+    return parser
+
+
+if __name__ == '__main__':
+    date = datetime.now(tz=pytz.utc)
+    date = date.astimezone(pytz.timezone('US/Pacific')).strftime("%m-%d-%Y")
+
+    parser = create_parser()
+    args = parser.parse_args()
+
+    rllib_config = None
+    rllib_result_dir = None
+    if args.rllib_result_dir is not None:
+        rllib_result_dir = args.rllib_result_dir if args.rllib_result_dir[-1] != '/' \
+            else args.rllib_result_dir[:-1]
+
+        rllib_config = get_rllib_config(rllib_result_dir)
+
+    flow_params = deepcopy(I210_MA_DEFAULT_FLOW_PARAMS)
+
+    if args.multi_node:
+        ray.init(redis_address='localhost:6379')
+    elif args.local:
+        ray.init(local_mode=True, object_store_memory=200 * 1024 * 1024)
+    else:
+        ray.init(num_cpus=args.num_cpus + 1, object_store_memory=200 * 1024 * 1024)
+
+    if args.exp_title:
+        output_dir = os.path.join(args.output_dir, args.exp_title)
+    else:
+        output_dir = args.output_dir
+    if args.run_transfer:
+        s = [ray.cloudpickle.dumps(transfer_test) for transfer_test in
+             inflows_range(penetration_rates=[0.0, 0.1, 0.2, 0.3])]
+        ray_output = [replay.remote(args, flow_params, output_dir=output_dir, transfer_test=transfer_test,
+                                    rllib_config=rllib_config, result_dir=rllib_result_dir,
+                                    max_completed_trips=args.max_completed_trips)
+                      for transfer_test in s]
+        ray.get(ray_output)
+
+    elif args.v_des_sweep:
+        assert args.controller == 'follower_stopper'
+
+        ray_output = [
+            replay.remote(args, flow_params, output_dir="{}/{}".format(output_dir, v_des), rllib_config=rllib_config,
+                          result_dir=rllib_result_dir, max_completed_trips=args.max_completed_trips, v_des=v_des)
+            for v_des in range(8, 17, 2)]
+        ray.get(ray_output)
+
+    else:
+        if args.penetration_rate is not None:
+            pr = args.penetration_rate
+            single_transfer = next(inflows_range(penetration_rates=pr))
+            ray.get(replay.remote(args, flow_params, output_dir=output_dir, transfer_test=single_transfer,
+                                  rllib_config=rllib_config, result_dir=rllib_result_dir,
+                                  max_completed_trips=args.max_completed_trips))
+        else:
+            ray.get(replay.remote(args, flow_params, output_dir=output_dir,
+                                  rllib_config=rllib_config, result_dir=rllib_result_dir,
+                                  max_completed_trips=args.max_completed_trips))
+
+    if args.use_s3:
+        s3_string = 's3://kanaad.experiments/i210_replay/' + date
+        if args.exp_title:
+            s3_string += '/' + args.exp_title
+
+        # retry the sync a few times, stopping as soon as it succeeds
+        for _ in range(4):
+            try:
+                p1 = subprocess.Popen("aws s3 sync {} {}".format(output_dir, s3_string).split(' '))
+                p1.wait(50)
+                break
+            except Exception as e:
+                print('Error while syncing to S3:', e)
diff --git a/flow/visualize/plot_custom_callables.py b/flow/visualize/plot_custom_callables.py
new file mode 100644
index 000000000..ee9a10c1d
--- /dev/null
+++ b/flow/visualize/plot_custom_callables.py
@@ -0,0 +1,115 @@
+"""Generate charts from .npy files of custom callables collected through replay."""
+
+import argparse
+from datetime import datetime
+import errno
+import numpy as np
+
+try:
+    from matplotlib import pyplot as plt
+except ImportError:
+    import matplotlib
+
+    matplotlib.use('TkAgg')
+    from matplotlib import pyplot as plt
+import os
+import pytz
+import sys
+
+
+def make_bar_plot(vals, title):
+    """Make a histogram of the given values."""
+    fig = plt.figure()
+    plt.hist(vals, 10, facecolor='blue', alpha=0.5)
+    plt.title(title)
+    plt.xlim(1000, 3000)
+    return fig
+
+
+def plot_trip_distribution(all_trip_energy_distribution):
+    """Plot the distribution of trip energies for each vehicle type."""
+    non_av_vals = []
+    figures = []
+    figure_names = []
+    for key in all_trip_energy_distribution:
+        if key != 'av':
+            non_av_vals.extend(all_trip_energy_distribution[key])
+            figures.append(make_bar_plot(all_trip_energy_distribution[key], key))
+            figure_names.append(key)
+
+    figure_names.append('All Non-AV')
+    figures.append(make_bar_plot(non_av_vals, 'All Non-AV'))
+
+    figure_names.append('All')
+    figures.append(make_bar_plot(non_av_vals + all_trip_energy_distribution['av'], 'All'))
+
+    return figure_names, figures
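A hypothetical input for `plot_trip_distribution`, to make the returned figure names concrete (the energy values are fabricated; the units are whatever replay logged):

```python
# Hypothetical per-vehicle-type trip energies.
all_trip_energy_distribution = {
    'human': [1850.0, 2100.0, 1990.0],
    'av': [1500.0, 1620.0],
}
names, figs = plot_trip_distribution(all_trip_energy_distribution)
print(names)  # ['human', 'All Non-AV', 'All']
```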
+
+def parse_flags(args):
+    """Parse plotting options the user can specify in the command line.
+
+    Returns
+    -------
+    argparse.Namespace
+        the output parser object
+    """
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description="Parse arguments for plotting custom callables gathered during replay.",
+        epilog="python plot_custom_callables.py target_folder")
+    parser.add_argument("target_folder", type=str,
+                        help='Folder containing results')
+    parser.add_argument("--output_folder", type=str, required=False, default=None,
+                        help='Folder to save charts to.')
+    parser.add_argument("--show_images", action='store_true',
+                        help='Whether to display charts.')
+    return parser.parse_args(args)
+
+
+if __name__ == "__main__":
+    flags = parse_flags(sys.argv[1:])
+
+    date = datetime.now(tz=pytz.utc)
+    date = date.astimezone(pytz.timezone('US/Pacific')).strftime("%m-%d-%Y")
+
+    if flags.output_folder:
+        if not os.path.exists(flags.output_folder):
+            try:
+                os.makedirs(flags.output_folder)
+            except OSError as exc:
+                if exc.errno != errno.EEXIST:
+                    raise
+
+    info_dicts = []
+    custom_callable_names = set()
+    exp_names = []
+    for (dirpath, dir_names, file_names) in os.walk(flags.target_folder):
+        for file_name in file_names:
+            if file_name.endswith(".npy"):
+                exp_name = os.path.basename(dirpath)
+                info_dict = np.load(os.path.join(dirpath, file_name), allow_pickle=True).item()
+
+                info_dicts.append(info_dict)
+                exp_names.append(exp_name)
+                custom_callable_names.update(info_dict.keys())
+
+    idxs = np.argsort(exp_names)
+    exp_names = [exp_names[i] for i in idxs]
+    info_dicts = [info_dicts[i] for i in idxs]
+
+    for name in custom_callable_names:
+        y_vals = [np.mean(info_dict[name]) for info_dict in info_dicts]
+        y_stds = [np.std(info_dict[name]) for info_dict in info_dicts]
+        x_pos = np.arange(len(exp_names))
+
+        plt.bar(x_pos, y_vals, yerr=y_stds, align='center', alpha=0.5)
+        plt.xticks(x_pos, exp_names, rotation=60)
+        plt.xlabel('Experiment')
+        plt.title('I210 Replay Result: {}'.format(name))
+        plt.tight_layout()
+        if flags.output_folder:
+            plt.savefig(os.path.join(flags.output_folder, '{}-plot.png'.format(name)))
+
+        if flags.show_images:
+            plt.show()
diff --git a/flow/visualize/time_space_diagram.py b/flow/visualize/time_space_diagram.py
index bc26ad855..955043691 100644
--- a/flow/visualize/time_space_diagram.py
+++ b/flow/visualize/time_space_diagram.py
@@ -17,7 +17,7 @@
 python time_space_diagram.py .csv .json
 """
 from flow.utils.rllib import get_flow_params
-from flow.networks import RingNetwork, FigureEightNetwork, MergeNetwork, I210SubNetwork
+from flow.networks import RingNetwork, FigureEightNetwork, MergeNetwork, I210SubNetwork, HighwayNetwork

 import argparse
 from collections import defaultdict
@@ -27,7 +27,8 @@
 import matplotlib
 matplotlib.use('TkAgg')
 from matplotlib import pyplot as plt
-from matplotlib.collections import LineCollection
+from matplotlib.collections import LineCollection, PatchCollection
+from matplotlib.patches import Rectangle
 import matplotlib.colors as colors
 import numpy as np
 import pandas as pd
@@ -38,9 +39,21 @@
     RingNetwork,
     FigureEightNetwork,
     MergeNetwork,
-    I210SubNetwork
+    I210SubNetwork,
+    HighwayNetwork
 ]

+# networks that use edgestarts
+USE_EDGESTARTS = set([
+    RingNetwork,
+    FigureEightNetwork,
+    MergeNetwork
+])
+
+GHOST_DICT = defaultdict(dict)
+GHOST_DICT[I210SubNetwork] = {'ghost_edges': {'ghost0', '119257908#3'}}
+GHOST_DICT[HighwayNetwork] = {'ghost_bounds': (500, 2300)}
+

 def import_data_from_trajectory(fp, params=dict()):
     r"""Import and preprocess data from the Flow trajectory (.csv) file.
@@ -51,7 +64,6 @@ def import_data_from_trajectory(fp, params=dict()):
         file path (for the .csv formatted file)
     params : dict
         flow-specific parameters, including:
-
         * "network" (str): name of the network that was used when generating
           the emission file. Must be one of the network names mentioned in
           ACCEPTABLE_NETWORKS,
@@ -61,8 +73,10 @@ def import_data_from_trajectory(fp, params=dict()):

     Returns
     -------
-    pd.DataFrame
+    pd.DataFrame, float, float, float
     """
+    network = params['network']
+
     # Read trajectory csv into pandas dataframe
     df = pd.read_csv(fp)
@@ -72,42 +86,54 @@ def import_data_from_trajectory(fp, params=dict()):
         'lane_number': 'lane_id',
     }
     df = df.rename(columns=column_conversions)
-    if 'distance' not in df.columns:
+    if network in USE_EDGESTARTS:
         df['distance'] = _get_abs_pos(df, params)

+    start = params['env'].warmup_steps * params['env'].sims_per_step * params['sim'].sim_step
+    # produce upper and lower bounds for the non-greyed-out domain
+    ghost_edges = GHOST_DICT[network].get('ghost_edges')
+    ghost_bounds = GHOST_DICT[network].get('ghost_bounds')
+    if ghost_edges:
+        domain_lb = df[~df['edge_id'].isin(ghost_edges)]['distance'].min()
+        domain_ub = df[~df['edge_id'].isin(ghost_edges)]['distance'].max()
+    elif ghost_bounds:
+        domain_lb = ghost_bounds[0]
+        domain_ub = ghost_bounds[1]
+    else:
+        domain_lb = df['distance'].min()
+        domain_ub = df['distance'].max()
+
+    df.loc[:, 'time_step'] = df['time_step'].apply(lambda x: x - start)
+    df.loc[:, 'distance'] = df['distance'].apply(lambda x: x - domain_lb)
+    domain_ub -= domain_lb
+
     # Compute line segment ends by shifting dataframe by 1 row
     df[['next_pos', 'next_time']] = df.groupby('id')[['distance', 'time_step']].shift(-1)

     # Remove nans from data
     df = df[df['next_time'].notna()]

-    return df
+    return df, domain_lb, domain_ub, start


-def get_time_space_data(data, params):
+def get_time_space_data(data, network):
     r"""Compute the unique inflows and subsequent outflow statistics.

     Parameters
     ----------
     data : pd.DataFrame
         cleaned dataframe of the trajectory data
-    params : dict
-        flow-specific parameters, including:
-
-        * "network" (str): name of the network that was used when generating
-          the emission file. Must be one of the network names mentioned in
-          ACCEPTABLE_NETWORKS,
-        * "net_params" (flow.core.params.NetParams): network-specific
-          parameters. This is used to collect the lengths of various network
-          links.
+    network : child class of Network()
+        network that was used when generating the emission file.
+        Must be one of the network names mentioned in
+        ACCEPTABLE_NETWORKS

     Returns
     -------
-    ndarray (or dict of ndarray)
+    ndarray (or dict < str, np.ndarray >)
         3d array (n_segments x 2 x 2) containing segments to be plotted.
         every inner 2d array is comprised of two 1d arrays representing
         [start time, start distance] and [end time, end distance] pairs.
-
         in the case of I210, the nested arrays are wrapped into a dict,
         keyed on the lane number, so that each lane can be plotted
        separately.
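To make the segment layout described above concrete, here is a hand-built example of the `(n_segments x 2 x 2)` array; the times and positions are fabricated:

```python
import numpy as np

# Two fabricated segments in [[start time, start distance],
#                             [end time, end distance]] form.
segs = np.array([
    [[0.0, 10.0], [0.4, 18.5]],   # one vehicle advancing 8.5 m in 0.4 s
    [[0.4, 18.5], [0.8, 27.0]],
])
print(segs.shape)  # (2, 2, 2)
```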
@@ -118,19 +144,20 @@ def get_time_space_data(data, params): if the specified network is not supported by this method """ # check that the network is appropriate - assert params['network'] in ACCEPTABLE_NETWORKS, \ - 'Network must be one of: ' + ', '.join([network.__name__ for network in ACCEPTABLE_NETWORKS]) + assert network in ACCEPTABLE_NETWORKS, \ + 'Network must be one of: ' + ', '.join([network_.__name__ for network_ in ACCEPTABLE_NETWORKS]) # switcher used to compute the positions based on the type of network switcher = { RingNetwork: _ring_road, MergeNetwork: _merge, FigureEightNetwork: _figure_eight, - I210SubNetwork: _i210_subnetwork + I210SubNetwork: _i210_subnetwork, + HighwayNetwork: _highway, } # Get the function from switcher dictionary - func = switcher[params['network']] + func = switcher[network] # Execute the function segs, data = func(data) @@ -139,7 +166,7 @@ def get_time_space_data(data, params): def _merge(data): - r"""Generate position and speed data for the merge. + r"""Generate time and position data for the merge. This only include vehicles on the main highway, and not on the adjacent on-ramp. @@ -167,8 +194,30 @@ def _merge(data): return segs, data +def _highway(data): + r"""Generate time and position data for the highway. + + Parameters + ---------- + data : pd.DataFrame + cleaned dataframe of the trajectory data + + Returns + ------- + ndarray + 3d array (n_segments x 2 x 2) containing segments to be plotted. + every inner 2d array is comprised of two 1d arrays representing + [start time, start distance] and [end time, end distance] pairs. + pd.DataFrame + modified trajectory dataframe + """ + segs = data[['time_step', 'distance', 'next_time', 'next_pos']].values.reshape((len(data), 2, 2)) + + return segs, data + + def _ring_road(data): - r"""Generate position and speed data for the ring road. + r"""Generate time and position data for the ring road. Vehicles that reach the top of the plot simply return to the bottom and continue. @@ -205,7 +254,7 @@ def _i210_subnetwork(data): Returns ------- - dict of ndarray + dict < str, np.ndarray > dictionary of 3d array (n_segments x 2 x 2) containing segments to be plotted. the dictionary is keyed on lane numbers, with the values being the 3d array representing the segments. every inner @@ -214,13 +263,9 @@ def _i210_subnetwork(data): pd.DataFrame modified trajectory dataframe """ - # Omit ghost edges - omit_edges = {'ghost0', '119257908#3'} - data.loc[:, :] = data[~data['edge_id'].isin(omit_edges)] - # Reset lane numbers that are offset by ramp lanes offset_edges = set(data[data['lane_id'] == 5]['edge_id'].unique()) - data.loc[data['edge_id'].isin(offset_edges), 'lane_id'] -= 1 + data.loc[data['edge_id'].isin(offset_edges), 'lane_id'] = data[data['edge_id'].isin(offset_edges)]['lane_id'] - 1 segs = dict() for lane, df in data.groupby('lane_id'): @@ -230,7 +275,7 @@ def _i210_subnetwork(data): def _figure_eight(data): - r"""Generate position and speed data for the figure eight. + r"""Generate time and position data for the figure eight. 
The vehicles traveling towards the intersection from one side will be plotted from the top downward, while the vehicles from the other side will @@ -329,6 +374,24 @@ def _get_abs_pos(df, params): 'bottom_to_top': intersection / 2 + inner, 'right_to_left': junction + 3 * inner, } + elif params['network'] == HighwayNetwork: + return df['x'] + elif params['network'] == I210SubNetwork: + edgestarts = { + '119257914': -5.0999999999995795, + '119257908#0': 56.49000000018306, + ':300944379_0': 56.18000000000016, + ':300944436_0': 753.4599999999871, + '119257908#1-AddedOnRampEdge': 756.3299999991157, + ':119257908#1-AddedOnRampNode_0': 853.530000000022, + '119257908#1': 856.7699999997207, + ':119257908#1-AddedOffRampNode_0': 1096.4499999999707, + '119257908#1-AddedOffRampEdge': 1099.6899999995558, + ':1686591010_1': 1198.1899999999541, + '119257908#2': 1203.6499999994803, + ':1842086610_1': 1780.2599999999056, + '119257908#3': 1784.7899999996537, + } else: edgestarts = defaultdict(float) @@ -346,57 +409,140 @@ def _get_abs_pos(df, params): return ret -def plot_tsd(ax, df, segs, args, lane=None): +def plot_tsd(df, network, cmap, min_speed=0, max_speed=10, start=0, domain_bounds=None): """Plot the time-space diagram. - Take the pre-processed segments and other meta-data, then plot all the line segments. + Take the pre-processed segments and other meta-data, then plot all the line + segments. Parameters ---------- - ax : matplotlib.axes.Axes - figure axes that will be plotted on df : pd.DataFrame data used for axes bounds and speed coloring - segs : list of list of lists - line segments to be plotted, where each segment is a list of two [x,y] pairs - args : dict - parsed arguments - lane : int, optional - lane number to be shown in plot title - - Returns - ------- - None + network : child class of Network() + network that was used when generating the emission file. 
+ Must be one of the network names mentioned in + ACCEPTABLE_NETWORKS + cmap : colors.LinearSegmentedColormap + colormap for plotting speed + min_speed : int or float + minimum speed in colorbar + max_speed : int or float + maximum speed in colorbar + start : int or float + starting time_step not greyed out + domain_bounds : tuple + lower and upper bounds of domain, excluding ghost edges, default None """ - norm = plt.Normalize(args.min_speed, args.max_speed) + norm = plt.Normalize(min_speed, max_speed) - xmin = max(df['time_step'].min(), args.start) - xmax = min(df['time_step'].max(), args.stop) + xmin, xmax = df['time_step'].min(), df['time_step'].max() xbuffer = (xmax - xmin) * 0.025 # 2.5% of range ymin, ymax = df['distance'].min(), df['distance'].max() ybuffer = (ymax - ymin) * 0.025 # 2.5% of range - ax.set_xlim(xmin - xbuffer, xmax + xbuffer) - ax.set_ylim(ymin - ybuffer, ymax + ybuffer) + # Convert df data into segments for plotting + segs, df = get_time_space_data(df, network) - lc = LineCollection(segs, cmap=my_cmap, norm=norm) - lc.set_array(df['speed'].values) - lc.set_linewidth(1) - ax.add_collection(lc) - ax.autoscale() + nlanes = df['lane_id'].nunique() + plt.figure(figsize=(16, 9*nlanes)) + if nlanes == 1: + segs = [segs] - if lane: - ax.set_title('Time-Space Diagram: Lane {}'.format(lane), fontsize=25) - else: - ax.set_title('Time-Space Diagram', fontsize=25) - ax.set_ylabel('Position (m)', fontsize=20) - ax.set_xlabel('Time (s)', fontsize=20) - plt.xticks(fontsize=18) - plt.yticks(fontsize=18) + for lane, lane_df in df.groupby('lane_id'): + ax = plt.subplot(nlanes, 1, lane+1) + + ax.set_xlim(xmin - xbuffer, xmax + xbuffer) + ax.set_ylim(ymin - ybuffer, ymax + ybuffer) + + lc = LineCollection(segs[lane], cmap=cmap, norm=norm) + lc.set_array(lane_df['speed'].values) + lc.set_linewidth(1) + ax.add_collection(lc) + ax.autoscale() + + rects = [] + # rectangle for warmup period, but not ghost edges + rects.append(Rectangle((xmin, 0), start, domain_bounds[1])) + # rectangle for lower ghost edge (including warmup period) + rects.append(Rectangle((xmin, ymin), xmax - xmin, domain_bounds[0])) + # rectangle for upper ghost edge (including warmup period) + rects.append(Rectangle((xmin, domain_bounds[1]), xmax - xmin, ymax - domain_bounds[1])) + + pc = PatchCollection(rects, facecolor='grey', alpha=0.5, edgecolor=None) + pc.set_zorder(20) + ax.add_collection(pc) + + if nlanes > 1: + ax.set_title('Time-Space Diagram: Lane {}'.format(lane), fontsize=25) + else: + ax.set_title('Time-Space Diagram', fontsize=25) - cbar = plt.colorbar(lc, ax=ax, norm=norm) - cbar.set_label('Velocity (m/s)', fontsize=20) - cbar.ax.tick_params(labelsize=18) + ax.set_ylabel('Position (m)', fontsize=20) + if lane == nlanes - 1: + ax.set_xlabel('Time (s)', fontsize=20) + plt.xticks(fontsize=18) + plt.yticks(fontsize=18) + + cbar = plt.colorbar(lc, ax=ax, norm=norm) + cbar.set_label('Velocity (m/s)', fontsize=20) + cbar.ax.tick_params(labelsize=18) + + plt.tight_layout() + + +def tsd_main(trajectory_path, flow_params, min_speed=0, max_speed=10): + """Prepare and plot the time-space diagram. + + Parameters + ---------- + trajectory_path : str + file path (for the .csv formatted file) + flow_params : dict + flow-specific parameters, including: + * "network" (str): name of the network that was used when generating + the emission file. Must be one of the network names mentioned in + ACCEPTABLE_NETWORKS, + * "net_params" (flow.core.params.NetParams): network-specific + parameters. 
This is used to collect the lengths of various network + links. + min_speed : int or float + minimum speed in colorbar + max_speed : int or float + maximum speed in colorbar + """ + network = flow_params['network'] + + # some plotting parameters + cdict = { + 'red': ((0, 0, 0), (0.2, 1, 1), (0.6, 1, 1), (1, 0, 0)), + 'green': ((0, 0, 0), (0.2, 0, 0), (0.6, 1, 1), (1, 1, 1)), + 'blue': ((0, 0, 0), (0.2, 0, 0), (0.6, 0, 0), (1, 0, 0)) + } + my_cmap = colors.LinearSegmentedColormap('my_colormap', cdict, 1024) + + # Read trajectory csv into pandas dataframe + traj_df, domain_lb, domain_ub, start = import_data_from_trajectory(trajectory_path, flow_params) + + plot_tsd(df=traj_df, + network=network, + cmap=my_cmap, + min_speed=min_speed, + max_speed=max_speed, + start=start, + domain_bounds=(domain_lb, domain_ub)) + + ########################################################################### + # Note: For MergeNetwork only # + if network == MergeNetwork: # + plt.plot([traj_df['time_step'].min(), traj_df['time_step'].max()], + [0, 0], linewidth=3, color="white") # + plt.plot([traj_df['time_step'].min(), traj_df['time_step'].max()], + [-0.1, -0.1], linewidth=3, color="white") # + ########################################################################### + + outfile = trajectory_path.replace('csv', 'png') + plt.savefig(outfile) if __name__ == '__main__': @@ -417,15 +563,11 @@ def plot_tsd(ax, df, segs, args, lane=None): parser.add_argument('--steps', type=int, default=1, help='rate at which steps are plotted.') parser.add_argument('--title', type=str, default='Time Space Diagram', - help='rate at which steps are plotted.') + help='Title for the time-space diagrams.') parser.add_argument('--max_speed', type=int, default=8, help='The maximum speed in the color range.') parser.add_argument('--min_speed', type=int, default=0, help='The minimum speed in the color range.') - parser.add_argument('--start', type=float, default=0, - help='initial time (in sec) in the plot.') - parser.add_argument('--stop', type=float, default=float('inf'), - help='final time (in sec) in the plot.') args = parser.parse_args() @@ -436,42 +578,9 @@ def plot_tsd(ax, df, segs, args, lane=None): module = __import__("examples.exp_configs.non_rl", fromlist=[args.flow_params]) flow_params = getattr(module, args.flow_params).flow_params - # some plotting parameters - cdict = { - 'red': ((0, 0, 0), (0.2, 1, 1), (0.6, 1, 1), (1, 0, 0)), - 'green': ((0, 0, 0), (0.2, 0, 0), (0.6, 1, 1), (1, 1, 1)), - 'blue': ((0, 0, 0), (0.2, 0, 0), (0.6, 0, 0), (1, 0, 0)) - } - my_cmap = colors.LinearSegmentedColormap('my_colormap', cdict, 1024) - - # Read trajectory csv into pandas dataframe - traj_df = import_data_from_trajectory(args.trajectory_path, flow_params) - - # Convert df data into segments for plotting - segs, traj_df = get_time_space_data(traj_df, flow_params) - - if flow_params['network'] == I210SubNetwork: - nlanes = traj_df['lane_id'].nunique() - fig = plt.figure(figsize=(16, 9*nlanes)) - - for lane, df in traj_df.groupby('lane_id'): - ax = plt.subplot(nlanes, 1, lane+1) - - plot_tsd(ax, df, segs[lane], args, lane) - else: - # perform plotting operation - fig = plt.figure(figsize=(16, 9)) - ax = plt.axes() - - plot_tsd(ax, traj_df, segs, args) - - ########################################################################### - # Note: For MergeNetwork only # - if flow_params['network'] == 'MergeNetwork': # - plt.plot([df['time_step'].min(), df['time_step'].max()], - [0, 0], linewidth=3, color="white") # - plt.plot([df['time_step'].min(), 
df['time_step'].max()],
-                 [-0.1, -0.1], linewidth=3, color="white")                    #
-    ###########################################################################
-
-    plt.show()
+    tsd_main(
+        args.trajectory_path,
+        flow_params,
+        min_speed=args.min_speed,
+        max_speed=args.max_speed
+    )
diff --git a/flow/visualize/transfer/util.py b/flow/visualize/transfer/util.py
new file mode 100644
index 000000000..8c933c5a3
--- /dev/null
+++ b/flow/visualize/transfer/util.py
@@ -0,0 +1,141 @@
+"""Definitions of transfer classes."""
+from copy import deepcopy
+
+from flow.core.params import InFlows
+from examples.exp_configs.rl.multiagent.multiagent_i210 import INFLOW_RATE, ON_RAMP_INFLOW_RATE
+
+
+def make_inflows(pr=0.1, fr_coef=1.0, departSpeed=20, on_ramp=False):
+    """Generate an InFlows object from the given parameters. Uses default inflows from multiagent_i210.
+
+    Keyword Arguments:
+    -----------------
+        pr {float} -- [AV penetration rate] (default: {0.1})
+        fr_coef {float} -- [coefficient to scale the flow rate by] (default: {1.0})
+        departSpeed {int} -- [initial speed of all flows] (default: {20})
+        on_ramp {bool} -- [whether to include the on-ramp inflows] (default: {False})
+
+    Returns
+    -------
+    [InFlows] -- [InFlows parameter object]
+
+    """
+    inflow = InFlows()
+    # main highway
+    assert pr < 1.0, "your penetration rate is over 100%"
+
+    all_inflows = []
+
+    inflow_119257914 = dict(veh_type="human",
+                            edge="ghost0",
+                            vehs_per_hour=INFLOW_RATE * (1 - pr) * fr_coef,
+                            # probability=1.0,
+                            departLane="random",
+                            departSpeed=departSpeed)
+    all_inflows.append(inflow_119257914)
+
+    if pr > 0.0:
+        inflow_119257914_av = dict(veh_type="av",
+                                   edge="ghost0",
+                                   vehs_per_hour=int(INFLOW_RATE * pr * fr_coef),
+                                   # probability=1.0,
+                                   departLane="random",
+                                   departSpeed=departSpeed)
+        all_inflows.append(inflow_119257914_av)
+
+    if on_ramp:
+        inflow_27414345 = dict(veh_type="human",
+                               edge="27414345",
+                               vehs_per_hour=ON_RAMP_INFLOW_RATE * (1 - pr) * fr_coef,
+                               departLane="random",
+                               departSpeed=departSpeed)
+        all_inflows.append(inflow_27414345)
+        if pr > 0.0:
+            inflow_27414342 = dict(veh_type="human",
+                                   edge="27414342#0",
+                                   vehs_per_hour=ON_RAMP_INFLOW_RATE * pr * fr_coef,
+                                   departLane="random",
+                                   departSpeed=departSpeed)
+            all_inflows.append(inflow_27414342)
+
+    for inflow_def in all_inflows:
+        inflow.add(**inflow_def)
+
+    return inflow
+
+
+class BaseTransfer:
+    """Base Transfer class."""
+
+    def __init__(self):
+        self.transfer_str = "Base"
+
+    def flow_params_modifier_fn(self, flow_params, clone_params=True):
+        """Return modified flow_params.
+
+        Arguments:
+        ---------
+            flow_params {[flow_params_dictionary]} -- [flow_params]
+        """
+        if clone_params:
+            flow_params = deepcopy(flow_params)
+
+        return flow_params
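A hypothetical use of these utilities, sweeping AV penetration rates with `inflows_range` (defined at the end of this file) and printing each transfer's tag; flow rate and depart speed keep their defaults, so the tag shown is illustrative:

```python
# Sweep over penetration rates with the defaults fr_coef=1.0, departSpeed=20.
for transfer in inflows_range(penetration_rates=[0.0, 0.1, 0.2]):
    print(transfer.transfer_str)
    # e.g. '0.10_pen_1.00_flow_rate_coef_20.00_depspeed'
```

+    def env_modifier_fn(self, env):
+        """Modify the env before rollouts are run.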
diff --git a/flow/visualize/visualizer_rllib.py b/flow/visualize/visualizer_rllib.py
index 8c38a91c1..e1f09fafd 100644
--- a/flow/visualize/visualizer_rllib.py
+++ b/flow/visualize/visualizer_rllib.py
@@ -26,12 +26,18 @@
 from ray.rllib.agents.registry import get_agent_class
 from ray.tune.registry import register_env

+from flow.core.rewards import instantaneous_mpg
 from flow.core.util import emission_to_csv
 from flow.utils.registry import make_create_env
 from flow.utils.rllib import get_flow_params
 from flow.utils.rllib import get_rllib_config
 from flow.utils.rllib import get_rllib_pkl
+from flow.data_pipeline.data_pipeline import write_dict_to_csv, upload_to_s3, get_extra_info, get_configuration
+from flow.data_pipeline.leaderboard_utils import network_name_translate
+from collections import defaultdict
+from datetime import datetime, timezone
+import uuid

 EXAMPLE_USAGE = """
 example usage:
@@ -90,6 +96,22 @@ def visualizer_rllib(args):
         sys.exit(1)
     if args.run:
         agent_cls = get_agent_class(args.run)
+    elif config['env_config']['run'] == "<class 'flow.controllers.imitation_learning.imitation_trainer.Imitation_PPO_Trainable'>":
+        from flow.controllers.imitation_learning.imitation_trainer import Imitation_PPO_Trainable
+        from flow.controllers.imitation_learning.ppo_model import PPONetwork
+        from ray.rllib.models import ModelCatalog
+        agent_cls = get_agent_class("PPO")
+        ModelCatalog.register_custom_model("imitation_ppo_trainable", Imitation_PPO_Trainable)
+        ModelCatalog.register_custom_model("PPO_loaded_weights", PPONetwork)
+
+    elif config['env_config']['run'] == "<class 'flow.algorithms.centralized_PPO.CCTrainer'>":
+        from flow.algorithms.centralized_PPO import CCTrainer, CentralizedCriticModel
+        from ray.rllib.models import ModelCatalog
+        agent_cls = CCTrainer
+        ModelCatalog.register_custom_model("cc_model", CentralizedCriticModel)
+    elif config['env_config']['run'] == "<class 'flow.algorithms.custom_ppo.CustomPPOTrainer'>":
+        from flow.algorithms.custom_ppo import CustomPPOTrainer
+        agent_cls = CustomPPOTrainer
     elif config_run:
         agent_cls = get_agent_class(config_run)
     else:
@@ -154,19 +176,24 @@ def visualizer_rllib(args):
         checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
     agent.restore(checkpoint)

+
     if hasattr(agent, "local_evaluator") and \
             os.environ.get("TEST_FLAG") != 'True':
         env = agent.local_evaluator.env
     else:
         env = gym.make(env_name)

+    # Rerouting on exit is a training hack; it should be turned off at test time.
+    if hasattr(env, "reroute_on_exit"):
+        env.reroute_on_exit = False
+
     if args.render_mode == 'sumo_gui':
         env.sim_params.render = True  # set to True after initializing agent and env

     if multiagent:
         rets = {}
         # map the agent id to its policy
-        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
+        policy_map_fn = config['multiagent']['policy_mapping_fn']
         for key in config['multiagent']['policies'].keys():
             rets[key] = []
     else:
@@ -177,7 +204,7 @@ def visualizer_rllib(args):
         if multiagent:
             state_init = {}
             # map the agent id to its policy
-            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
+            policy_map_fn = config['multiagent']['policy_mapping_fn']
             size = config['model']['lstm_cell_size']
             for key in config['multiagent']['policies'].keys():
                 state_init[key] = [np.zeros(size, np.float32),
@@ -194,13 +221,34 @@ def visualizer_rllib(args):
     if not sim_params.restart_instance:
         env.restart_simulation(sim_params=sim_params, render=sim_params.render)

+    # data pipeline
+    extra_info = defaultdict(list)
+    source_id = 'flow_{}'.format(uuid.uuid4().hex)
+    metadata = defaultdict(list)
+    # collect current time
+    cur_datetime = datetime.now(timezone.utc)
+    cur_date = cur_datetime.date().isoformat()
+    cur_time = cur_datetime.time().isoformat()
+    # collecting information for metadata table
+    metadata['source_id'].append(source_id)
+    metadata['submission_time'].append(cur_time)
+    metadata['network'].append(network_name_translate(env.network.name.split('_20')[0]))
+    metadata['is_baseline'].append(str(args.is_baseline))
+    if args.to_aws:
+        name, strategy = get_configuration()
+        metadata['submitter_name'].append(name)
+        metadata['strategy'].append(strategy)
+
     # Simulate and collect metrics
     final_outflows = []
     final_inflows = []
+    mpg = []
     mean_speed = []
     std_speed = []
     for i in range(args.num_rollouts):
         vel = []
+        run_id = "run_{}".format(i)
+        env.pipeline_params = (extra_info, source_id, run_id)
         state = env.reset()
         if multiagent:
             ret = {key: [0] for key in rets.keys()}
@@ -214,6 +262,8 @@ def visualizer_rllib(args):
             if speeds:
                 vel.append(np.mean(speeds))

+                mpg.append(instantaneous_mpg(env.unwrapped, vehicles.get_ids(), gain=1.0))
+
             if multiagent:
                 action = {}
                 for agent_id in state.keys():
@@ -228,6 +278,10 @@ def visualizer_rllib(args):
             else:
                 action = agent.compute_action(state)
             state, reward, done, _ = env.step(action)
+
+            # collect data for data pipeline
+            get_extra_info(vehicles, extra_info, vehicles.get_ids(), source_id, run_id)
+
             if multiagent:
                 for actor, rew in reward.items():
                     ret[policy_map_fn(actor)][0] += rew
@@ -279,10 +333,8 @@ def visualizer_rllib(args):
     print(mean_speed)
     print('Average, std: {}, {}'.format(np.mean(mean_speed), np.std(
         mean_speed)))
-    print("\nSpeed, std (m/s):")
-    print(std_speed)
-    print('Average, std: {}, {}'.format(np.mean(std_speed), np.std(
-        std_speed)))
+
+    print('Average, std miles per gallon: {}, {}'.format(np.mean(mpg),
+                                                         np.std(mpg)))

     # Compute arrival rate of vehicles in the last 500 sec of the run
     print("\nOutflows (veh/hr):")
@@ -323,6 +375,22 @@ def visualizer_rllib(args):
             # delete the .xml version of the emission file
             os.remove(emission_path)

+    # generate data-pipeline output
+    trajectory_table_path = os.path.join(dir_path, '{}.csv'.format(source_id))
+    metadata_table_path = os.path.join(dir_path, '{}_METADATA.csv'.format(source_id))
+    write_dict_to_csv(trajectory_table_path, extra_info, True)
+    write_dict_to_csv(metadata_table_path, metadata, True)
+
+    if args.to_aws:
+        upload_to_s3('circles.data.pipeline',
+                     'metadata_table/date={0}/partition_name={1}_METADATA/{1}_METADATA.csv'.format(cur_date,
                                                                                                    source_id),
+                     metadata_table_path)
+        upload_to_s3('circles.data.pipeline',
+                     'fact_vehicle_trace/date={0}/partition_name={1}/{1}.csv'.format(cur_date, source_id),
+                     trajectory_table_path,
+                     {'network': metadata['network'][0]})
+

 def create_parser():
     """Create the parser to capture CLI arguments."""
@@ -376,11 +444,24 @@ def create_parser():
         '--horizon',
         type=int,
         help='Specifies the horizon.')
+    parser.add_argument(
+        '--is_baseline',
+        action='store_true',
+        help='Specifies whether this is a baseline run.'
+    )
+    parser.add_argument(
+        '--to_aws',
+        type=str, nargs='?', default=None, const="default",
+        help='Specifies the name of the partition to store the output '
+             'file on S3. Passing a non-None value for this argument '
+             'automatically sets gen_emission to True.'
+    )
     return parser


 if __name__ == '__main__':
     parser = create_parser()
     args = parser.parse_args()
-    ray.init(num_cpus=1)
+    print("GEN EMISSION: ", args.gen_emission)
+    ray.init(local_mode=True)
     visualizer_rllib(args)
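The two `upload_to_s3` calls above write the rollout artifacts into a Hive-style partition layout. A small sketch of just the key construction (not part of the patch; the bucket name and key prefixes are copied verbatim from the calls above, everything else is standard library):

```python
# Sketch: reproduce the S3 key layout used by the data-pipeline upload.
import uuid
from datetime import datetime, timezone

source_id = 'flow_{}'.format(uuid.uuid4().hex)
cur_date = datetime.now(timezone.utc).date().isoformat()

# Both objects land in the 'circles.data.pipeline' bucket. The
# date=.../partition_name=... segments form Hive-style partitions, which
# lets a downstream query engine (e.g. Athena) prune by date and source.
metadata_key = ('metadata_table/date={0}/partition_name={1}_METADATA/'
                '{1}_METADATA.csv'.format(cur_date, source_id))
trace_key = 'fact_vehicle_trace/date={0}/partition_name={1}/{1}.csv'.format(
    cur_date, source_id)

print(metadata_key)
print(trace_key)
```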
diff --git a/requirements.txt b/requirements.txt
index ccb971a99..a4f6f83f8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,19 +9,24 @@ path.py
 joblib==0.10.3
 python-dateutil==2.7.3
 cached_property
-cloudpickle==1.2.0
 pyglet==1.3.2
 matplotlib==3.1.0
 imutils==0.5.1
 numpydoc
-ray==0.7.3
+ray==0.8.0
 opencv-python
 dill
 lz4
 setproctitle
 psutil
 opencv-python
-boto3==1.4.8
+boto3==1.10.45
 redis~=2.10.6
 pandas==0.24.2
 plotly==2.4.0
+tabulate
+tensorflow==1.15.2
+awscli==1.16.309
+torch==1.4.0
+pytz
+tensorboardX
diff --git a/scripts/ray_autoscale.yaml b/scripts/ray_autoscale.yaml
index 27ac0898e..18e25154d 100644
--- a/scripts/ray_autoscale.yaml
+++ b/scripts/ray_autoscale.yaml
@@ -1,4 +1,4 @@
-# cluster.yaml =========================================
+# cluster.yaml =========================================
 # A unique identifier for the head node and workers of this cluster.
 cluster_name: test #

@@ -32,15 +32,15 @@ auth:
 # By default Ray creates a new private keypair, but you can also use your own.
 # If you do so, make sure to also set "KeyName" in the head and worker node
 # configurations below.
-# ssh_private_key: /path/to/your/key.pem
+# ssh_private_key:

 # Provider-specific config for the head node, e.g. instance type. By default
 # Ray will auto-configure unspecified fields such as SubnetId and KeyName.
 # For more documentation on available fields, see:
 # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
 head_node:
-    InstanceType: c4.4xlarge
-    ImageId: ami-09544298704576518 # Flow AMI (Ubuntu)
+    InstanceType: c4.8xlarge
+    ImageId: ami-0c047f3ddd3939b30 # Flow AMI (Ubuntu)
     InstanceMarketOptions:
         MarketType: spot
     #Additional options can be found in the boto docs, e.g.
@@ -54,10 +54,10 @@ head_node: # For more documentation on available fields, see: # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances worker_nodes: - InstanceType: c4.4xlarge - ImageId: ami-09544298704576518 # Flow AMI (Ubuntu) + InstanceType: c4.8xlarge + ImageId: ami-0c047f3ddd3939b30 # Flow AMI (Ubuntu) - #Run workers on spot by default. Comment this out to use on-demand. + #Run workers on spot by default. Comment this out to use on-demand. InstanceMarketOptions: MarketType: spot # Additional options can be found in the boto docs, e.g. @@ -67,12 +67,19 @@ worker_nodes: # Additional options in the boto docs. setup_commands: - - cd flow && git fetch && git checkout origin/master + - cd flow && git fetch && git checkout origin/flow_maddpg + - flow/scripts/setup_sumo_ubuntu1604.sh + - pip install ray==0.8.0 + - pip install tabulate - pip install boto3==1.10.45 # 1.4.8 adds InstanceMarketOptions - pip install awscli==1.16.309 + - pip install stable-baselines - pip install pytz + - pip install torch==1.3.1 + - pip install tensorflow==2.0.0 + - pip install lz4 + - pip install dm-tree - pip install numpy==1.18.4 - - ./flow/scripts/setup_sumo_ubuntu1604.sh head_setup_commands: [] diff --git a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 index 0693ed4b6..d346e9dc5 100644 Binary files a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 and b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 differ diff --git a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata index 7eef2ef15..febe7b205 100644 Binary files a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata and b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata differ diff --git a/tests/data/rllib_data/multi_agent/params.json b/tests/data/rllib_data/multi_agent/params.json index 01089f730..39a737f75 100644 --- a/tests/data/rllib_data/multi_agent/params.json +++ b/tests/data/rllib_data/multi_agent/params.json @@ -8,17 +8,19 @@ "on_sample_end": null, "on_train_result": null }, - "clip_actions": false, + "clip_actions": true, "clip_param": 0.3, "clip_rewards": null, "collect_metrics_timeout": 180, "compress_observations": false, "custom_resources_per_worker": {}, + "eager": false, + "eager_tracing": false, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, - "env": "MultiWaveAttenuationPOEnv-v0", + "env": "MultiAgentAccelPOEnv-v1", "env_config": { - "flow_params": "{\n \"env\": {\n \"additional_params\": {\n \"max_accel\": 1,\n \"max_decel\": 1,\n \"ring_length\": [\n 230,\n 230\n ],\n \"target_velocity\": 4\n },\n \"clip_actions\": true,\n \"evaluate\": false,\n \"horizon\": 3000,\n \"sims_per_step\": 1,\n \"warmup_steps\": 750\n },\n \"env_name\": \"MultiWaveAttenuationPOEnv\",\n \"exp_tag\": \"lord_of_numrings1\",\n \"initial\": {\n \"additional_params\": {},\n \"bunching\": 20.0,\n \"edges_distribution\": \"all\",\n \"lanes_distribution\": Infinity,\n \"min_gap\": 0,\n \"perturbation\": 0.0,\n \"shuffle\": false,\n \"spacing\": \"custom\",\n \"x0\": 0\n },\n \"net\": {\n \"additional_params\": {\n \"lanes\": 1,\n \"length\": 230,\n \"num_rings\": 1,\n \"resolution\": 40,\n \"speed_limit\": 30\n },\n \"inflows\": {\n \"_InFlows__flows\": []\n },\n \"osm_path\": null,\n \"template\": null\n },\n \"network\": \"MultiRingNetwork\",\n \"sim\": {\n 
\"color_vehicles\": true,\n \"emission_path\": null,\n \"lateral_resolution\": null,\n \"no_step_log\": true,\n \"num_clients\": 1,\n \"overtake_right\": false,\n \"port\": null,\n \"print_warnings\": true,\n \"pxpm\": 2,\n \"render\": false,\n \"restart_instance\": false,\n \"save_render\": false,\n \"seed\": null,\n \"show_radius\": false,\n \"sight_radius\": 25,\n \"sim_step\": 0.1,\n \"teleport_time\": -1\n },\n \"simulator\": \"traci\",\n \"veh\": [\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 4.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 25\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 21,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human_0\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 4.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 25\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl_0\"\n }\n ]\n}", + "flow_params": "{\n \"env\": {\n \"done_at_exit\":false,\n \"additional_params\": {\n \"max_accel\": 3,\n \"max_decel\": 3,\n \"sort_vehicles\": false,\n \"target_velocity\": 20\n },\n \"clip_actions\": true,\n \"evaluate\": false,\n \"horizon\": 1500,\n \"sims_per_step\": 1,\n \"warmup_steps\": 0\n },\n \"env_name\": \"flow.envs.multiagent.ring.accel.MultiAgentAccelPOEnv\",\n \"exp_tag\": \"multiagent_figure_eight\",\n \"initial\": {\n \"additional_params\": {},\n \"bunching\": 0,\n \"edges_distribution\": \"all\",\n \"lanes_distribution\": Infinity,\n \"min_gap\": 0,\n \"perturbation\": 0.0,\n \"shuffle\": false,\n \"spacing\": \"uniform\",\n \"x0\": 0\n },\n \"net\": {\n \"additional_params\": {\n \"lanes\": 1,\n \"radius_ring\": 30,\n \"resolution\": 40,\n \"speed_limit\": 30\n },\n \"inflows\": {\n \"_InFlows__flows\": []\n },\n \"osm_path\": null,\n \"template\": null\n },\n \"network\": \"flow.networks.figure_eight.FigureEightNetwork\",\n \"sim\": {\n \"disable_collisions\": false,\n \"color_by_speed\": false,\n \"emission_path\": null,\n \"force_color_update\": false,\n \"lateral_resolution\": null,\n \"no_step_log\": true,\n \"num_clients\": 1,\n \"overtake_right\": false,\n \"port\": null,\n \"print_warnings\": true,\n \"pxpm\": 2,\n \"render\": false,\n \"restart_instance\": false,\n \"save_render\": false,\n \"seed\": null,\n \"show_radius\": false,\n \"sight_radius\": 25,\n \"sim_step\": 0.1,\n 
\"teleport_time\": -1,\n \"use_ballistic\": false\n },\n \"simulator\": \"traci\",\n \"veh\": [\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 1.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 6,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human_0\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 3,\n \"carFollowModel\": \"IDM\",\n \"decel\": 3,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl_0\"\n },\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 1.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 6,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human_1\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 3,\n \"carFollowModel\": \"IDM\",\n \"decel\": 3,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl_1\"\n }\n ]\n}", "run": "PPO" }, "evaluation_config": {}, @@ -26,7 +28,7 @@ "evaluation_num_episodes": 10, "gamma": 0.999, "grad_clip": null, - "horizon": 3000, + "horizon": 1500, 
"ignore_worker_failures": false, "input": "sampler", "input_evaluation": [ @@ -34,27 +36,31 @@ "wis" ], "kl_coeff": 0.2, - "kl_target": 0.01, - "lambda": 1.0, + "kl_target": 0.02, + "lambda": 0.97, "local_tf_session_args": { "inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8 }, - "log_level": "INFO", + "log_level": "WARN", "log_sys_usage": true, - "lr": 1e-05, + "lr": 5e-05, "lr_schedule": null, + "memory": 0, + "memory_per_worker": 0, "metrics_smoothing_episodes": 100, "min_iter_time_s": 0, "model": { "conv_activation": "relu", "conv_filters": null, + "custom_action_dist": null, "custom_model": null, "custom_options": {}, "custom_preprocessor": null, "dim": 84, "fcnet_activation": "tanh", "fcnet_hiddens": [ + 32, 32, 32 ], @@ -75,23 +81,25 @@ "policies": { "av": [ "", - "Box(3,)", + "Box(6,)", "Box(1,)", {} ] }, - "policies_to_train": [ - "av" - ], - "policy_mapping_fn": "tune.function(.policy_mapping_fn at 0x7fda132e6c80>)" + "policies_to_train": null, + "policy_mapping_fn": "" }, + "no_done_at_end": false, + "no_eager_on_workers": false, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "num_envs_per_worker": 1, "num_gpus": 0, "num_gpus_per_worker": 0, - "num_sgd_iter": 30, + "num_sgd_iter": 10, "num_workers": 2, + "object_store_memory": 0, + "object_store_memory_per_worker": 0, "observation_filter": "NoFilter", "optimizer": {}, "output": null, @@ -110,7 +118,7 @@ "sgd_minibatch_size": 128, "shuffle_buffer_size": 0, "shuffle_sequences": true, - "simple_optimizer": true, + "simple_optimizer": false, "soft_horizon": false, "synchronize_filters": true, "tf_session_args": { @@ -126,7 +134,7 @@ "log_device_placement": false }, "timesteps_per_iteration": 0, - "train_batch_size": 60000, + "train_batch_size": 30000, "use_gae": true, "vf_clip_param": 10.0, "vf_loss_coeff": 1.0, diff --git a/tests/data/rllib_data/multi_agent/params.pkl b/tests/data/rllib_data/multi_agent/params.pkl index cd832aa1c..192cf7558 100644 Binary files a/tests/data/rllib_data/multi_agent/params.pkl and b/tests/data/rllib_data/multi_agent/params.pkl differ diff --git a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1 b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1 index f8a7e8976..b7ae94640 100644 Binary files a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1 and b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1 differ diff --git a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata index e83b72aea..55b72be28 100644 Binary files a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata and b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata differ diff --git a/tests/data/rllib_data/single_agent/params.json b/tests/data/rllib_data/single_agent/params.json index c5e605ef4..2f55a1eed 100644 --- a/tests/data/rllib_data/single_agent/params.json +++ b/tests/data/rllib_data/single_agent/params.json @@ -8,25 +8,27 @@ "on_sample_end": null, "on_train_result": null }, - "clip_actions": false, + "clip_actions": true, "clip_param": 0.3, "clip_rewards": null, "collect_metrics_timeout": 180, "compress_observations": false, "custom_resources_per_worker": {}, + "eager": false, + "eager_tracing": false, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, - "env": "WaveAttenuationPOEnv-v0", + "env": "AccelEnv-v0", "env_config": { - "flow_params": "{\n \"env\": {\n \"additional_params\": {\n \"max_accel\": 1,\n \"max_decel\": 
1,\n \"ring_length\": [\n 220,\n 270\n ]\n },\n \"clip_actions\": false,\n \"evaluate\": false,\n \"horizon\": 3000,\n \"sims_per_step\": 1,\n \"warmup_steps\": 750\n },\n \"env_name\": \"WaveAttenuationPOEnv\",\n \"exp_tag\": \"stabilizing_the_ring\",\n \"initial\": {\n \"additional_params\": {},\n \"bunching\": 0,\n \"edges_distribution\": \"all\",\n \"lanes_distribution\": Infinity,\n \"min_gap\": 0,\n \"perturbation\": 0.0,\n \"shuffle\": false,\n \"spacing\": \"uniform\",\n \"x0\": 0\n },\n \"net\": {\n \"additional_params\": {\n \"lanes\": 1,\n \"length\": 260,\n \"resolution\": 40,\n \"speed_limit\": 30\n },\n \"inflows\": {\n \"_InFlows__flows\": []\n },\n \"osm_path\": null,\n \"template\": null\n },\n \"network\": \"RingNetwork\",\n \"sim\": {\n \"color_vehicles\": true,\n \"emission_path\": null,\n \"lateral_resolution\": null,\n \"no_step_log\": true,\n \"num_clients\": 1,\n \"overtake_right\": false,\n \"port\": null,\n \"print_warnings\": true,\n \"pxpm\": 2,\n \"render\": false,\n \"restart_instance\": false,\n \"save_render\": false,\n \"seed\": null,\n \"show_radius\": false,\n \"sight_radius\": 25,\n \"sim_step\": 0.1,\n \"teleport_time\": -1\n },\n \"simulator\": \"traci\",\n \"veh\": [\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 4.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 0,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 25\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 21,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 4.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 25\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl\"\n }\n ]\n}", - "run": "PPO" + "run": "PPO", + "flow_params": "{\n \"env\": {\n \"additional_params\": {\n \"max_accel\": 3,\n \"max_decel\": 3,\n \"sort_vehicles\": false,\n \"target_velocity\": 20\n },\n \"clip_actions\": true,\n \"evaluate\": false,\n \"horizon\": 1500,\n \"sims_per_step\": 1,\n \"warmup_steps\": 0\n },\n \"env_name\": \"flow.envs.ring.accel.AccelEnv\",\n \"exp_tag\": \"singleagent_figure_eight\",\n \"initial\": {\n \"additional_params\": {},\n \"bunching\": 0,\n \"edges_distribution\": \"all\",\n \"lanes_distribution\": Infinity,\n \"min_gap\": 0,\n \"perturbation\": 0.0,\n \"shuffle\": false,\n \"spacing\": \"uniform\",\n \"x0\": 0\n },\n \"net\": {\n 
\"additional_params\": {\n \"lanes\": 1,\n \"radius_ring\": 30,\n \"resolution\": 40,\n \"speed_limit\": 30\n },\n \"inflows\": {\n \"_InFlows__flows\": []\n },\n \"osm_path\": null,\n \"template\": null\n },\n \"network\": \"flow.networks.figure_eight.FigureEightNetwork\",\n \"sim\": {\n \"disable_collisions\": false,\n \"color_by_speed\": false,\n \"emission_path\": null,\n \"force_color_update\": false,\n \"lateral_resolution\": null,\n \"no_step_log\": true,\n \"num_clients\": 1,\n \"overtake_right\": false,\n \"port\": null,\n \"print_warnings\": true,\n \"pxpm\": 2,\n \"render\": false,\n \"restart_instance\": false,\n \"save_render\": false,\n \"seed\": null,\n \"show_radius\": false,\n \"sight_radius\": 25,\n \"sim_step\": 0.1,\n \"teleport_time\": -1,\n \"use_ballistic\": false\n },\n \"simulator\": \"traci\",\n \"veh\": [\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 1.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 13,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 1.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl\"\n }\n ]\n}" }, "evaluation_config": {}, "evaluation_interval": null, "evaluation_num_episodes": 10, "gamma": 0.999, "grad_clip": null, - "horizon": 3000, + "horizon": 1500, "ignore_worker_failures": false, "input": "sampler", "input_evaluation": [ @@ -40,23 +42,27 @@ "inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8 }, - "log_level": "INFO", + "log_level": "WARN", "log_sys_usage": true, "lr": 5e-05, "lr_schedule": null, + "memory": 0, + "memory_per_worker": 0, "metrics_smoothing_episodes": 100, "min_iter_time_s": 0, "model": { "conv_activation": "relu", "conv_filters": null, + "custom_action_dist": null, "custom_model": null, "custom_options": {}, "custom_preprocessor": null, "dim": 84, "fcnet_activation": "tanh", "fcnet_hiddens": [ - 3, - 3 + 32, + 32, + 32 ], "framestack": true, "free_log_std": false, @@ -76,6 +82,8 @@ "policies_to_train": null, "policy_mapping_fn": null }, + "no_done_at_end": false, + "no_eager_on_workers": false, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "num_envs_per_worker": 1, @@ -83,6 +91,8 @@ 
"num_gpus_per_worker": 0, "num_sgd_iter": 10, "num_workers": 2, + "object_store_memory": 0, + "object_store_memory_per_worker": 0, "observation_filter": "NoFilter", "optimizer": {}, "output": null, @@ -117,7 +127,7 @@ "log_device_placement": false }, "timesteps_per_iteration": 0, - "train_batch_size": 60000, + "train_batch_size": 30000, "use_gae": true, "vf_clip_param": 10.0, "vf_loss_coeff": 1.0, diff --git a/tests/data/rllib_data/single_agent/params.pkl b/tests/data/rllib_data/single_agent/params.pkl index 511d34343..e69753b7f 100644 Binary files a/tests/data/rllib_data/single_agent/params.pkl and b/tests/data/rllib_data/single_agent/params.pkl differ diff --git a/tests/fast_tests/test_controllers.py b/tests/fast_tests/test_controllers.py index 58967cef8..bef765396 100644 --- a/tests/fast_tests/test_controllers.py +++ b/tests/fast_tests/test_controllers.py @@ -405,6 +405,175 @@ def test_no_crash_LinearOVM(self): self.tearDown_failsafe() +class TestFeasibleAccelFailsafe(TestInstantaneousFailsafe): + """ + Tests that the feasible accel failsafe of the base acceleration controller + does not fail under extreme conditions. + """ + + def test_no_crash_OVM(self): + vehicles = VehicleParams() + vehicles.add( + veh_id="test", + acceleration_controller=(OVMController, { + "fail_safe": "feasible_accel" + }), + routing_controller=(ContinuousRouter, {}), + num_vehicles=10, + ) + + self.setUp_failsafe(vehicles=vehicles) + + # run the experiment, see if it fails + self.exp.run(1) + + self.tearDown_failsafe() + + def test_no_crash_LinearOVM(self): + vehicles = VehicleParams() + vehicles.add( + veh_id="test", + acceleration_controller=(LinearOVM, { + "fail_safe": "feasible_accel" + }), + routing_controller=(ContinuousRouter, {}), + num_vehicles=10, + ) + + self.setUp_failsafe(vehicles=vehicles) + + # run the experiment, see if it fails + self.exp.run(1) + + self.tearDown_failsafe() + + +class TestObeySpeedLimitFailsafe(TestInstantaneousFailsafe): + """ + Tests that the obey speed limit failsafe of the base acceleration controller + does not fail under extreme conditions. + """ + + def test_no_crash_OVM(self): + vehicles = VehicleParams() + vehicles.add( + veh_id="test", + acceleration_controller=(OVMController, { + "fail_safe": "obey_speed_limit" + }), + routing_controller=(ContinuousRouter, {}), + num_vehicles=10, + ) + + self.setUp_failsafe(vehicles=vehicles) + + # run the experiment, see if it fails + self.exp.run(1) + + self.tearDown_failsafe() + + def test_no_crash_LinearOVM(self): + vehicles = VehicleParams() + vehicles.add( + veh_id="test", + acceleration_controller=(LinearOVM, { + "fail_safe": "obey_speed_limit" + }), + routing_controller=(ContinuousRouter, {}), + num_vehicles=10, + ) + + self.setUp_failsafe(vehicles=vehicles) + + # run the experiment, see if it fails + self.exp.run(1) + + self.tearDown_failsafe() + + +class TestBrokenFailsafe(TestInstantaneousFailsafe): + """ + Tests that the failsafe logic triggers exceptions when instantiated + incorrectly. 
+ """ + + def test_invalid_failsafe_string(self): + vehicles = VehicleParams() + vehicles.add( + veh_id="test", + acceleration_controller=(OVMController, { + "fail_safe": "default" + }), + routing_controller=(ContinuousRouter, {}), + num_vehicles=10, + ) + + additional_env_params = { + "target_velocity": 8, + "max_accel": 3, + "max_decel": 3, + "sort_vehicles": False + } + env_params = EnvParams(additional_params=additional_env_params) + + additional_net_params = { + "length": 100, + "lanes": 1, + "speed_limit": 30, + "resolution": 40 + } + net_params = NetParams(additional_params=additional_net_params) + + initial_config = InitialConfig(bunching=10) + + # create the environment and network classes, see that it raises ValueError + with self.assertRaises(ValueError): + ring_road_exp_setup(vehicles=vehicles, + env_params=env_params, + net_params=net_params, + initial_config=initial_config) + + self.tearDown_failsafe() + + def test_invalid_failsafe_type(self): + vehicles = VehicleParams() + vehicles.add( + veh_id="test", + acceleration_controller=(LinearOVM, { + "fail_safe": True + }), + routing_controller=(ContinuousRouter, {}), + num_vehicles=10, + ) + + additional_env_params = { + "target_velocity": 8, + "max_accel": 3, + "max_decel": 3, + "sort_vehicles": False + } + env_params = EnvParams(additional_params=additional_env_params) + + additional_net_params = { + "length": 100, + "lanes": 1, + "speed_limit": 30, + "resolution": 40 + } + net_params = NetParams(additional_params=additional_net_params) + + initial_config = InitialConfig(bunching=10) + + # create the environment and network classes, see that it raises ValueError + with self.assertRaises(ValueError): + ring_road_exp_setup(vehicles=vehicles, + env_params=env_params, + net_params=net_params, + initial_config=initial_config) + + self.tearDown_failsafe() + + class TestStaticLaneChanger(unittest.TestCase): """ Makes sure that vehicles with a static lane-changing controller do not diff --git a/tests/fast_tests/test_environment_base_class.py b/tests/fast_tests/test_environment_base_class.py index ee815393c..b5c6cbc17 100644 --- a/tests/fast_tests/test_environment_base_class.py +++ b/tests/fast_tests/test_environment_base_class.py @@ -13,9 +13,8 @@ from tests.setup_scripts import ring_road_exp_setup, highway_exp_setup import os -import gym.spaces as spaces -from gym.spaces.box import Box import numpy as np +import gym.spaces as spaces os.environ["TEST_FLAG"] = "True" @@ -26,41 +25,6 @@ YELLOW = (255, 255, 0) -class TestFailRLActionsEnv(Env): - """Test environment designed to fail _apply_rl_actions not-implemented test.""" - - @property - def action_space(self): - """See parent class.""" - return Box(low=0, high=0, shape=(0,), dtype=np.float32) # pragma: no cover - - @property - def observation_space(self): - """See parent class.""" - return Box(low=0, high=0, shape=(0,), dtype=np.float32) # pragma: no cover - - def get_state(self, **kwargs): - """See class definition.""" - return np.array([]) # pragma: no cover - - -class TestFailGetStateEnv(Env): - """Test environment designed to fail get_state not-implemented test.""" - - @property - def action_space(self): - """See parent class.""" - return Box(low=0, high=0, shape=(0,), dtype=np.float32) # pragma: no cover - - @property - def observation_space(self): - """See parent class.""" - return Box(low=0, high=0, shape=(0,), dtype=np.float32) # pragma: no cover - - def _apply_rl_actions(self, rl_actions): - return # pragma: no cover - - class TestShuffle(unittest.TestCase): """ Tests that, at 
resets, the ordering of vehicles changes while the starting @@ -347,34 +311,28 @@ class TestAbstractMethods(unittest.TestCase): """ def setUp(self): - self.env, self.network, _ = ring_road_exp_setup() - self.sim_params = SumoParams() # FIXME: make ambiguous - self.env_params = EnvParams() + env, network, _ = ring_road_exp_setup() + sim_params = SumoParams() # FIXME: make ambiguous + env_params = EnvParams() + self.env = Env(sim_params=sim_params, + env_params=env_params, + network=network) - def test_abstract_base_class(self): - """Checks that instantiating abstract base class raises an error.""" - with self.assertRaises(TypeError): - Env(sim_params=self.sim_params, - env_params=self.env_params, - network=self.network) + def tearDown(self): + self.env.terminate() + self.env = None def test_get_state(self): - """Checks that instantiating without get_state implemented - raises an error. - """ - with self.assertRaises(TypeError): - TestFailGetStateEnv(sim_params=self.sim_params, - env_params=self.env_params, - network=self.network) + """Checks that get_state raises an error.""" + self.assertRaises(NotImplementedError, self.env.get_state) + + def test_compute_reward(self): + """Checks that compute_reward returns 0.""" + self.assertEqual(self.env.compute_reward([]), 0) def test__apply_rl_actions(self): - """Checks that instantiating without _apply_rl_actions - implemented raises an error. - """ - with self.assertRaises(TypeError): - TestFailRLActionsEnv(sim_params=self.sim_params, - env_params=self.env_params, - network=self.network) + self.assertRaises(NotImplementedError, self.env._apply_rl_actions, + rl_actions=None) class TestVehicleColoring(unittest.TestCase): diff --git a/tests/fast_tests/test_examples.py b/tests/fast_tests/test_examples.py index 0b385f28a..85548fc51 100644 --- a/tests/fast_tests/test_examples.py +++ b/tests/fast_tests/test_examples.py @@ -26,7 +26,6 @@ flow_params as multiagent_traffic_light_grid from examples.exp_configs.rl.multiagent.multiagent_highway import flow_params as multiagent_highway -from examples.simulate import parse_args as parse_simulate_args from examples.train import parse_args as parse_train_args from examples.train import run_model_stablebaseline as run_stable_baselines_model from examples.train import setup_exps_rllib as setup_rllib_exps @@ -69,8 +68,11 @@ def test_parse_args(self): 'aimsun': False, 'exp_config': 'exp_config', 'gen_emission': False, + 'is_baseline': False, 'no_render': False, - 'num_runs': 1 + 'num_runs': 1, + 'only_query': "['all']", + 'to_aws': None, }) # test the case when optional args are specified @@ -86,8 +88,11 @@ def test_parse_args(self): 'aimsun': True, 'exp_config': 'exp_config', 'gen_emission': True, + 'is_baseline': False, 'no_render': True, - 'num_runs': 2 + 'num_runs': 2, + 'only_query': "['all']", + 'to_aws': None, }) def test_bottleneck(self): @@ -148,9 +153,12 @@ def test_highway_single(self): @staticmethod def run_simulation(flow_params): + flow_params = deepcopy(flow_params) + # make the horizon small and set render to False flow_params['sim'].render = False flow_params['env'].horizon = 5 + flow_params['env'].warmup_steps = 0 # create an experiment object exp = Experiment(flow_params) @@ -167,12 +175,22 @@ def test_parse_args(self): args = parse_train_args(["exp_config"]) self.assertDictEqual(vars(args), { + 'algorithm': 'PPO', + 'checkpoint_freq': 20, 'exp_config': 'exp_config', + 'exp_title': None, + 'grid_search': False, + 'local_mode': False, 'rl_trainer': 'rllib', 'num_cpus': 1, + 'num_iterations': 200, + 
'num_rollouts': 1, 'num_steps': 5000, + 'render': False, 'rollout_size': 1000, - 'checkpoint_path': None + 'checkpoint_path': None, + 'use_s3': False, + 'multi_node': False, }) # test the case when optional args are specified @@ -186,12 +204,22 @@ def test_parse_args(self): ]) self.assertDictEqual(vars(args), { + 'algorithm': 'PPO', + 'checkpoint_freq': 20, 'checkpoint_path': '5', 'exp_config': 'exp_config', + 'exp_title': None, + 'grid_search': False, + 'local_mode': False, 'num_cpus': 1, + 'num_iterations': 200, + 'num_rollouts': 1, 'num_steps': 3, + 'render': False, 'rl_trainer': 'h-baselines', - 'rollout_size': 4 + 'rollout_size': 4, + 'use_s3': False, + 'multi_node': False, }) @@ -203,6 +231,11 @@ class TestStableBaselineExamples(unittest.TestCase): """ @staticmethod def run_exp(flow_params): + # Reduce the number of warmup steps to speedup tests. + flow_params = deepcopy(flow_params) + flow_params['env'].warmup_steps = 0 + + # Run the example. train_model = run_stable_baselines_model(flow_params, 1, 4, 4) train_model.env.close() @@ -229,11 +262,11 @@ class TestHBaselineExamples(unittest.TestCase): confirming that it runs. """ @staticmethod - def run_exp(flow_params, multiagent): + def run_exp(env_name, multiagent): train_h_baselines( - flow_params=flow_params, + env_name=env_name, args=[ - flow_params["env_name"].__name__, + env_name, "--initial_exploration_steps", "1", "--total_steps", "10" ], @@ -241,10 +274,10 @@ def run_exp(flow_params, multiagent): ) def test_singleagent_ring(self): - self.run_exp(singleagent_ring.copy(), multiagent=False) + self.run_exp("singleagent_ring", multiagent=False) def test_multiagent_ring(self): - self.run_exp(multiagent_ring.copy(), multiagent=True) + self.run_exp("multiagent_ring", multiagent=True) class TestRllibExamples(unittest.TestCase): @@ -406,10 +439,16 @@ def test_multiagent_i210(self): @staticmethod def run_exp(flow_params, **kwargs): - alg_run, env_name, config = setup_rllib_exps(flow_params, 1, 1, **kwargs) + # Reduce the number of warmup steps to speedup tests. + flow_params = deepcopy(flow_params) + flow_params['env'].warmup_steps = 0 + + # Run the example. + alg_run, env_name, config = setup_rllib_exps( + flow_params, 1, 1, parse_train_args([""]), **kwargs) try: - ray.init(num_cpus=1) + ray.init(num_cpus=1, local_mode=True) except Exception as e: print("ERROR", e) config['train_batch_size'] = 50 diff --git a/tests/fast_tests/test_experiment_base_class.py b/tests/fast_tests/test_experiment_base_class.py index b3863a77c..8a7a9500c 100644 --- a/tests/fast_tests/test_experiment_base_class.py +++ b/tests/fast_tests/test_experiment_base_class.py @@ -1,6 +1,7 @@ import unittest import os import time +import csv from flow.core.experiment import Experiment from flow.core.params import VehicleParams @@ -168,15 +169,44 @@ def test_convert_to_csv(self): time.sleep(1.0) # check that both the csv file exists and the xml file doesn't. 
- self.assertFalse(os.path.isfile(dir_path + "/{}-emission.xml".format( + self.assertFalse(os.path.isfile(dir_path + "/{}-0_emission.xml".format( exp.env.network.name))) - self.assertTrue(os.path.isfile(dir_path + "/{}-emission.csv".format( + self.assertTrue(os.path.isfile(dir_path + "/{}-0_emission.csv".format( exp.env.network.name))) + # check that the keys within the emission file matches its expected + # values + with open(dir_path + "/{}-0_emission.csv".format( + exp.env.network.name), "r") as f: + reader = csv.reader(f) + header = next(reader) + + self.assertListEqual(header, [ + "time", + "id", + "x", + "y", + "speed", + "headway", + "leader_id", + "follower_id", + "leader_rel_speed", + "target_accel_with_noise_with_failsafe", + "target_accel_no_noise_no_failsafe", + "target_accel_with_noise_no_failsafe", + "target_accel_no_noise_with_failsafe", + "realized_accel", + "road_grade", + "edge_id", + "lane_number", + "distance", + "relative_position", + ]) + time.sleep(0.1) # delete the files - os.remove(os.path.expanduser(dir_path + "/{}-emission.csv".format( + os.remove(os.path.expanduser(dir_path + "/{}-0_emission.csv".format( exp.env.network.name))) diff --git a/tests/fast_tests/test_files/i210_emission.csv b/tests/fast_tests/test_files/i210_emission.csv index ec63cf9cf..d43c115a4 100644 --- a/tests/fast_tests/test_files/i210_emission.csv +++ b/tests/fast_tests/test_files/i210_emission.csv @@ -1,4 +1,4 @@ -x,time,edge_id,eclass,type,PMx,speed,angle,CO,CO2,electricity,noise,lane_number,NOx,distance,route,y,id,fuel,HC,waiting +x,time,edge_id,eclass,type,PMx,speed,angle,CO,CO2,electricity,noise,lane_number,NOx,relative_position,route,y,id,fuel,HC,waiting 485.04,0.8,119257914,HBEFA3/PC_G_EU4,human,0.05,23.0,119.74,3.32,3793.12,0.0,70.29,1,1.17,5.1,route119257914_0,1068.18,flow_00.0,1.63,0.11,0.0 500.91,1.6,119257914,HBEFA3/PC_G_EU4,human,0.0,22.84,119.74,0.0,0.0,0.0,69.9,1,0.0,23.37,route119257914_0,1059.12,flow_00.0,0.0,0.0,0.0 517.1,2.4,119257914,HBEFA3/PC_G_EU4,human,0.15,23.31,119.74,78.83,7435.5,0.0,71.61,1,2.88,42.02,route119257914_0,1049.87,flow_00.0,3.2,0.54,0.0 diff --git a/tests/fast_tests/test_files/ring_230_emission.csv b/tests/fast_tests/test_files/ring_230_emission.csv index 9051074c8..342c5c7f3 100644 --- a/tests/fast_tests/test_files/ring_230_emission.csv +++ b/tests/fast_tests/test_files/ring_230_emission.csv @@ -1,117 +1,25 @@ -speed,CO,electricity,x,NOx,id,fuel,angle,time,edge_id,eclass,route,waiting,CO2,lane_number,PMx,type,noise,relative_position,HC,y -0.0,164.78,0.0,36.64,1.2,idm_0,1.13,94.02,0.1,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2624.72,0,0.07,idm,55.94,0.0,0.81,-1.65 -0.08,163.5,0.0,36.65,1.21,idm_0,1.13,94.01,0.2,bottom,HBEFA3/PC_G_EU4,routebottom,0.1,2631.03,0,0.07,idm,59.48,0.01,0.81,-1.65 -0.16,162.24,0.0,36.66,1.21,idm_0,1.13,93.98,0.3,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2636.67,0,0.07,idm,59.44,0.02,0.8,-1.65 -0.23,161.0,0.0,36.69,1.21,idm_0,1.14,93.94,0.4,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2641.63,0,0.07,idm,59.4,0.05,0.79,-1.65 -0.31,159.78,0.0,36.72,1.21,idm_0,1.14,93.88,0.5,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2645.91,0,0.06,idm,59.36,0.08,0.79,-1.65 -0.41,158.73,0.0,36.76,1.22,idm_0,1.15,93.8,0.6,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2679.14,0,0.07,idm,60.47,0.12,0.79,-1.65 -0.0,164.78,0.0,46.49,1.2,idm_1,1.13,78.81,0.1,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2624.72,0,0.07,idm,55.94,9.55,0.81,-0.34 -0.08,163.5,0.0,46.5,1.21,idm_1,1.13,78.8,0.2,bottom,HBEFA3/PC_G_EU4,routebottom,0.1,2631.03,0,0.07,idm,59.48,9.55,0.81,-0.33 
-0.16,162.24,0.0,46.51,1.21,idm_1,1.13,78.78,0.3,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2636.67,0,0.07,idm,59.44,9.57,0.8,-0.33 -0.23,161.0,0.0,46.54,1.21,idm_1,1.14,78.74,0.4,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2641.63,0,0.07,idm,59.4,9.59,0.79,-0.32 -0.31,159.78,0.0,46.57,1.21,idm_1,1.14,78.7,0.5,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2645.91,0,0.06,idm,59.36,9.62,0.79,-0.31 -0.41,158.73,0.0,46.61,1.22,idm_1,1.15,78.64,0.6,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2679.14,0,0.07,idm,60.47,9.66,0.79,-0.3 -0.0,164.78,0.0,56.08,1.2,idm_10,1.13,304.55,0.1,right,HBEFA3/PC_G_EU4,routeright,0.0,2624.72,0,0.07,idm,55.94,37.95,0.81,69.53 -0.08,163.5,0.0,56.08,1.21,idm_10,1.13,304.54,0.2,right,HBEFA3/PC_G_EU4,routeright,0.1,2631.03,0,0.07,idm,59.48,37.96,0.81,69.53 -0.16,162.24,0.0,56.06,1.21,idm_10,1.13,304.52,0.3,right,HBEFA3/PC_G_EU4,routeright,0.0,2636.67,0,0.07,idm,59.44,37.98,0.8,69.54 -0.23,161.0,0.0,56.04,1.21,idm_10,1.14,304.48,0.4,right,HBEFA3/PC_G_EU4,routeright,0.0,2641.63,0,0.07,idm,59.4,38.0,0.79,69.55 -0.31,159.78,0.0,56.01,1.21,idm_10,1.14,304.44,0.5,right,HBEFA3/PC_G_EU4,routeright,0.0,2645.91,0,0.06,idm,59.36,38.03,0.79,69.57 -0.41,158.73,0.0,55.98,1.22,idm_10,1.15,304.38,0.6,right,HBEFA3/PC_G_EU4,routeright,0.0,2679.14,0,0.07,idm,60.47,38.07,0.79,69.59 -0.0,164.78,0.0,46.95,1.2,idm_11,1.13,289.47,0.1,right,HBEFA3/PC_G_EU4,routeright,0.0,2624.72,0,0.07,idm,55.94,47.5,0.81,73.43 -0.08,163.5,0.0,46.94,1.21,idm_11,1.13,289.45,0.2,right,HBEFA3/PC_G_EU4,routeright,0.1,2631.03,0,0.07,idm,59.48,47.51,0.81,73.43 -0.16,162.24,0.0,46.92,1.21,idm_11,1.13,289.42,0.3,right,HBEFA3/PC_G_EU4,routeright,0.0,2636.67,0,0.07,idm,59.44,47.52,0.8,73.44 -0.23,161.0,0.0,46.9,1.21,idm_11,1.14,289.38,0.4,right,HBEFA3/PC_G_EU4,routeright,0.0,2641.63,0,0.07,idm,59.4,47.55,0.79,73.44 -0.31,159.78,0.0,46.87,1.21,idm_11,1.14,289.32,0.5,right,HBEFA3/PC_G_EU4,routeright,0.0,2645.91,0,0.06,idm,59.36,47.58,0.79,73.45 -0.41,158.73,0.0,46.83,1.22,idm_11,1.15,289.24,0.6,right,HBEFA3/PC_G_EU4,routeright,0.0,2679.14,0,0.07,idm,60.47,47.62,0.79,73.46 -0.0,164.78,0.0,37.11,1.2,idm_12,1.13,274.71,0.1,right,HBEFA3/PC_G_EU4,routeright,0.0,2624.72,0,0.07,idm,55.94,57.05,0.81,74.86 -0.08,163.5,0.0,37.11,1.21,idm_12,1.13,274.7,0.2,right,HBEFA3/PC_G_EU4,routeright,0.1,2631.03,0,0.07,idm,59.48,57.05,0.81,74.86 -0.16,162.24,0.0,37.09,1.21,idm_12,1.13,274.68,0.3,right,HBEFA3/PC_G_EU4,routeright,0.0,2636.67,0,0.07,idm,59.44,57.07,0.8,74.86 -0.23,161.0,0.0,37.07,1.21,idm_12,1.14,274.65,0.4,right,HBEFA3/PC_G_EU4,routeright,0.0,2641.63,0,0.07,idm,59.4,57.09,0.79,74.86 -0.31,159.78,0.0,37.03,1.21,idm_12,1.14,274.6,0.5,right,HBEFA3/PC_G_EU4,routeright,0.0,2645.91,0,0.06,idm,59.36,57.12,0.79,74.86 -0.41,158.73,0.0,36.99,1.22,idm_12,1.15,274.55,0.6,right,HBEFA3/PC_G_EU4,routeright,0.0,2679.14,0,0.07,idm,60.47,57.16,0.79,74.86 -0.0,164.78,0.0,27.19,1.2,idm_13,1.13,259.6,0.1,top,HBEFA3/PC_G_EU4,routetop,0.0,2624.72,0,0.07,idm,55.94,9.09,0.81,73.68 -0.08,163.5,0.0,27.18,1.21,idm_13,1.13,259.58,0.2,top,HBEFA3/PC_G_EU4,routetop,0.1,2631.03,0,0.07,idm,59.48,9.1,0.81,73.68 -0.16,162.24,0.0,27.17,1.21,idm_13,1.13,259.55,0.3,top,HBEFA3/PC_G_EU4,routetop,0.0,2636.67,0,0.07,idm,59.44,9.11,0.8,73.67 -0.23,161.0,0.0,27.14,1.21,idm_13,1.14,259.51,0.4,top,HBEFA3/PC_G_EU4,routetop,0.0,2641.63,0,0.07,idm,59.4,9.14,0.79,73.67 -0.31,159.78,0.0,27.11,1.21,idm_13,1.14,259.45,0.5,top,HBEFA3/PC_G_EU4,routetop,0.0,2645.91,0,0.06,idm,59.36,9.17,0.79,73.66 
-0.41,158.73,0.0,27.07,1.22,idm_13,1.15,259.37,0.6,top,HBEFA3/PC_G_EU4,routetop,0.0,2679.14,0,0.07,idm,60.47,9.21,0.79,73.65
-0.0,164.78,0.0,17.96,1.2,idm_14,1.13,244.67,0.1,top,HBEFA3/PC_G_EU4,routetop,0.0,2624.72,0,0.07,idm,55.94,18.64,0.81,70.0
-0.08,163.5,0.0,17.95,1.21,idm_14,1.13,244.66,0.2,top,HBEFA3/PC_G_EU4,routetop,0.1,2631.03,0,0.07,idm,59.48,18.64,0.81,70.0
-0.16,162.24,0.0,17.94,1.21,idm_14,1.13,244.63,0.3,top,HBEFA3/PC_G_EU4,routetop,0.0,2636.67,0,0.07,idm,59.44,18.66,0.8,69.99
-0.23,161.0,0.0,17.92,1.21,idm_14,1.14,244.6,0.4,top,HBEFA3/PC_G_EU4,routetop,0.0,2641.63,0,0.07,idm,59.4,18.68,0.79,69.98
-0.31,159.78,0.0,17.89,1.21,idm_14,1.14,244.55,0.5,top,HBEFA3/PC_G_EU4,routetop,0.0,2645.91,0,0.06,idm,59.36,18.71,0.79,69.96
-0.0,164.78,0.0,9.98,1.2,idm_15,1.13,229.84,0.1,top,HBEFA3/PC_G_EU4,routetop,0.0,2624.72,0,0.07,idm,55.94,28.18,0.81,64.07
-0.08,163.5,0.0,9.98,1.21,idm_15,1.13,229.83,0.2,top,HBEFA3/PC_G_EU4,routetop,0.1,2631.03,0,0.07,idm,59.48,28.19,0.81,64.07
-0.16,162.24,0.0,9.97,1.21,idm_15,1.13,229.8,0.3,top,HBEFA3/PC_G_EU4,routetop,0.0,2636.67,0,0.07,idm,59.44,28.21,0.8,64.06
-0.23,161.0,0.0,9.95,1.21,idm_15,1.14,229.76,0.4,top,HBEFA3/PC_G_EU4,routetop,0.0,2641.63,0,0.07,idm,59.4,28.23,0.79,64.04
-0.31,159.78,0.0,9.93,1.21,idm_15,1.14,229.7,0.5,top,HBEFA3/PC_G_EU4,routetop,0.0,2645.91,0,0.06,idm,59.36,28.26,0.79,64.02
-0.0,164.78,0.0,3.81,1.2,idm_16,1.13,214.88,0.1,top,HBEFA3/PC_G_EU4,routetop,0.0,2624.72,0,0.07,idm,55.94,37.73,0.81,56.29
-0.08,163.5,0.0,3.81,1.21,idm_16,1.13,214.87,0.2,top,HBEFA3/PC_G_EU4,routetop,0.1,2631.03,0,0.07,idm,59.48,37.74,0.81,56.28
-0.16,162.24,0.0,3.8,1.21,idm_16,1.13,214.85,0.3,top,HBEFA3/PC_G_EU4,routetop,0.0,2636.67,0,0.07,idm,59.44,37.75,0.8,56.27
-0.23,161.0,0.0,3.79,1.21,idm_16,1.14,214.81,0.4,top,HBEFA3/PC_G_EU4,routetop,0.0,2641.63,0,0.07,idm,59.4,37.77,0.79,56.24
-0.31,159.78,0.0,3.77,1.21,idm_16,1.14,214.77,0.5,top,HBEFA3/PC_G_EU4,routetop,0.0,2645.91,0,0.06,idm,59.36,37.81,0.79,56.22
-0.0,164.78,0.0,-0.15,1.2,idm_17,1.13,199.9,0.1,top,HBEFA3/PC_G_EU4,routetop,0.0,2624.72,0,0.07,idm,55.94,47.27,0.81,47.18
-0.08,163.5,0.0,-0.15,1.21,idm_17,1.13,199.88,0.2,top,HBEFA3/PC_G_EU4,routetop,0.1,2631.03,0,0.07,idm,59.48,47.28,0.81,47.17
-0.16,162.24,0.0,-0.16,1.21,idm_17,1.13,199.85,0.3,top,HBEFA3/PC_G_EU4,routetop,0.0,2636.67,0,0.07,idm,59.44,47.3,0.8,47.15
-0.23,161.0,0.0,-0.16,1.21,idm_17,1.14,199.81,0.4,top,HBEFA3/PC_G_EU4,routetop,0.0,2641.63,0,0.07,idm,59.4,47.32,0.79,47.13
-0.31,159.78,0.0,-0.17,1.21,idm_17,1.14,199.75,0.5,top,HBEFA3/PC_G_EU4,routetop,0.0,2645.91,0,0.06,idm,59.36,47.35,0.79,47.1
-0.0,164.78,0.0,-1.64,1.2,idm_18,1.13,185.04,0.1,top,HBEFA3/PC_G_EU4,routetop,0.0,2624.72,0,0.07,idm,55.94,56.82,0.81,37.35
-0.08,163.5,0.0,-1.64,1.21,idm_18,1.13,185.03,0.2,top,HBEFA3/PC_G_EU4,routetop,0.1,2631.03,0,0.07,idm,59.48,56.83,0.81,37.34
-0.16,162.24,0.0,-1.64,1.21,idm_18,1.13,185.0,0.3,top,HBEFA3/PC_G_EU4,routetop,0.0,2636.67,0,0.07,idm,59.44,56.84,0.8,37.33
-0.23,161.0,0.0,-1.64,1.21,idm_18,1.14,184.97,0.4,top,HBEFA3/PC_G_EU4,routetop,0.0,2641.63,0,0.07,idm,59.4,56.87,0.79,37.3
-0.31,159.78,0.0,-1.64,1.21,idm_18,1.14,184.93,0.5,top,HBEFA3/PC_G_EU4,routetop,0.0,2645.91,0,0.06,idm,59.36,56.9,0.79,37.27
-0.0,164.78,0.0,-0.52,1.2,idm_19,1.13,170.03,0.1,left,HBEFA3/PC_G_EU4,routeleft,0.0,2624.72,0,0.07,idm,55.94,8.86,0.81,27.42
-0.08,163.5,0.0,-0.52,1.21,idm_19,1.13,170.01,0.2,left,HBEFA3/PC_G_EU4,routeleft,0.1,2631.03,0,0.07,idm,59.48,8.87,0.81,27.41
-0.16,162.24,0.0,-0.51,1.21,idm_19,1.13,169.98,0.3,left,HBEFA3/PC_G_EU4,routeleft,0.0,2636.67,0,0.07,idm,59.44,8.89,0.8,27.39
-0.23,161.0,0.0,-0.51,1.21,idm_19,1.14,169.94,0.4,left,HBEFA3/PC_G_EU4,routeleft,0.0,2641.63,0,0.07,idm,59.4,8.91,0.79,27.37
-0.31,159.78,0.0,-0.5,1.21,idm_19,1.14,169.88,0.5,left,HBEFA3/PC_G_EU4,routeleft,0.0,2645.91,0,0.06,idm,59.36,8.94,0.79,27.34
-0.0,164.78,0.0,55.68,1.2,idm_2,1.13,64.0,0.1,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2624.72,0,0.07,idm,55.94,19.09,0.81,3.45
-0.08,163.5,0.0,55.68,1.21,idm_2,1.13,63.99,0.2,bottom,HBEFA3/PC_G_EU4,routebottom,0.1,2631.03,0,0.07,idm,59.48,19.1,0.81,3.45
-0.16,162.24,0.0,55.7,1.21,idm_2,1.13,63.97,0.3,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2636.67,0,0.07,idm,59.44,19.11,0.8,3.46
-0.23,161.0,0.0,55.72,1.21,idm_2,1.14,63.93,0.4,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2641.63,0,0.07,idm,59.4,19.14,0.79,3.47
-0.31,159.78,0.0,55.75,1.21,idm_2,1.14,63.88,0.5,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2645.91,0,0.06,idm,59.36,19.17,0.79,3.49
-0.0,164.78,0.0,3.11,1.2,idm_20,1.13,155.0,0.1,left,HBEFA3/PC_G_EU4,routeleft,0.0,2624.72,0,0.07,idm,55.94,18.41,0.81,18.17
-0.08,163.5,0.0,3.11,1.21,idm_20,1.13,154.99,0.2,left,HBEFA3/PC_G_EU4,routeleft,0.1,2631.03,0,0.07,idm,59.48,18.42,0.81,18.16
-0.16,162.24,0.0,3.12,1.21,idm_20,1.13,154.96,0.3,left,HBEFA3/PC_G_EU4,routeleft,0.0,2636.68,0,0.07,idm,59.44,18.43,0.8,18.15
-0.23,161.0,0.0,3.13,1.21,idm_20,1.14,154.93,0.4,left,HBEFA3/PC_G_EU4,routeleft,0.0,2641.7,0,0.07,idm,59.41,18.46,0.79,18.12
-0.31,159.77,0.0,3.15,1.21,idm_20,1.14,154.89,0.5,left,HBEFA3/PC_G_EU4,routeleft,0.0,2646.14,0,0.06,idm,59.37,18.49,0.79,18.1
-0.0,164.78,0.0,8.98,1.2,idm_21,1.13,140.22,0.1,left,HBEFA3/PC_G_EU4,routeleft,0.0,2624.72,0,0.07,idm,55.94,27.95,0.81,10.15
-0.1,163.3,0.0,8.99,1.21,idm_21,1.13,140.21,0.2,left,HBEFA3/PC_G_EU4,routeleft,0.1,2637.25,0,0.07,idm,60.3,27.96,0.81,10.15
-0.2,161.84,0.0,9.0,1.21,idm_21,1.14,140.18,0.3,left,HBEFA3/PC_G_EU4,routeleft,0.0,2649.89,0,0.07,idm,60.34,27.98,0.8,10.13
-0.29,160.38,0.0,9.02,1.21,idm_21,1.14,140.14,0.4,left,HBEFA3/PC_G_EU4,routeleft,0.0,2662.63,0,0.07,idm,60.37,28.01,0.79,10.11
-0.39,158.94,0.0,9.05,1.22,idm_21,1.15,140.07,0.5,left,HBEFA3/PC_G_EU4,routeleft,0.0,2675.48,0,0.07,idm,60.41,28.05,0.79,10.08
-0.0,164.78,0.0,63.57,1.2,idm_3,1.13,49.05,0.1,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2624.72,0,0.07,idm,55.94,28.64,0.81,9.48
-0.08,163.5,0.0,63.58,1.21,idm_3,1.13,49.04,0.2,bottom,HBEFA3/PC_G_EU4,routebottom,0.1,2631.03,0,0.07,idm,59.48,28.64,0.81,9.49
-0.16,162.24,0.0,63.59,1.21,idm_3,1.13,49.02,0.3,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2636.67,0,0.07,idm,59.44,28.66,0.8,9.5
-0.23,161.0,0.0,63.61,1.21,idm_3,1.14,48.99,0.4,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2641.63,0,0.07,idm,59.4,28.68,0.79,9.52
-0.31,159.78,0.0,63.63,1.21,idm_3,1.14,48.94,0.5,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2645.91,0,0.06,idm,59.36,28.71,0.79,9.54
-0.0,164.78,0.0,69.65,1.2,idm_4,1.13,34.22,0.1,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2624.72,0,0.07,idm,55.94,38.18,0.81,17.34
-0.08,163.5,0.0,69.65,1.21,idm_4,1.13,34.21,0.2,bottom,HBEFA3/PC_G_EU4,routebottom,0.1,2631.03,0,0.07,idm,59.48,38.19,0.81,17.35
-0.16,162.24,0.0,69.66,1.21,idm_4,1.13,34.19,0.3,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2636.67,0,0.07,idm,59.44,38.21,0.8,17.36
-0.23,161.0,0.0,69.68,1.21,idm_4,1.14,34.15,0.4,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2641.63,0,0.07,idm,59.4,38.23,0.79,17.38
-0.31,159.78,0.0,69.69,1.21,idm_4,1.14,34.11,0.5,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2645.91,0,0.06,idm,59.36,38.26,0.79,17.41
-0.0,164.78,0.0,73.49,1.2,idm_5,1.13,19.04,0.1,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2624.72,0,0.07,idm,55.94,47.73,0.81,26.5
-0.08,163.5,0.0,73.5,1.21,idm_5,1.13,19.02,0.2,bottom,HBEFA3/PC_G_EU4,routebottom,0.1,2631.03,0,0.07,idm,59.48,47.74,0.81,26.51
-0.16,162.24,0.0,73.5,1.21,idm_5,1.13,18.99,0.3,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2636.67,0,0.07,idm,59.44,47.75,0.8,26.53
-0.23,161.0,0.0,73.51,1.21,idm_5,1.14,18.95,0.4,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2641.63,0,0.07,idm,59.4,47.77,0.79,26.55
-0.31,159.78,0.0,73.52,1.21,idm_5,1.14,18.91,0.5,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2645.91,0,0.06,idm,59.36,47.81,0.79,26.58
-0.0,164.78,0.0,74.87,1.2,idm_6,1.13,4.39,0.1,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2624.72,0,0.07,idm,55.94,57.27,0.81,36.34
-0.08,163.5,0.0,74.87,1.21,idm_6,1.13,4.38,0.2,bottom,HBEFA3/PC_G_EU4,routebottom,0.1,2631.03,0,0.07,idm,59.48,57.28,0.81,36.35
-0.16,162.24,0.0,74.87,1.21,idm_6,1.13,4.36,0.3,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2636.67,0,0.07,idm,59.44,57.3,0.8,36.37
-0.23,161.0,0.0,74.87,1.21,idm_6,1.14,4.32,0.4,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2641.63,0,0.07,idm,59.4,57.32,0.79,36.39
-0.31,159.78,0.0,74.87,1.21,idm_6,1.14,4.28,0.5,bottom,HBEFA3/PC_G_EU4,routebottom,0.0,2645.91,0,0.06,idm,59.36,57.35,0.79,36.42
-0.0,164.78,0.0,73.62,1.2,idm_7,1.13,349.16,0.1,right,HBEFA3/PC_G_EU4,routeright,0.0,2624.72,0,0.07,idm,55.94,9.32,0.81,46.26
-0.08,163.5,0.0,73.62,1.21,idm_7,1.13,349.15,0.2,right,HBEFA3/PC_G_EU4,routeright,0.1,2631.03,0,0.07,idm,59.48,9.33,0.81,46.27
-0.16,162.24,0.0,73.61,1.21,idm_7,1.13,349.12,0.3,right,HBEFA3/PC_G_EU4,routeright,0.0,2636.67,0,0.07,idm,59.44,9.34,0.8,46.28
-0.23,161.0,0.0,73.6,1.21,idm_7,1.14,349.07,0.4,right,HBEFA3/PC_G_EU4,routeright,0.0,2641.63,0,0.07,idm,59.4,9.37,0.79,46.31
-0.31,159.78,0.0,73.6,1.21,idm_7,1.14,349.01,0.5,right,HBEFA3/PC_G_EU4,routeright,0.0,2645.91,0,0.06,idm,59.36,9.4,0.79,46.34
-0.0,164.78,0.0,69.89,1.2,idm_8,1.13,334.33,0.1,right,HBEFA3/PC_G_EU4,routeright,0.0,2624.72,0,0.07,idm,55.94,18.86,0.81,55.47
-0.08,163.5,0.0,69.88,1.21,idm_8,1.13,334.32,0.2,right,HBEFA3/PC_G_EU4,routeright,0.1,2631.03,0,0.07,idm,59.48,18.87,0.81,55.47
-0.16,162.24,0.0,69.87,1.21,idm_8,1.13,334.3,0.3,right,HBEFA3/PC_G_EU4,routeright,0.0,2636.67,0,0.07,idm,59.44,18.89,0.8,55.49
-0.23,161.0,0.0,69.86,1.21,idm_8,1.14,334.27,0.4,right,HBEFA3/PC_G_EU4,routeright,0.0,2641.63,0,0.07,idm,59.4,18.91,0.79,55.51
-0.31,159.78,0.0,69.85,1.21,idm_8,1.14,334.22,0.5,right,HBEFA3/PC_G_EU4,routeright,0.0,2645.91,0,0.06,idm,59.36,18.94,0.79,55.54
-0.0,164.78,0.0,63.91,1.2,idm_9,1.13,319.44,0.1,right,HBEFA3/PC_G_EU4,routeright,0.0,2624.72,0,0.07,idm,55.94,28.41,0.81,63.4
-0.08,163.5,0.0,63.9,1.21,idm_9,1.13,319.42,0.2,right,HBEFA3/PC_G_EU4,routeright,0.1,2631.03,0,0.07,idm,59.48,28.42,0.81,63.41
-0.16,162.24,0.0,63.89,1.21,idm_9,1.13,319.39,0.3,right,HBEFA3/PC_G_EU4,routeright,0.0,2636.67,0,0.07,idm,59.44,28.43,0.8,63.42
-0.23,161.0,0.0,63.87,1.21,idm_9,1.14,319.35,0.4,right,HBEFA3/PC_G_EU4,routeright,0.0,2641.63,0,0.07,idm,59.4,28.46,0.79,63.44
-0.31,159.78,0.0,63.85,1.21,idm_9,1.14,319.3,0.5,right,HBEFA3/PC_G_EU4,routeright,0.0,2645.91,0,0.06,idm,59.36,28.49,0.79,63.46
+time,id,x,y,speed,headway,leader_id,follower_id,leader_rel_speed,target_accel_with_noise_with_failsafe,target_accel_no_noise_no_failsafe,target_accel_with_noise_no_failsafe,target_accel_no_noise_with_failsafe,realized_accel,road_grade,edge_id,lane_number,distance,relative_position
+0.0,idm_0,36.64,-1.6,0.0,4.545454545454547,idm_1,idm_21,0.0,0.0,0.0,0.0,0.0,0.0,0,bottom,0,0.0,0.0
+0.1,idm_0,36.648322761506634,-1.599834647122385,0.07984158415841586,4.545454545454546,idm_1,idm_21,0.0,0.8064000000000001,0.8064000000000001,0.8064000000000001,0.8064000000000001,0.7984158415841586,0,bottom,0,0.007984158415841587,0.007984158415841587
+0.2,idm_0,36.66480556684144,-1.599507174168713,0.15812219156578355,4.545454545454545,idm_1,idm_21,0.0,0.7906341348144134,0.7906341348144134,0.7906341348144134,0.7906341348144134,0.7828060740736771,0,bottom,0,0.023796377572419945,0.023796377572419945
+0.3,idm_0,36.68928269645688,-1.599020873580327,0.23481302481051264,4.545454545454546,idm_1,idm_21,5.551115123125783e-17,0.7745774157717638,0.7745774157717638,0.7745774157717638,0.7745774157717638,0.7669083324472908,0,bottom,0,0.04727768005347121,0.04727768005347121
+0.0,idm_1,46.477059895666216,-0.2910450274933619,0.0,4.545454545454547,idm_2,idm_0,0.0,0.0,0.0,0.0,0.0,0.0,0,bottom,0,0.0,9.545454545454547
+0.1,idm_1,46.48510950976829,-0.2889238453988948,0.07984158415841586,4.545454545454547,idm_2,idm_0,0.0,0.8064000000000001,0.8064000000000001,0.8064000000000001,0.8064000000000001,0.7984158415841586,0,bottom,0,0.007984158415840879,9.553438703870388
+0.2,idm_1,46.5010513605782,-0.2847229522800698,0.15812219156578355,4.5454545454545485,idm_2,idm_0,0.0,0.7906341348144135,0.7906341348144135,0.7906341348144135,0.7906341348144135,0.7828060740736771,0,bottom,0,0.023796377572418592,9.569250923026964
+0.3,idm_1,46.524725167351825,-0.2784845842789108,0.2348130248105127,4.5454545454545485,idm_2,idm_0,-5.551115123125783e-17,0.7745774157717642,0.7745774157717642,0.7745774157717642,0.7745774157717642,0.7669083324472914,0,bottom,0,0.04727768005347066,9.592732225508016
+0.0,idm_2,55.65270828548022,3.488595652781747,0.0,4.545454545454547,idm_3,idm_1,0.0,0.0,0.0,0.0,0.0,0.0,0,bottom,0,0.0,19.090909090909093
+0.1,idm_2,55.66000796138611,3.4925969566116453,0.07984158415841586,4.545454545454547,idm_3,idm_1,0.0,0.8064000000000001,0.8064000000000001,0.8064000000000001,0.8064000000000001,0.7984158415841586,0,bottom,0,0.007984158415840879,19.098893249324934
+0.2,idm_2,55.67446459778839,3.5005213350840068,0.15812219156578355,4.545454545454547,idm_3,idm_1,0.0,0.7906341348144135,0.7906341348144135,0.7906341348144135,0.7906341348144135,0.7828060740736771,0,bottom,0,0.02379637757242037,19.114705468481514
+0.3,idm_2,55.69593284641682,3.5122891158136613,0.23481302481051264,4.545454545454547,idm_3,idm_1,5.551115123125783e-17,0.7745774157717641,0.7745774157717641,0.7745774157717641,0.7745774157717641,0.7669083324472908,0,bottom,0,0.04727768005347244,19.138186770962566
+0.0,idm_3,63.54122270574333,9.511222705743334,0.0,4.545454545454547,idm_4,idm_2,0.0,0.0,0.0,0.0,0.0,0.0,0,bottom,0,0.0,28.63636363636364
+0.1,idm_3,63.54710894820649,9.517108948206497,0.07984158415841586,4.545454545454549,idm_4,idm_2,0.0,0.8064000000000001,0.8064000000000001,0.8064000000000001,0.8064000000000001,0.7984158415841586,0,bottom,0,0.007984158415840879,28.64434779477948
+0.2,idm_3,63.558766351653254,9.528766351653257,0.15812219156578355,4.545454545454549,idm_4,idm_2,0.0,0.7906341348144138,0.7906341348144138,0.7906341348144138,0.7906341348144138,0.7828060740736771,0,bottom,0,0.02379637757242037,28.660160013936057
+0.3,idm_3,63.57607771154312,9.546077711543122,0.2348130248105127,4.545454545454549,idm_4,idm_2,-5.551115123125783e-17,0.7745774157717643,0.7745774157717643,0.7745774157717643,0.7745774157717643,0.7669083324472914,0,bottom,0,0.04727768005347244,28.683641316417113
+0.0,idm_4,69.61055207686752,17.363529025870548,0.0,4.545454545454547,idm_5,idm_3,0.0,0.0,0.0,0.0,0.0,0.0,0,bottom,0,0.0,38.18181818181819
+0.1,idm_4,69.61489064350748,17.370633428743492,0.07984158415841586,4.545454545454547,idm_5,idm_3,0.0,0.8064000000000001,0.8064000000000001,0.8064000000000001,0.8064000000000001,0.7984158415841586,0,bottom,0,0.007984158415844433,38.18980234023403
+0.2,idm_4,69.62348295380036,17.384703336848084,0.15812219156578355,4.545454545454547,idm_5,idm_3,0.0,0.7906341348144135,0.7906341348144135,0.7906341348144135,0.7906341348144135,0.7828060740736771,0,bottom,0,0.023796377572423918,38.20561455939061
+0.3,idm_4,69.63624261991681,17.40559729011379,0.23481302481051264,4.545454545454547,idm_5,idm_3,0.0,0.7745774157717641,0.7745774157717641,0.7745774157717641,0.7745774157717641,0.7669083324472908,0,bottom,0,0.04727768005347599,38.229095861871656
+0.0,idm_5,73.45066460734851,26.51380415096358,0.0,4.545454545454547,idm_6,idm_4,0.0,0.0,0.0,0.0,0.0,0.0,0,bottom,0,0.0,47.72727272727274
+0.1,idm_5,73.45278578944298,26.521853765065657,0.07984158415841586,4.545454545454547,idm_6,idm_4,-1.3877787807814454e-17,0.8064000000000001,0.8064000000000001,0.8064000000000001,0.8064000000000001,0.7984158415841586,0,bottom,0,0.007984158415844433,47.73525688568858
+0.2,idm_5,73.4569866825618,26.53779561587557,0.15812219156578355,4.545454545454547,idm_6,idm_4,-5.551115123125783e-17,0.7906341348144135,0.7906341348144135,0.7906341348144135,0.7906341348144135,0.7828060740736771,0,bottom,0,0.023796377572423918,47.75106910484515
+0.3,idm_5,73.46322505056297,26.561469422649196,0.23481302481051264,4.545454545454547,idm_6,idm_4,-1.1102230246251563e-16,0.7745774157717641,0.7745774157717641,0.7745774157717641,0.7745774157717641,0.7669083324472908,0,bottom,0,0.04727768005347599,47.77455040732621
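The replacement rows above follow the trajectory-style emission format named in the new header: one row per vehicle per 0.1 s step, carrying position, speed, headway, leader/follower ids, the four target-acceleration variants, the realized acceleration, and edge/lane/distance bookkeeping. A minimal sketch of inspecting such a file, assuming only the columns visible in that header; pandas is an illustrative choice here, not a dependency added by this patch:

    import pandas as pd

    # One row per (time, id) pair; a vehicle's trajectory is the group of
    # rows sharing its id, ordered by time.
    df = pd.read_csv("fig8_emission.csv")
    for veh_id, traj in df.groupby("id"):
        traj = traj.sort_values("time")
        print(veh_id,
              "steps:", len(traj),
              "final speed (m/s):", traj["speed"].iloc[-1],
              "distance travelled (m):", traj["distance"].iloc[-1])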
diff --git a/tests/fast_tests/test_rewards.py b/tests/fast_tests/test_rewards.py
index 3f2e08cde..ac406b545 100644
--- a/tests/fast_tests/test_rewards.py
+++ b/tests/fast_tests/test_rewards.py
@@ -7,7 +7,6 @@
 from flow.core.rewards import average_velocity, min_delay
 from flow.core.rewards import desired_velocity, boolean_action_penalty
 from flow.core.rewards import penalize_near_standstill, penalize_standstill
-from flow.core.rewards import energy_consumption
 
 os.environ["TEST_FLAG"] = "True"
 
@@ -152,31 +151,6 @@ def test_penalize_near_standstill(self):
         self.assertEqual(penalize_near_standstill(env, thresh=2), -10)
         self.assertEqual(penalize_near_standstill(env, thresh=0.5), -9)
 
-    def test_energy_consumption(self):
-        """Test the energy consumption method."""
-        vehicles = VehicleParams()
-        vehicles.add("test", num_vehicles=10)
-
-        env_params = EnvParams(additional_params={
-            "target_velocity": 10, "max_accel": 1, "max_decel": 1,
-            "sort_vehicles": False})
-
-        env, _, _ = ring_road_exp_setup(vehicles=vehicles,
-                                        env_params=env_params)
-
-        # check the penalty is zero at speed zero
-        self.assertEqual(energy_consumption(env, gain=1), 0)
-
-        # change the speed of one vehicle
-        env.k.vehicle.test_set_speed("test_0", 1)
-        self.assertEqual(energy_consumption(env), -12.059337750000001)
-
-        # check that stepping change the previous speeds and increases the energy consumption
-        env.step(rl_actions=None)
-        env.step(rl_actions=None)
-
-        self.assertGreater(env.k.vehicle.get_previous_speed("test_0"), 0.0)
-        self.assertLess(energy_consumption(env), -12.059337750000001)
-
     def test_boolean_action_penalty(self):
         """Test the boolean_action_penalty method."""
         actions = [False, False, False, False, False]
diff --git a/tests/fast_tests/test_scenarios.py b/tests/fast_tests/test_scenarios.py
index 5fccdcb3b..2263f3474 100644
--- a/tests/fast_tests/test_scenarios.py
+++ b/tests/fast_tests/test_scenarios.py
@@ -5,11 +5,8 @@
 from flow.networks import BottleneckNetwork, FigureEightNetwork, \
     TrafficLightGridNetwork, HighwayNetwork, RingNetwork, MergeNetwork, \
     MiniCityNetwork, MultiRingNetwork
-from flow.networks import I210SubNetwork
 from tests.setup_scripts import highway_exp_setup
 
-import flow.config as config
-
 __all__ = [
     "MultiRingNetwork", "MiniCityNetwork"
 ]
@@ -136,7 +133,7 @@ def test_ghost_edge(self):
         self.assertEqual(env.k.network.speed_limit("highway_0"), 30)
 
         # =================================================================== #
-        #                  With a ghost edge (300m, 25m/s)                    #
+        #                         With a ghost edge                           #
         # =================================================================== #
 
         # create the network
@@ -147,37 +144,7 @@
         env, _, _ = highway_exp_setup(
             net_params=NetParams(additional_params={
                 "length": 1000,
                 "lanes": 4,
                 "speed_limit": 30,
                 "num_edges": 1,
                 "use_ghost_edge": True,
-                "ghost_speed_limit": 25,
-                "boundary_cell_length": 300,
-            })
-        )
-        env.reset()
-
-        # check the network length
-        self.assertEqual(env.k.network.length(), 1300.1)
-
-        # check the edge list
-        self.assertEqual(env.k.network.get_edge_list(),
-                         ["highway_0", "highway_end"])
-
-        # check the speed limits of the edges
-        self.assertEqual(env.k.network.speed_limit("highway_0"), 30)
-        self.assertEqual(env.k.network.speed_limit("highway_end"), 25)
-
-        # =================================================================== #
-        #                  With a ghost edge (500m, 10m/s)                    #
-        # =================================================================== #
-
-        # create the network
-        env, _, _ = highway_exp_setup(
-            net_params=NetParams(additional_params={
-                "length": 1000,
-                "lanes": 4,
-                "speed_limit": 30,
-                "num_edges": 1,
-                "use_ghost_edge": True,
-                "ghost_speed_limit": 10,
-                "boundary_cell_length": 500,
+                "ghost_speed_limit": 25
             })
         )
         env.reset()
@@ -191,7 +158,7 @@
         # check the speed limits of the edges
         self.assertEqual(env.k.network.speed_limit("highway_0"), 30)
-        self.assertEqual(env.k.network.speed_limit("highway_end"), 10)
+        self.assertEqual(env.k.network.speed_limit("highway_end"), 25)
 
 
 class TestRingNetwork(unittest.TestCase):
@@ -254,150 +221,6 @@ def test_additional_net_params(self):
         )
 
 
-class TestI210SubNetwork(unittest.TestCase):
-
-    """Tests I210SubNetwork in flow/networks/i210_subnetwork.py."""
-
-    def test_additional_net_params(self):
-        """Ensures that not returning the correct params leads to an error."""
-        self.assertTrue(
-            test_additional_params(
-                network_class=I210SubNetwork,
-                additional_params={
-                    "on_ramp": False,
-                    "ghost_edge": False,
-                }
-            )
-        )
-
-    def test_specify_routes(self):
-        """Validates that the routes are properly specified for the network.
-
-        This is done simply by checking the initial edges routes are specified
-        from, which alternates based on choice of network configuration.
-
-        This method tests the routes for the following cases:
-
-        1. on_ramp = False, ghost_edge = False
-        2. on_ramp = True, ghost_edge = False
-        3. on_ramp = False, ghost_edge = True
-        4. on_ramp = True, ghost_edge = True
-        """
-        # test case 1
-        network = I210SubNetwork(
-            name='test-3',
-            vehicles=VehicleParams(),
-            net_params=NetParams(
-                template=os.path.join(
-                    config.PROJECT_PATH,
-                    "examples/exp_configs/templates/sumo/test2.net.xml"
-                ),
-                additional_params={
-                    "on_ramp": False,
-                    "ghost_edge": False,
-                },
-            ),
-        )
-
-        self.assertEqual(
-            ['119257914'],
-            sorted(list(network.specify_routes(network.net_params).keys()))
-        )
-
-        del network
-
-        # test case 2
-        network = I210SubNetwork(
-            name='test-3',
-            vehicles=VehicleParams(),
-            net_params=NetParams(
-                template=os.path.join(
-                    config.PROJECT_PATH,
-                    "examples/exp_configs/templates/sumo/test2.net.xml"
-                ),
-                additional_params={
-                    "on_ramp": True,
-                    "ghost_edge": True,
-                },
-            ),
-        )
-
-        self.assertEqual(
-            ['119257908#0',
-             '119257908#1',
-             '119257908#1-AddedOffRampEdge',
-             '119257908#1-AddedOnRampEdge',
-             '119257908#2',
-             '119257908#3',
-             '119257914',
-             '173381935',
-             '27414342#0',
-             '27414342#1-AddedOnRampEdge',
-             '27414345',
-             'ghost0'],
-            sorted(list(network.specify_routes(network.net_params).keys()))
-        )
-
-        del network
-
-        # test case 3
-        network = I210SubNetwork(
-            name='test-3',
-            vehicles=VehicleParams(),
-            net_params=NetParams(
-                template=os.path.join(
-                    config.PROJECT_PATH,
-                    "examples/exp_configs/templates/sumo/test2.net.xml"
-                ),
-                additional_params={
-                    "on_ramp": False,
-                    "ghost_edge": True,
-                },
-            ),
-        )
-
-        self.assertEqual(
-            ['119257914', 'ghost0'],
-            sorted(list(network.specify_routes(network.net_params).keys()))
-        )
-
-        del network
-
-        # test case 4
-        network = I210SubNetwork(
-            name='test-3',
-            vehicles=VehicleParams(),
-            net_params=NetParams(
-                template=os.path.join(
-                    config.PROJECT_PATH,
-                    "examples/exp_configs/templates/sumo/test2.net.xml"
-                ),
-                additional_params={
-                    "on_ramp": True,
-                    "ghost_edge": True,
-                },
-            ),
-        )
-
-        self.assertEqual(
-            ['119257908#0',
-             '119257908#1',
-             '119257908#1-AddedOffRampEdge',
-             '119257908#1-AddedOnRampEdge',
-             '119257908#2',
-             '119257908#3',
-             '119257914',
-             '173381935',
-             '27414342#0',
-             '27414342#1-AddedOnRampEdge',
-             '27414345',
-             'ghost0'],
-            sorted(list(network.specify_routes(network.net_params).keys()))
-        )
-
-        del network
-
-
 ###############################################################################
 #                              Utility methods                                #
 ###############################################################################
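After this change, test_ghost_edge exercises a single ghost-edge configuration: a 1000 m, four-lane highway whose downstream ghost edge ("highway_end") keeps a 25 m/s speed limit, with the boundary_cell_length knob removed. A condensed sketch of that configuration, using only the parameters and helpers that appear in the test itself:

    from flow.core.params import NetParams
    from tests.setup_scripts import highway_exp_setup

    # The one ghost-edge case the updated test still covers.
    env, _, _ = highway_exp_setup(
        net_params=NetParams(additional_params={
            "length": 1000,          # main edge length (m)
            "lanes": 4,              # lane count
            "speed_limit": 30,       # main edge speed limit (m/s)
            "num_edges": 1,          # segments the main edge is split into
            "use_ghost_edge": True,  # append the "highway_end" ghost edge
            "ghost_speed_limit": 25  # ghost edge speed limit (m/s)
        })
    )
    env.reset()

    # Both edges should be present, with the ghost edge limited to 25 m/s.
    assert env.k.network.get_edge_list() == ["highway_0", "highway_end"]
    assert env.k.network.speed_limit("highway_end") == 25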
25, - "boundary_cell_length": 300, } net_params = NetParams(additional_params=additional_net_params) vehicles = VehicleParams() @@ -407,7 +406,6 @@ def test_no_junctions_highway(self): "num_edges": 3, "use_ghost_edge": False, "ghost_speed_limit": 25, - "boundary_cell_length": 300, } net_params = NetParams(additional_params=additional_net_params) vehicles = VehicleParams() @@ -477,7 +475,6 @@ def test_no_junctions_highway(self): "num_edges": 3, "use_ghost_edge": False, "ghost_speed_limit": 25, - "boundary_cell_length": 300, } net_params = NetParams(additional_params=additional_net_params) vehicles = VehicleParams() diff --git a/tests/fast_tests/test_visualizers.py b/tests/fast_tests/test_visualizers.py index d2f4a20a4..bc888c498 100644 --- a/tests/fast_tests/test_visualizers.py +++ b/tests/fast_tests/test_visualizers.py @@ -91,114 +91,216 @@ def test_capacity_diagram_generator(self): np.testing.assert_array_almost_equal(std_outflows, expected_stds) def test_time_space_diagram_figure_eight(self): + # check that the exported data matches the expected emission file data + fig8_emission_data = { + 'idm_3': {'pos': [27.25, 28.25, 30.22, 33.17], + 'time': [1.0, 2.0, 3.0, 4.0], + 'vel': [0.0, 0.99, 1.98, 2.95], + 'edge': ['upper_ring', 'upper_ring', 'upper_ring', + 'upper_ring'], + 'lane': [0.0, 0.0, 0.0, 0.0]}, + 'idm_4': {'pos': [56.02, 57.01, 58.99, 61.93], + 'time': [1.0, 2.0, 3.0, 4.0], + 'vel': [0.0, 0.99, 1.98, 2.95], + 'edge': ['upper_ring', 'upper_ring', 'upper_ring', + 'upper_ring'], + 'lane': [0.0, 0.0, 0.0, 0.0]}, + 'idm_5': {'pos': [84.79, 85.78, 87.76, 90.7], + 'time': [1.0, 2.0, 3.0, 4.0], + 'vel': [0.0, 0.99, 1.98, 2.95], + 'edge': ['upper_ring', 'upper_ring', 'upper_ring', + 'upper_ring'], + 'lane': [0.0, 0.0, 0.0, 0.0]}, + 'idm_2': {'pos': [28.77, 29.76, 1.63, 4.58], + 'time': [1.0, 2.0, 3.0, 4.0], + 'vel': [0.0, 0.99, 1.97, 2.95], + 'edge': ['top', 'top', 'upper_ring', 'upper_ring'], + 'lane': [0.0, 0.0, 0.0, 0.0]}, + 'idm_13': {'pos': [106.79, 107.79, 109.77, 112.74], + 'time': [1.0, 2.0, 3.0, 4.0], + 'vel': [0.0, 0.99, 1.98, 2.96], + 'edge': ['lower_ring', 'lower_ring', 'lower_ring', + 'lower_ring'], + 'lane': [0.0, 0.0, 0.0, 0.0]}, + 'idm_9': {'pos': [22.01, 23.0, 24.97, 27.92], + 'time': [1.0, 2.0, 3.0, 4.0], + 'vel': [0.0, 0.99, 1.97, 2.95], + 'edge': ['left', 'left', 'left', 'left'], + 'lane': [0.0, 0.0, 0.0, 0.0]}, + 'idm_6': {'pos': [113.56, 114.55, 116.52, 119.47], + 'time': [1.0, 2.0, 3.0, 4.0], + 'vel': [0.0, 0.99, 1.97, 2.95], + 'edge': ['upper_ring', 'upper_ring', 'upper_ring', + 'upper_ring'], + 'lane': [0.0, 0.0, 0.0, 0.0]}, + 'idm_8': {'pos': [29.44, 0.28, 2.03, 4.78], + 'time': [1.0, 2.0, 3.0, 4.0], + 'vel': [0.0, 0.84, 1.76, 2.75], + 'edge': ['right', ':center_0', ':center_0', + ':center_0'], + 'lane': [0.0, 0.0, 0.0, 0.0]}, + 'idm_12': {'pos': [78.03, 79.02, 80.99, 83.94], + 'time': [1.0, 2.0, 3.0, 4.0], + 'vel': [0.0, 0.99, 1.98, 2.95], + 'edge': ['lower_ring', 'lower_ring', 'lower_ring', + 'lower_ring'], + 'lane': [0.0, 0.0, 0.0, 0.0]}, + 'idm_10': {'pos': [20.49, 21.48, 23.46, 26.41], + 'time': [1.0, 2.0, 3.0, 4.0], + 'vel': [0.0, 0.99, 1.98, 2.95], + 'edge': ['lower_ring', 'lower_ring', 'lower_ring', + 'lower_ring'], + 'lane': [0.0, 0.0, 0.0, 0.0]}, + 'idm_11': {'pos': [49.26, 50.25, 52.23, 55.17], + 'time': [1.0, 2.0, 3.0, 4.0], + 'vel': [0.0, 0.99, 1.98, 2.95], + 'edge': ['lower_ring', 'lower_ring', 'lower_ring', + 'lower_ring'], + 'lane': [0.0, 0.0, 0.0, 0.0]}, + 'idm_1': {'pos': [0.0, 0.99, 2.97, 5.91], + 'time': [1.0, 2.0, 3.0, 4.0], + 'vel': [0.0, 
diff --git a/tests/fast_tests/test_visualizers.py b/tests/fast_tests/test_visualizers.py
index d2f4a20a4..bc888c498 100644
--- a/tests/fast_tests/test_visualizers.py
+++ b/tests/fast_tests/test_visualizers.py
@@ -91,114 +91,216 @@ def test_capacity_diagram_generator(self):
         np.testing.assert_array_almost_equal(std_outflows, expected_stds)
 
     def test_time_space_diagram_figure_eight(self):
+        # check that the exported data matches the expected emission file data
+        fig8_emission_data = {
+            'idm_3': {'pos': [27.25, 28.25, 30.22, 33.17],
+                      'time': [1.0, 2.0, 3.0, 4.0],
+                      'vel': [0.0, 0.99, 1.98, 2.95],
+                      'edge': ['upper_ring', 'upper_ring', 'upper_ring',
+                               'upper_ring'],
+                      'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_4': {'pos': [56.02, 57.01, 58.99, 61.93],
+                      'time': [1.0, 2.0, 3.0, 4.0],
+                      'vel': [0.0, 0.99, 1.98, 2.95],
+                      'edge': ['upper_ring', 'upper_ring', 'upper_ring',
+                               'upper_ring'],
+                      'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_5': {'pos': [84.79, 85.78, 87.76, 90.7],
+                      'time': [1.0, 2.0, 3.0, 4.0],
+                      'vel': [0.0, 0.99, 1.98, 2.95],
+                      'edge': ['upper_ring', 'upper_ring', 'upper_ring',
+                               'upper_ring'],
+                      'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_2': {'pos': [28.77, 29.76, 1.63, 4.58],
+                      'time': [1.0, 2.0, 3.0, 4.0],
+                      'vel': [0.0, 0.99, 1.97, 2.95],
+                      'edge': ['top', 'top', 'upper_ring', 'upper_ring'],
+                      'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_13': {'pos': [106.79, 107.79, 109.77, 112.74],
+                       'time': [1.0, 2.0, 3.0, 4.0],
+                       'vel': [0.0, 0.99, 1.98, 2.96],
+                       'edge': ['lower_ring', 'lower_ring', 'lower_ring',
+                                'lower_ring'],
+                       'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_9': {'pos': [22.01, 23.0, 24.97, 27.92],
+                      'time': [1.0, 2.0, 3.0, 4.0],
+                      'vel': [0.0, 0.99, 1.97, 2.95],
+                      'edge': ['left', 'left', 'left', 'left'],
+                      'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_6': {'pos': [113.56, 114.55, 116.52, 119.47],
+                      'time': [1.0, 2.0, 3.0, 4.0],
+                      'vel': [0.0, 0.99, 1.97, 2.95],
+                      'edge': ['upper_ring', 'upper_ring', 'upper_ring',
+                               'upper_ring'],
+                      'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_8': {'pos': [29.44, 0.28, 2.03, 4.78],
+                      'time': [1.0, 2.0, 3.0, 4.0],
+                      'vel': [0.0, 0.84, 1.76, 2.75],
+                      'edge': ['right', ':center_0', ':center_0',
+                               ':center_0'],
+                      'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_12': {'pos': [78.03, 79.02, 80.99, 83.94],
+                       'time': [1.0, 2.0, 3.0, 4.0],
+                       'vel': [0.0, 0.99, 1.98, 2.95],
+                       'edge': ['lower_ring', 'lower_ring', 'lower_ring',
+                                'lower_ring'],
+                       'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_10': {'pos': [20.49, 21.48, 23.46, 26.41],
+                       'time': [1.0, 2.0, 3.0, 4.0],
+                       'vel': [0.0, 0.99, 1.98, 2.95],
+                       'edge': ['lower_ring', 'lower_ring', 'lower_ring',
+                                'lower_ring'],
+                       'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_11': {'pos': [49.26, 50.25, 52.23, 55.17],
+                       'time': [1.0, 2.0, 3.0, 4.0],
+                       'vel': [0.0, 0.99, 1.98, 2.95],
+                       'edge': ['lower_ring', 'lower_ring', 'lower_ring',
+                                'lower_ring'],
+                       'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_1': {'pos': [0.0, 0.99, 2.97, 5.91],
+                      'time': [1.0, 2.0, 3.0, 4.0],
+                      'vel': [0.0, 0.99, 1.98, 2.95],
+                      'edge': ['top', 'top', 'top', 'top'],
+                      'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_7': {'pos': [0.67, 1.66, 3.64, 6.58],
+                      'time': [1.0, 2.0, 3.0, 4.0],
+                      'vel': [0.0, 0.99, 1.97, 2.94],
+                      'edge': ['right', 'right', 'right', 'right'],
+                      'lane': [0.0, 0.0, 0.0, 0.0]},
+            'idm_0': {'pos': [0.0, 1.0, 2.98, 5.95],
+                      'time': [1.0, 2.0, 3.0, 4.0],
+                      'vel': [0.0, 1.0, 1.99, 2.97],
+                      'edge': ['bottom', 'bottom', 'bottom', 'bottom'],
+                      'lane': [0.0, 0.0, 0.0, 0.0]}
+        }
         dir_path = os.path.dirname(os.path.realpath(__file__))
+        actual_emission_data = tsd.import_data_from_emission(
+            os.path.join(dir_path, 'test_files/fig8_emission.csv'))
+        self.assertDictEqual(fig8_emission_data, actual_emission_data)
+
+        # test get_time_space_data for figure eight networks
         flow_params = tsd.get_flow_params(
             os.path.join(dir_path, 'test_files/fig8.json'))
-        emission_data = tsd.import_data_from_trajectory(
+        emission_data, _, _, _ = tsd.import_data_from_trajectory(
             os.path.join(dir_path, 'test_files/fig8_emission.csv'), flow_params)
 
-        segs, _ = tsd.get_time_space_data(emission_data, flow_params)
+        segs, _ = tsd.get_time_space_data(emission_data, flow_params['network'])
 
         expected_segs = np.array([
-            [[1., 60.], [2., 59.]],
-            [[2., 59.], [3., 57.02]],
-            [[3., 57.02], [4., 54.05]],
-            [[1., 23.8], [2., 22.81]],
-            [[2., 22.81], [3., 20.83]],
-            [[3., 20.83], [4., 17.89]],
-            [[1., 182.84166941], [2., 181.85166941]],
-            [[2., 181.85166941], [3., 179.87166941]],
-            [[3., 179.87166941], [4., 176.92166941]],
-            [[1., 154.07166941], [2., 153.08166941]],
-            [[2., 153.08166941], [3., 151.10166941]],
-            [[3., 151.10166941], [4., 148.16166941]],
-            [[1., 125.30166941], [2., 124.31166941]],
-            [[2., 124.31166941], [3., 122.34166941]],
-            [[3., 122.34166941], [4., 119.39166941]],
-            [[1., 96.54166941], [2., 95.54166941]],
-            [[2., 95.54166941], [3., 93.56166941]],
-            [[3., 93.56166941], [4., 90.59166941]],
-            [[1., -203.16166941], [2., -202.17166941]],
-            [[2., -202.17166941], [3., -200.02166941]],
-            [[3., -200.02166941], [4., -197.07166941]],
-            [[1., -174.40166941], [2., -173.40166941]],
-            [[2., -173.40166941], [3., -171.43166941]],
-            [[3., -171.43166941], [4., -168.48166941]],
-            [[1., -145.63166941], [2., -144.64166941]],
-            [[2., -144.64166941], [3., -142.66166941]],
-            [[3., -142.66166941], [4., -139.72166941]],
-            [[1., -116.86166941], [2., -115.87166941]],
-            [[2., -115.87166941], [3., -113.89166941]],
-            [[3., -113.89166941], [4., -110.95166941]],
-            [[1., -88.09166941], [2., -87.10166941]],
-            [[2., -87.10166941], [3., -85.13166941]],
-            [[3., -85.13166941], [4., -82.18166941]],
-            [[1., -59.33], [2., -58.34]],
-            [[2., -58.34], [3., -56.36]],
-            [[3., -56.36], [4., -53.42]],
-            [[1., -30.56], [2., -29.72]],
-            [[2., -29.72], [3., -27.97]],
-            [[3., -27.97], [4., -25.22]],
-            [[1., -1.79], [2., -0.8]],
-            [[2., -0.8], [3., 208.64166941]],
-            [[3., 208.64166941], [4., 205.69166941]]]
+            [[1., 263.16166941], [2., 262.16166941]],
+            [[2., 262.16166941], [3., 260.18166941]],
+            [[3., 260.18166941], [4., 257.21166941]],
+            [[1., 226.96166941], [2., 225.97166941]],
+            [[2., 225.97166941], [3., 223.99166941]],
+            [[3., 223.99166941], [4., 221.05166941]],
+            [[1., 386.00333882], [2., 385.01333882]],
+            [[2., 385.01333882], [3., 383.03333882]],
+            [[3., 383.03333882], [4., 380.08333882]],
+            [[1., 357.23333882], [2., 356.24333882]],
+            [[2., 356.24333882], [3., 354.26333882]],
+            [[3., 354.26333882], [4., 351.32333882]],
+            [[1., 328.46333882], [2., 327.47333882]],
+            [[2., 327.47333882], [3., 325.50333882]],
+            [[3., 325.50333882], [4., 322.55333882]],
+            [[1., 299.70333882], [2., 298.70333882]],
+            [[2., 298.70333882], [3., 296.72333882]],
+            [[3., 296.72333882], [4., 293.75333882]],
+            [[1., 0.], [2., 0.99]],
+            [[2., 0.99], [3., 3.14]],
+            [[3., 3.14], [4., 6.09]],
+            [[1., 28.76], [2., 29.76]],
+            [[2., 29.76], [3., 31.73]],
+            [[3., 31.73], [4., 34.68]],
+            [[1., 57.53], [2., 58.52]],
+            [[2., 58.52], [3., 60.5]],
+            [[3., 60.5], [4., 63.44]],
+            [[1., 86.3], [2., 87.29]],
+            [[2., 87.29], [3., 89.27]],
+            [[3., 89.27], [4., 92.21]],
+            [[1., 115.07], [2., 116.06]],
+            [[2., 116.06], [3., 118.03]],
+            [[3., 118.03], [4., 120.98]],
+            [[1., 143.83166941], [2., 144.82166941]],
+            [[2., 144.82166941], [3., 146.80166941]],
+            [[3., 146.80166941], [4., 149.74166941]],
+            [[1., 172.60166941], [2., 173.44166941]],
+            [[2., 173.44166941], [3., 175.19166941]],
+            [[3., 175.19166941], [4., 177.94166941]],
+            [[1., 201.37166941], [2., 202.36166941]],
+            [[2., 202.36166941], [3., 411.80333882]],
+            [[3., 411.80333882], [4., 408.85333882]]]
         )
+        expected_speed = np.array([
+            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+            [1, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99,
+             0.99, 0.84, 0.99],
+            [1.99, 1.98, 1.98, 1.98, 1.98, 1.98, 1.97, 1.98, 1.98, 1.98, 1.97,
+             1.97, 1.76, 1.97]
+        ])
 
-        np.testing.assert_array_almost_equal(segs, expected_segs)
+        np.testing.assert_array_almost_equal(pos[:-1, :], expected_pos)
+        np.testing.assert_array_almost_equal(speed[:-1, :], expected_speed)
 
     def test_time_space_diagram_merge(self):
         dir_path = os.path.dirname(os.path.realpath(__file__))
+        emission_data = tsd.import_data_from_emission(
+            os.path.join(dir_path, 'test_files/merge_emission.csv'))
+
         flow_params = tsd.get_flow_params(
             os.path.join(dir_path, 'test_files/merge.json'))
-        emission_data = tsd.import_data_from_trajectory(
+        emission_data, _, _, _ = tsd.import_data_from_trajectory(
             os.path.join(dir_path, 'test_files/merge_emission.csv'), flow_params)
 
-        segs, _ = tsd.get_time_space_data(emission_data, flow_params)
+        segs, _ = tsd.get_time_space_data(emission_data, flow_params['network'])
 
         expected_segs = np.array([
-            [[2.0000e-01, 7.2949e+02], [4.0000e-01, 7.2953e+02]],
-            [[4.0000e-01, 7.2953e+02], [6.0000e-01, 7.2961e+02]],
-            [[6.0000e-01, 7.2961e+02], [8.0000e-01, 7.2973e+02]],
-            [[8.0000e-01, 7.2973e+02], [1.0000e+00, 7.2988e+02]]]
+            [[2.0000e-01, 7.2463e+02], [4.0000e-01, 7.2467e+02]],
+            [[4.0000e-01, 7.2467e+02], [6.0000e-01, 7.2475e+02]],
+            [[6.0000e-01, 7.2475e+02], [8.0000e-01, 7.2487e+02]],
+            [[8.0000e-01, 7.2487e+02], [1.0000e+00, 7.2502e+02]]]
         )
 
-        np.testing.assert_array_almost_equal(segs, expected_segs)
+        np.testing.assert_array_almost_equal(pos, expected_pos)
+        np.testing.assert_array_almost_equal(speed, expected_speed)
 
     def test_time_space_diagram_I210(self):
         dir_path = os.path.dirname(os.path.realpath(__file__))
+        emission_data = tsd.import_data_from_emission(
+            os.path.join(dir_path, 'test_files/i210_emission.csv'))
+
         module = __import__("examples.exp_configs.non_rl", fromlist=["i210_subnetwork"])
         flow_params = getattr(module, "i210_subnetwork").flow_params
-        emission_data = tsd.import_data_from_trajectory(
+        emission_data, _, _, _ = tsd.import_data_from_trajectory(
             os.path.join(dir_path, 'test_files/i210_emission.csv'), flow_params)
 
-        segs, _ = tsd.get_time_space_data(emission_data, flow_params)
+        segs, _ = tsd.get_time_space_data(emission_data, flow_params['network'])
 
         expected_segs = {
             1: np.array([
-                [[0.8, 5.1], [1.6, 23.37]],
-                [[1.6, 23.37], [2.4, 42.02]],
-                [[2.4, 42.02], [3.2, 61.21]],
-                [[3.2, 61.21], [4., 18.87]],
-                [[4., 18.87], [4.8, 39.93]],
-                [[2.4, 5.1], [3.2, 22.97]],
-                [[3.2, 22.97], [4., 40.73]]]
+                [[-719.2, 3.77], [-718.4, 22.04]],
+                [[-718.4, 22.04], [-717.6, 40.69]],
+                [[-717.6, 40.69], [-716.8, 59.88]],
+                [[-716.8, 59.88], [-716., 17.54]],
+                [[-716., 17.54], [-715.2, 38.6]],
+                [[-717.6, 3.77], [-716.8, 21.64]],
+                [[-716.8, 21.64], [-716., 39.4]]]
             ),
             2: np.array([
-                [[2.4, 5.1], [3.2, 23.98]],
-                [[3.2, 23.98], [4., 43.18]]]
+                [[-717.6, 3.77], [-716.8, 22.65]],
+                [[-716.8, 22.65], [-716., 41.85]]]
             ),
             3: np.array([
-                [[0.8, 5.1], [1.6, 23.72]],
-                [[1.6, 23.72], [2.4, 43.06]],
-                [[2.4, 43.06], [3.2, 1.33]],
-                [[3.2, 1.33], [4., 21.65]],
-                [[4., 21.65], [4.8, 43.46]],
-                [[2.4, 5.1], [3.2, 23.74]],
-                [[3.2, 23.74], [4., 42.38]]]
+                [[-719.2, 3.77], [-718.4, 22.39]],
+                [[-718.4, 22.39], [-717.6, 41.73]],
+                [[-717.6, 41.73], [-716.8, 0.]],
+                [[-716.8, 0.], [-716., 20.32]],
+                [[-716., 20.32], [-715.2, 42.13]],
+                [[-717.6, 3.77], [-716.8, 22.41]],
+                [[-716.8, 22.41], [-716., 41.05]]]
             ),
             4: np.array([
-                [[2.4, 5.1], [3.2, 23.6]],
-                [[3.2, 23.6], [4., 42.46]]]
+                [[-717.6, 3.77], [-716.8, 22.27]],
+                [[-716.8, 22.27], [-716., 41.13]]]
             )}
 
         for lane, expected_seg in expected_segs.items():
@@ -206,111 +308,52 @@ def test_time_space_diagram_ring_road(self):
         dir_path = os.path.dirname(os.path.realpath(__file__))
+        emission_data = tsd.import_data_from_emission(
+            os.path.join(dir_path, 'test_files/ring_230_emission.csv'))
+
         flow_params = tsd.get_flow_params(
             os.path.join(dir_path, 'test_files/ring_230.json'))
-        emission_data = tsd.import_data_from_trajectory(
+        emission_data, _, _, _ = tsd.import_data_from_trajectory(
             os.path.join(dir_path, 'test_files/ring_230_emission.csv'), flow_params)
 
-        segs, _ = tsd.get_time_space_data(emission_data, flow_params)
+        segs, _ = tsd.get_time_space_data(emission_data, flow_params['network'])
 
         expected_segs = np.array([
-            [[1.0000e-01, 0.0000e+00], [2.0000e-01, 1.0000e-02]],
-            [[2.0000e-01, 1.0000e-02], [3.0000e-01, 2.0000e-02]],
-            [[3.0000e-01, 2.0000e-02], [4.0000e-01, 5.0000e-02]],
-            [[4.0000e-01, 5.0000e-02], [5.0000e-01, 8.0000e-02]],
-            [[5.0000e-01, 8.0000e-02], [6.0000e-01, 1.2000e-01]],
-            [[1.0000e-01, 9.5500e+00], [2.0000e-01, 9.5500e+00]],
-            [[2.0000e-01, 9.5500e+00], [3.0000e-01, 9.5700e+00]],
-            [[3.0000e-01, 9.5700e+00], [4.0000e-01, 9.5900e+00]],
-            [[4.0000e-01, 9.5900e+00], [5.0000e-01, 9.6200e+00]],
-            [[5.0000e-01, 9.6200e+00], [6.0000e-01, 9.6600e+00]],
-            [[1.0000e-01, 9.5550e+01], [2.0000e-01, 9.5560e+01]],
-            [[2.0000e-01, 9.5560e+01], [3.0000e-01, 9.5580e+01]],
-            [[3.0000e-01, 9.5580e+01], [4.0000e-01, 9.5600e+01]],
-            [[4.0000e-01, 9.5600e+01], [5.0000e-01, 9.5630e+01]],
-            [[5.0000e-01, 9.5630e+01], [6.0000e-01, 9.5670e+01]],
-            [[1.0000e-01, 1.0510e+02], [2.0000e-01, 1.0511e+02]],
-            [[2.0000e-01, 1.0511e+02], [3.0000e-01, 1.0512e+02]],
-            [[3.0000e-01, 1.0512e+02], [4.0000e-01, 1.0515e+02]],
-            [[4.0000e-01, 1.0515e+02], [5.0000e-01, 1.0518e+02]],
-            [[5.0000e-01, 1.0518e+02], [6.0000e-01, 1.0522e+02]],
-            [[1.0000e-01, 1.1465e+02], [2.0000e-01, 1.1465e+02]],
-            [[2.0000e-01, 1.1465e+02], [3.0000e-01, 1.1467e+02]],
-            [[3.0000e-01, 1.1467e+02], [4.0000e-01, 1.1469e+02]],
-            [[4.0000e-01, 1.1469e+02], [5.0000e-01, 1.1472e+02]],
-            [[5.0000e-01, 1.1472e+02], [6.0000e-01, 1.1476e+02]],
-            [[1.0000e-01, 1.2429e+02], [2.0000e-01, 1.2430e+02]],
-            [[2.0000e-01, 1.2430e+02], [3.0000e-01, 1.2431e+02]],
-            [[3.0000e-01, 1.2431e+02], [4.0000e-01, 1.2434e+02]],
-            [[4.0000e-01, 1.2434e+02], [5.0000e-01, 1.2437e+02]],
-            [[5.0000e-01, 1.2437e+02], [6.0000e-01, 1.2441e+02]],
-            [[1.0000e-01, 1.3384e+02], [2.0000e-01, 1.3384e+02]],
-            [[2.0000e-01, 1.3384e+02], [3.0000e-01, 1.3386e+02]],
-            [[3.0000e-01, 1.3386e+02], [4.0000e-01, 1.3388e+02]],
-            [[4.0000e-01, 1.3388e+02], [5.0000e-01, 1.3391e+02]],
-            [[1.0000e-01, 1.4338e+02], [2.0000e-01, 1.4339e+02]],
-            [[2.0000e-01, 1.4339e+02], [3.0000e-01, 1.4341e+02]],
-            [[3.0000e-01, 1.4341e+02], [4.0000e-01, 1.4343e+02]],
-            [[4.0000e-01, 1.4343e+02], [5.0000e-01, 1.4346e+02]],
-            [[1.0000e-01, 1.5293e+02], [2.0000e-01, 1.5294e+02]],
-            [[2.0000e-01, 1.5294e+02], [3.0000e-01, 1.5295e+02]],
-            [[3.0000e-01, 1.5295e+02], [4.0000e-01, 1.5297e+02]],
-            [[4.0000e-01, 1.5297e+02], [5.0000e-01, 1.5301e+02]],
-            [[1.0000e-01, 1.6247e+02], [2.0000e-01, 1.6248e+02]],
-            [[2.0000e-01, 1.6248e+02], [3.0000e-01, 1.6250e+02]],
-            [[3.0000e-01, 1.6250e+02], [4.0000e-01, 1.6252e+02]],
-            [[4.0000e-01, 1.6252e+02], [5.0000e-01, 1.6255e+02]],
-            [[1.0000e-01, 1.7202e+02], [2.0000e-01, 1.7203e+02]],
-            [[2.0000e-01, 1.7203e+02], [3.0000e-01, 1.7204e+02]],
-            [[3.0000e-01, 1.7204e+02], [4.0000e-01, 1.7207e+02]],
-            [[4.0000e-01, 1.7207e+02], [5.0000e-01, 1.7210e+02]],
-            [[1.0000e-01, 1.8166e+02], [2.0000e-01, 1.8167e+02]],
-            [[2.0000e-01, 1.8167e+02], [3.0000e-01, 1.8169e+02]],
-            [[3.0000e-01, 1.8169e+02], [4.0000e-01, 1.8171e+02]],
-            [[4.0000e-01, 1.8171e+02], [5.0000e-01, 1.8174e+02]],
-            [[1.0000e-01, 1.9090e+01], [2.0000e-01, 1.9100e+01]],
-            [[2.0000e-01, 1.9100e+01], [3.0000e-01, 1.9110e+01]],
-            [[3.0000e-01, 1.9110e+01], [4.0000e-01, 1.9140e+01]],
-            [[4.0000e-01, 1.9140e+01], [5.0000e-01, 1.9170e+01]],
-            [[1.0000e-01, 1.9121e+02], [2.0000e-01, 1.9122e+02]],
-            [[2.0000e-01, 1.9122e+02], [3.0000e-01, 1.9123e+02]],
-            [[3.0000e-01, 1.9123e+02], [4.0000e-01, 1.9126e+02]],
-            [[4.0000e-01, 1.9126e+02], [5.0000e-01, 1.9129e+02]],
-            [[1.0000e-01, 2.0075e+02], [2.0000e-01, 2.0076e+02]],
-            [[2.0000e-01, 2.0076e+02], [3.0000e-01, 2.0078e+02]],
-            [[3.0000e-01, 2.0078e+02], [4.0000e-01, 2.0081e+02]],
-            [[4.0000e-01, 2.0081e+02], [5.0000e-01, 2.0085e+02]],
-            [[1.0000e-01, 2.8640e+01], [2.0000e-01, 2.8640e+01]],
-            [[2.0000e-01, 2.8640e+01], [3.0000e-01, 2.8660e+01]],
-            [[3.0000e-01, 2.8660e+01], [4.0000e-01, 2.8680e+01]],
-            [[4.0000e-01, 2.8680e+01], [5.0000e-01, 2.8710e+01]],
-            [[1.0000e-01, 3.8180e+01], [2.0000e-01, 3.8190e+01]],
-            [[2.0000e-01, 3.8190e+01], [3.0000e-01, 3.8210e+01]],
-            [[3.0000e-01, 3.8210e+01], [4.0000e-01, 3.8230e+01]],
-            [[4.0000e-01, 3.8230e+01], [5.0000e-01, 3.8260e+01]],
-            [[1.0000e-01, 4.7730e+01], [2.0000e-01, 4.7740e+01]],
-            [[2.0000e-01, 4.7740e+01], [3.0000e-01, 4.7750e+01]],
-            [[3.0000e-01, 4.7750e+01], [4.0000e-01, 4.7770e+01]],
-            [[4.0000e-01, 4.7770e+01], [5.0000e-01, 4.7810e+01]],
-            [[1.0000e-01, 5.7270e+01], [2.0000e-01, 5.7280e+01]],
-            [[2.0000e-01, 5.7280e+01], [3.0000e-01, 5.7300e+01]],
-            [[3.0000e-01, 5.7300e+01], [4.0000e-01, 5.7320e+01]],
-            [[4.0000e-01, 5.7320e+01], [5.0000e-01, 5.7350e+01]],
-            [[1.0000e-01, 6.6920e+01], [2.0000e-01, 6.6930e+01]],
-            [[2.0000e-01, 6.6930e+01], [3.0000e-01, 6.6940e+01]],
-            [[3.0000e-01, 6.6940e+01], [4.0000e-01, 6.6970e+01]],
-            [[4.0000e-01, 6.6970e+01], [5.0000e-01, 6.7000e+01]],
-            [[1.0000e-01, 7.6460e+01], [2.0000e-01, 7.6470e+01]],
-            [[2.0000e-01, 7.6470e+01], [3.0000e-01, 7.6490e+01]],
-            [[3.0000e-01, 7.6490e+01], [4.0000e-01, 7.6510e+01]],
-            [[4.0000e-01, 7.6510e+01], [5.0000e-01, 7.6540e+01]],
-            [[1.0000e-01, 8.6010e+01], [2.0000e-01, 8.6020e+01]],
-            [[2.0000e-01, 8.6020e+01], [3.0000e-01, 8.6030e+01]],
-            [[3.0000e-01, 8.6030e+01], [4.0000e-01, 8.6060e+01]],
-            [[4.0000e-01, 8.6060e+01], [5.0000e-01, 8.6090e+01]]]
+            [[-7.50000000e+01, 0.00000000e+00], [-7.49000000e+01, 7.98415842e-03]],
+            [[-7.49000000e+01, 7.98415842e-03], [-7.48000000e+01, 2.37963776e-02]],
+            [[-7.48000000e+01, 2.37963776e-02], [-7.47000000e+01, 4.72776801e-02]],
+            [[-7.50000000e+01, 9.54545455e+00], [-7.49000000e+01, 9.55343870e+00]],
+            [[-7.49000000e+01, 9.55343870e+00], [-7.48000000e+01, 9.56925092e+00]],
+            [[-7.48000000e+01, 9.56925092e+00], [-7.47000000e+01, 9.59273223e+00]],
+            [[-7.50000000e+01, 1.90909091e+01], [-7.49000000e+01, 1.90988932e+01]],
+            [[-7.49000000e+01, 1.90988932e+01], [-7.48000000e+01, 1.91147055e+01]],
+            [[-7.48000000e+01, 1.91147055e+01], [-7.47000000e+01, 1.91381868e+01]],
+            [[-7.50000000e+01, 2.86363636e+01], [-7.49000000e+01, 2.86443478e+01]],
+            [[-7.49000000e+01, 2.86443478e+01], [-7.48000000e+01, 2.86601600e+01]],
+            [[-7.48000000e+01, 2.86601600e+01], [-7.47000000e+01, 2.86836413e+01]],
+            [[-7.50000000e+01, 3.81818182e+01], [-7.49000000e+01, 3.81898023e+01]],
+            [[-7.49000000e+01, 3.81898023e+01], [-7.48000000e+01, 3.82056146e+01]],
+            [[-7.48000000e+01, 3.82056146e+01], [-7.47000000e+01, 3.82290959e+01]],
+            [[-7.50000000e+01, 4.77272727e+01], [-7.49000000e+01, 4.77352569e+01]],
+            [[-7.49000000e+01, 4.77352569e+01], [-7.48000000e+01, 4.77510691e+01]],
+            [[-7.48000000e+01, 4.77510691e+01], [-7.47000000e+01, 4.77745504e+01]]]
         )
+        expected_speed = np.array([
+            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+            [0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08,
+             0.08, 0.08, 0.08, 0.1, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08],
+            [0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16,
+             0.16, 0.16, 0.16, 0.2, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16],
+            [0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23,
+             0.23, 0.23, 0.23, 0.29, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23],
+            [0.31, 0.31, 0.31, 0.31, 0.31, 0.31, 0.31, 0.31, 0.31, 0.31, 0.31,
+             0.31, 0.31, 0.31, 0.39, 0.31, 0.31, 0.31, 0.31, 0.31, 0.31, 0.31],
+            [0.41, 0.41, 0.41, 0.41, 0.41, 0.41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             0, 0, 0, 0, 0, 0]
+        ])
 
-        np.testing.assert_array_almost_equal(segs, expected_segs)
+        np.testing.assert_array_almost_equal(pos, expected_pos)
+        np.testing.assert_array_almost_equal(speed, expected_speed)
 
     def test_plot_ray_results(self):
         dir_path = os.path.dirname(os.path.realpath(__file__))
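Throughout these tests, a time-space diagram is ultimately a set of two-point segments of the form [[t0, x0], [t1, x1]], as the expected_segs fixtures show. A sketch of rendering such segments, with values copied from one of the fixtures above; the matplotlib rendering is an illustrative assumption, not part of the API under test:

    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.collections import LineCollection

    # Three consecutive segments of one vehicle's trajectory (values taken
    # from the figure-eight fixture above).
    segs = np.array([
        [[1., 60.], [2., 59.]],
        [[2., 59.], [3., 57.02]],
        [[3., 57.02], [4., 54.05]],
    ])

    fig, ax = plt.subplots()
    ax.add_collection(LineCollection(segs))
    ax.autoscale()
    ax.set_xlabel("time (s)")
    ax.set_ylabel("position (m)")
    fig.savefig("time_space_diagram.png")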