diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py
index dd85c56cf..d993ae93a 100644
--- a/examples/exp_configs/non_rl/i210_subnetwork.py
+++ b/examples/exp_configs/non_rl/i210_subnetwork.py
@@ -101,7 +101,7 @@ edge_id = "119257908#1-AddedOnRampEdge"
 custom_callables = {
     "avg_merge_speed": lambda env: np.nan_to_num(np.mean(
-        env.k.vehicle.get_speed(env.k.vehicle.get_ids_by_edge(edge_id)))),
+        env.k.vehicle.get_speed(env.k.vehicle.get_ids()))),
     "avg_outflow": lambda env: np.nan_to_num(
         env.k.vehicle.get_outflow_rate(120)),
     # we multiply by 5 to account for the vehicle length and by 1000 to convert
     # into veh/hour
diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py
index 872568cab..1779adf69 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_i210.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py
@@ -5,7 +5,6 @@
 """
 import os

-from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
 from ray.tune.registry import register_env

 from flow.controllers import RLController
diff --git a/examples/train.py b/examples/train.py
index 1f2cd6300..aaa9caddb 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -22,7 +22,6 @@

 import ray
 from ray import tune
-from ray.tune import run_experiments
 from ray.tune.registry import register_env
 try:
     from ray.rllib.agents.agent import get_agent_class
@@ -36,9 +35,9 @@
 from flow.utils.registry import make_create_env


-
 def parse_args(args):
     """Parse training options user can specify in command line.
+
     Returns
     -------
     argparse.Namespace
@@ -140,6 +139,7 @@ def setup_exps_rllib(flow_params,
                      policies_to_train=None,
                      ):
     """Return the relevant components of an RLlib experiment.
+
     Parameters
     ----------
     flow_params : dict
diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py
index 409aeb14f..4082eb415 100644
--- a/flow/envs/multiagent/i210.py
+++ b/flow/envs/multiagent/i210.py
@@ -16,6 +16,8 @@
     "max_decel": 1,
     # whether we use an obs space that contains adjacent lane info or just the lead obs
     "lead_obs": True,
+    # whether the reward should come from local vehicles instead of global rewards
+    "local_reward": True,
 }


@@ -137,35 +139,50 @@ def compute_reward(self, rl_actions, **kwargs):
             return {}

         rewards = {}
-        for rl_id in self.k.vehicle.get_rl_ids():
-            if self.env_params.evaluate:
-                # reward is speed of vehicle if we are in evaluation mode
-                reward = self.k.vehicle.get_speed(rl_id)
-            elif kwargs['fail']:
-                # reward is 0 if a collision occurred
-                reward = 0
-            else:
-                # reward high system-level velocities
-                cost1 = average_velocity(self, fail=kwargs['fail'])
-
-                # penalize small time headways
-                cost2 = 0
-                t_min = 1  # smallest acceptable time headway
-
-                lead_id = self.k.vehicle.get_leader(rl_id)
-                if lead_id not in ["", None] \
-                        and self.k.vehicle.get_speed(rl_id) > 0:
-                    t_headway = max(
-                        self.k.vehicle.get_headway(rl_id) /
-                        self.k.vehicle.get_speed(rl_id), 0)
-                    cost2 += min((t_headway - t_min) / t_min, 0)
-
-                # weights for cost1, cost2, and cost3, respectively
-                eta1, eta2 = 1.00, 0.10
-
-                reward = max(eta1 * cost1 + eta2 * cost2, 0)
-
-            rewards[rl_id] = reward
+        if self.env_params.additional_params["local_reward"]:
+            for rl_id in self.k.vehicle.get_rl_ids():
+                rewards[rl_id] = 0
+                speeds = []
+                # negative speeds are error codes for missing vehicles
+                follow_speed = self.k.vehicle.get_speed(
+                    self.k.vehicle.get_follower(rl_id))
+                if follow_speed >= 0:
+                    speeds.append(follow_speed)
+                if self.k.vehicle.get_speed(rl_id) >= 0:
+                    speeds.append(self.k.vehicle.get_speed(rl_id))
+                if len(speeds) > 0:
+                    # rescale so the q function can estimate it quickly
+                    rewards[rl_id] = np.mean(speeds) / 500.0
+        else:
+            for rl_id in self.k.vehicle.get_rl_ids():
+                if self.env_params.evaluate:
+                    # reward is speed of vehicle if we are in evaluation mode
+                    reward = self.k.vehicle.get_speed(rl_id)
+                elif kwargs['fail']:
+                    # reward is 0 if a collision occurred
+                    reward = 0
+                else:
+                    # reward high system-level velocities
+                    cost1 = average_velocity(self, fail=kwargs['fail'])
+
+                    # penalize small time headways
+                    cost2 = 0
+                    t_min = 1  # smallest acceptable time headway
+
+                    lead_id = self.k.vehicle.get_leader(rl_id)
+                    if lead_id not in ["", None] \
+                            and self.k.vehicle.get_speed(rl_id) > 0:
+                        t_headway = max(
+                            self.k.vehicle.get_headway(rl_id) /
+                            self.k.vehicle.get_speed(rl_id), 0)
+                        cost2 += min((t_headway - t_min) / t_min, 0)
+
+                    # weights for cost1 and cost2, respectively
+                    eta1, eta2 = 1.00, 0.10
+
+                    reward = max(eta1 * cost1 + eta2 * cost2, 0)
+
+                rewards[rl_id] = reward
         return rewards

     def additional_command(self):
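Note for reviewers: the sketch below restates what the new `local_reward` branch computes, with the kernel calls factored out so it can be checked without SUMO. The helper name `local_reward` and the -1001 sentinel are illustrative assumptions here, not Flow API; in the environment the speeds come from `env.k.vehicle.get_speed`, which returns a negative error value when the queried vehicle (e.g. a missing follower) does not exist.

```python
import numpy as np


def local_reward(ego_speed, follower_speed):
    """Mean of the ego and follower speeds, rescaled by 1/500.

    Negative speeds stand in for Flow's error codes for missing vehicles,
    so they are filtered out rather than averaged in.
    """
    speeds = [s for s in (follower_speed, ego_speed) if s >= 0]
    if not speeds:
        return 0.0
    # rescale so the q function can estimate it quickly (as in the diff)
    return float(np.mean(speeds)) / 500.0


assert local_reward(ego_speed=20.0, follower_speed=10.0) == 15.0 / 500.0
assert local_reward(ego_speed=20.0, follower_speed=-1001) == 20.0 / 500.0  # no follower
assert local_reward(ego_speed=-1001, follower_speed=-1001) == 0.0
```

Dividing by 500 keeps the per-step reward on the order of 1e-2 at highway speeds, which the comment in the diff attributes to making the value function easier to estimate.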
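Similarly, here is a small worked example of the time-headway penalty (cost2) that the global branch keeps. `headway_penalty` is a hypothetical helper mirroring the inline logic: the penalty is zero once the time headway reaches t_min = 1 s and grows linearly negative as the headway shrinks below that.

```python
def headway_penalty(headway_m, speed_ms, t_min=1.0):
    """cost2 term: zero at or above t_min seconds of headway,
    linearly negative below it."""
    if speed_ms <= 0:
        return 0.0
    t_headway = max(headway_m / speed_ms, 0.0)
    return min((t_headway - t_min) / t_min, 0.0)


# 2 s of headway at 15 m/s: no penalty; 0.5 s of headway: -0.5
assert headway_penalty(headway_m=30.0, speed_ms=15.0) == 0.0
assert headway_penalty(headway_m=7.5, speed_ms=15.0) == -0.5
```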