diff --git a/mo_gymnasium/envs/highway/__init__.py b/mo_gymnasium/envs/highway/__init__.py
index 00f66d3e..23e8cc2b 100644
--- a/mo_gymnasium/envs/highway/__init__.py
+++ b/mo_gymnasium/envs/highway/__init__.py
@@ -4,3 +4,11 @@
 register(id="mo-highway-v0", entry_point="mo_gymnasium.envs.highway.highway:MOHighwayEnv", nondeterministic=True)
 
 register(id="mo-highway-fast-v0", entry_point="mo_gymnasium.envs.highway.highway:MOHighwayEnvFast", nondeterministic=True)
+
+register(id="mo-intersection-v0", entry_point="mo_gymnasium.envs.highway.intersection:MOIntersectionEnv", nondeterministic=True)
+
+register(id="mo-merge-v0", entry_point="mo_gymnasium.envs.highway.merge:MOMergeEnv", nondeterministic=True)
+
+register(id="mo-racetrack-v0", entry_point="mo_gymnasium.envs.highway.racetrack:MORacetrackEnv", nondeterministic=True)
+
+register(id="mo-roundabout-v0", entry_point="mo_gymnasium.envs.highway.roundabout:MORoundaboutEnv", nondeterministic=True)
diff --git a/mo_gymnasium/envs/highway/intersection.py b/mo_gymnasium/envs/highway/intersection.py
new file mode 100644
index 00000000..9076eddb
--- /dev/null
+++ b/mo_gymnasium/envs/highway/intersection.py
@@ -0,0 +1,55 @@
+import numpy as np
+from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
+from highway_env.envs import IntersectionEnv
+
+
+class MOIntersectionEnv(IntersectionEnv, EzPickle):
+    """
+    ## Description
+    Multi-objective version of the IntersectionEnv environment.
+
+    See [highway-env](https://github.com/eleurent/highway-env) for more information.
+
+    ## Reward Space
+    The reward is 4-dimensional:
+    - 0: high speed reward
+    - 1: arrived reward
+    - 2: collision penalty (negated collision reward, in [-1, 0])
+    - 3: on road reward
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Build the environment and declare its vector reward space.
+
+        All arguments are forwarded unchanged to ``EzPickle`` and ``IntersectionEnv``.
+        """
+        EzPickle.__init__(self, *args, **kwargs)
+
+        super().__init__(*args, **kwargs)
+        # float32 keeps the reward dtype consistent with the other MO highway environments.
+        self.reward_space = Box(
+            low=np.array([0.0, 0.0, -1.0, 0.0]), high=np.array([1.0, 1.0, 0.0, 1.0]), shape=(4,), dtype=np.float32
+        )
+        self.reward_dim = 4
+
+    def step(self, action):
+        """Step the parent environment and return a vector-valued reward.
+
+        The components are read from ``info["rewards"]``; the collision term is negated
+        so that it acts as a penalty. The parent's scalar reward is preserved in
+        ``info["original_reward"]``.
+        """
+        obs, reward, terminated, truncated, info = super().step(action)
+        rewards = info["rewards"]
+        vec_reward = np.array(
+            [
+                rewards["high_speed_reward"],
+                rewards["arrived_reward"],
+                -rewards["collision_reward"],
+                rewards["on_road_reward"],
+            ],
+            dtype=np.float32,
+        )
+        info["original_reward"] = reward
+        return obs, vec_reward, terminated, truncated, info
diff --git a/mo_gymnasium/envs/highway/merge.py b/mo_gymnasium/envs/highway/merge.py
new file mode 100644
index 00000000..45e53f1f
--- /dev/null
+++ b/mo_gymnasium/envs/highway/merge.py
@@ -0,0 +1,56 @@
+import numpy as np
+from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
+from highway_env.envs import MergeEnv
+
+
+class MOMergeEnv(MergeEnv, EzPickle):
+    """
+    ## Description
+    Multi-objective version of the MergeEnv environment.
+
+    See [highway-env](https://github.com/eleurent/highway-env) for more information.
+
+    ## Reward Space
+    The reward is 5-dimensional:
+    - 0: high speed reward
+    - 1: right lane reward
+    - 2: collision penalty (negated collision reward, in [-1, 0])
+    - 3: lane change reward
+    - 4: merging speed reward
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Build the environment and declare its vector reward space.
+
+        All arguments are forwarded unchanged to ``EzPickle`` and ``MergeEnv``.
+        """
+        EzPickle.__init__(self, *args, **kwargs)
+
+        super().__init__(*args, **kwargs)
+        self.reward_space = Box(
+            low=np.array([-1.0, 0.0, -1.0, 0.0, 0.0]), high=np.array([1.0, 1.0, 0.0, 1.0, 1.0]), shape=(5,), dtype=np.float32
+        )
+        self.reward_dim = 5
+
+    def step(self, action):
+        """Step the parent environment and return a vector-valued reward.
+
+        The components are read from ``info["rewards"]``; the speed terms are clipped to
+        their ``reward_space`` bounds and the collision term is negated so that it acts
+        as a penalty. The parent's scalar reward is preserved in ``info["original_reward"]``.
+        """
+        obs, reward, terminated, truncated, info = super().step(action)
+        rewards = info["rewards"]
+        vec_reward = np.array(
+            [
+                np.clip(rewards["high_speed_reward"], -1.0, 1.0),
+                rewards["right_lane_reward"],
+                -rewards["collision_reward"],
+                rewards["lane_change_reward"],
+                np.clip(rewards["merging_speed_reward"], 0.0, 1.0),
+            ],
+            dtype=np.float32,
+        )
+        info["original_reward"] = reward
+        return obs, vec_reward, terminated, truncated, info
diff --git a/mo_gymnasium/envs/highway/racetrack.py b/mo_gymnasium/envs/highway/racetrack.py
new file mode 100644
index 00000000..110c24b0
--- /dev/null
+++ b/mo_gymnasium/envs/highway/racetrack.py
@@ -0,0 +1,54 @@
+import numpy as np
+from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
+from highway_env.envs import RacetrackEnv
+
+
+class MORacetrackEnv(RacetrackEnv, EzPickle):
+    """
+    ## Description
+    Multi-objective version of the RacetrackEnv environment.
+
+    See [highway-env](https://github.com/eleurent/highway-env) for more information.
+
+    ## Reward Space
+    The reward is 4-dimensional:
+    - 0: lane centering reward
+    - 1: action reward
+    - 2: collision penalty (negated collision reward, in [-1, 0])
+    - 3: on road reward
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Build the environment and declare its vector reward space.
+
+        All arguments are forwarded unchanged to ``EzPickle`` and ``RacetrackEnv``.
+        """
+        EzPickle.__init__(self, *args, **kwargs)
+
+        super().__init__(*args, **kwargs)
+        self.reward_space = Box(
+            low=np.array([0.0, 0.0, -1.0, 0.0]), high=np.array([1.0, 1.0, 0.0, 1.0]), shape=(4,), dtype=np.float32
+        )
+        self.reward_dim = 4
+
+    def step(self, action):
+        """Step the parent environment and return a vector-valued reward.
+
+        The components are read from ``info["rewards"]``; the collision term is negated
+        so that it acts as a penalty. The parent's scalar reward is preserved in
+        ``info["original_reward"]``.
+        """
+        obs, reward, terminated, truncated, info = super().step(action)
+        rewards = info["rewards"]
+        vec_reward = np.array(
+            [
+                rewards["lane_centering_reward"],
+                rewards["action_reward"],
+                -rewards["collision_reward"],
+                rewards["on_road_reward"],
+            ],
+            dtype=np.float32,
+        )
+        info["original_reward"] = reward
+        return obs, vec_reward, terminated, truncated, info
diff --git a/mo_gymnasium/envs/highway/roundabout.py b/mo_gymnasium/envs/highway/roundabout.py
new file mode 100644
index 00000000..397fef45
--- /dev/null
+++ b/mo_gymnasium/envs/highway/roundabout.py
@@ -0,0 +1,54 @@
+import numpy as np
+from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
+from highway_env.envs import RoundaboutEnv
+
+
+class MORoundaboutEnv(RoundaboutEnv, EzPickle):
+    """
+    ## Description
+    Multi-objective version of the RoundaboutEnv environment.
+
+    See [highway-env](https://github.com/eleurent/highway-env) for more information.
+
+    ## Reward Space
+    The reward is 4-dimensional:
+    - 0: high speed reward
+    - 1: on road reward
+    - 2: collision penalty (negated collision reward, in [-1, 0])
+    - 3: lane change reward
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Build the environment and declare its vector reward space.
+
+        All arguments are forwarded unchanged to ``EzPickle`` and ``RoundaboutEnv``.
+        """
+        EzPickle.__init__(self, *args, **kwargs)
+
+        super().__init__(*args, **kwargs)
+        self.reward_space = Box(
+            low=np.array([0.0, 0.0, -1.0, 0.0]), high=np.array([1.0, 1.0, 0.0, 1.0]), shape=(4,), dtype=np.float32
+        )
+        self.reward_dim = 4
+
+    def step(self, action):
+        """Step the parent environment and return a vector-valued reward.
+
+        The components are read from ``info["rewards"]``; the collision term is negated
+        so that it acts as a penalty. The parent's scalar reward is preserved in
+        ``info["original_reward"]``.
+        """
+        obs, reward, terminated, truncated, info = super().step(action)
+        rewards = info["rewards"]
+        vec_reward = np.array(
+            [
+                rewards["high_speed_reward"],
+                rewards["on_road_reward"],
+                -rewards["collision_reward"],
+                rewards["lane_change_reward"],
+            ],
+            dtype=np.float32,
+        )
+        info["original_reward"] = reward
+        return obs, vec_reward, terminated, truncated, info