Add Mujoco v5 environments (#85)
Co-authored-by: Lucas Alegre <[email protected]>
Co-authored-by: Mark Towers <[email protected]>
Co-authored-by: Florian Felten <[email protected]>
3 people authored Oct 28, 2024
1 parent 5099244 commit 5b248ed
Showing 15 changed files with 456 additions and 15 deletions.
75 changes: 66 additions & 9 deletions mo_gymnasium/envs/mujoco/__init__.py
@@ -3,56 +3,113 @@

register(
id="mo-halfcheetah-v4",
entry_point="mo_gymnasium.envs.mujoco.half_cheetah:MOHalfCheehtahEnv",
entry_point="mo_gymnasium.envs.mujoco.half_cheetah_v4:MOHalfCheehtahEnv",
max_episode_steps=1000,
)

register(
id="mo-halfcheetah-v5",
entry_point="mo_gymnasium.envs.mujoco.half_cheetah_v5:MOHalfCheehtahEnv",
max_episode_steps=1000,
)

register(
id="mo-hopper-v4",
entry_point="mo_gymnasium.envs.mujoco.hopper:MOHopperEnv",
entry_point="mo_gymnasium.envs.mujoco.hopper_v4:MOHopperEnv",
max_episode_steps=1000,
)

register(
id="mo-hopper-v5",
entry_point="mo_gymnasium.envs.mujoco.hopper_v5:MOHopperEnv",
max_episode_steps=1000,
)

register(
id="mo-hopper-2d-v4",
entry_point="mo_gymnasium.envs.mujoco.hopper:MOHopperEnv",
entry_point="mo_gymnasium.envs.mujoco.hopper_v4:MOHopperEnv",
max_episode_steps=1000,
kwargs={"cost_objective": False},
)

register(
id="mo-hopper-2obj-v5",
entry_point="mo_gymnasium.envs.mujoco.hopper_v5:MOHopperEnv",
max_episode_steps=1000,
kwargs={"cost_objective": False},
)

register(
id="mo-walker2d-v4",
entry_point="mo_gymnasium.envs.mujoco.walker2d:MOWalker2dEnv",
entry_point="mo_gymnasium.envs.mujoco.walker2d_v4:MOWalker2dEnv",
max_episode_steps=1000,
)

register(
id="mo-walker2d-v5",
entry_point="mo_gymnasium.envs.mujoco.walker2d_v5:MOWalker2dEnv",
max_episode_steps=1000,
)

register(
id="mo-ant-v4",
entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
entry_point="mo_gymnasium.envs.mujoco.ant_v4:MOAntEnv",
max_episode_steps=1000,
)

register(
id="mo-ant-2d-v4",
entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
entry_point="mo_gymnasium.envs.mujoco.ant_v4:MOAntEnv",
max_episode_steps=1000,
kwargs={"cost_objective": False},
)


register(
id="mo-ant-v5",
entry_point="mo_gymnasium.envs.mujoco.ant_v5:MOAntEnv",
max_episode_steps=1000,
)

register(
id="mo-ant-2obj-v5",
entry_point="mo_gymnasium.envs.mujoco.ant_v5:MOAntEnv",
max_episode_steps=1000,
kwargs={"cost_objective": False},
)

register(
id="mo-swimmer-v4",
entry_point="mo_gymnasium.envs.mujoco.swimmer:MOSwimmerEnv",
entry_point="mo_gymnasium.envs.mujoco.swimmer_v4:MOSwimmerEnv",
max_episode_steps=1000,
)

register(
id="mo-swimmer-v5",
entry_point="mo_gymnasium.envs.mujoco.swimmer_v5:MOSwimmerEnv",
max_episode_steps=1000,
)

register(
id="mo-humanoid-v4",
entry_point="mo_gymnasium.envs.mujoco.humanoid:MOHumanoidEnv",
entry_point="mo_gymnasium.envs.mujoco.humanoid_v4:MOHumanoidEnv",
max_episode_steps=1000,
)

register(
id="mo-humanoid-v5",
entry_point="mo_gymnasium.envs.mujoco.humanoid_v5:MOHumanoidEnv",
max_episode_steps=1000,
)

register(
id="mo-reacher-v4",
entry_point="mo_gymnasium.envs.mujoco.reacher:MOReacherEnv",
entry_point="mo_gymnasium.envs.mujoco.reacher_v4:MOReacherEnv",
max_episode_steps=50,
)

register(
id="mo-reacher-v5",
entry_point="mo_gymnasium.envs.mujoco.reacher_v5:MOReacherEnv",
max_episode_steps=50,
)
mo_gymnasium/envs/mujoco/ant_v4.py
@@ -28,7 +28,7 @@ class MOAntEnv(AntEnv, EzPickle):
def __init__(self, cost_objective=True, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, cost_objective, **kwargs)
self.cost_objetive = cost_objective
self._cost_objective = cost_objective
self.reward_dim = 3 if cost_objective else 2
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

@@ -39,7 +39,7 @@ def step(self, action):
cost = info["reward_ctrl"]
healthy_reward = info["reward_survive"]

if self.cost_objetive:
if self._cost_objective:
cost /= self._ctrl_cost_weight # Ignore the weight in the original AntEnv
vec_reward = np.array([x_velocity, y_velocity, cost], dtype=np.float32)
else:
60 changes: 60 additions & 0 deletions mo_gymnasium/envs/mujoco/ant_v5.py
@@ -0,0 +1,60 @@
import numpy as np
from gymnasium.envs.mujoco.ant_v5 import AntEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOAntEnv(AntEnv, EzPickle):
"""
## Description
Multi-objective version of the AntEnv environment.
See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/ant/) for more information.
The original Gymnasium's 'Ant-v5' is recovered by the following linear scalarization:
env = mo_gym.make('mo-ant-v5', cost_objective=False)
LinearReward(env, weight=np.array([1.0, 0.0]))
## Reward Space
The reward is 2- or 3-dimensional:
- 0: x-velocity
- 1: y-velocity
- 2: Control cost of the action
If the cost_objective flag is set to False, the reward is 2-dimensional, and the cost is added to the other objectives.
A healthy reward and a cost for contact forces are added to all objectives.
A 2-objective version (without the cost objective as a separate objective) can be instantiated via:
env = mo_gym.make('mo-ant-2obj-v5')
## Version History
- v5: Now includes contact forces in the reward and observation.
The 2-objective version now has the id 'mo-ant-2obj-v5' instead of 'mo-ant-2d-v4'.
See https://gymnasium.farama.org/environments/mujoco/ant/#version-history
"""

def __init__(self, cost_objective=True, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, cost_objective, **kwargs)
self._cost_objective = cost_objective
self.reward_dim = 3 if cost_objective else 2
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
x_velocity = info["x_velocity"]
y_velocity = info["y_velocity"]
cost = info["reward_ctrl"]
healthy_reward = info["reward_survive"]

if self._cost_objective:
cost /= self._ctrl_cost_weight # Ignore the weight in the original AntEnv
vec_reward = np.array([x_velocity, y_velocity, cost], dtype=np.float32)
else:
vec_reward = np.array([x_velocity, y_velocity], dtype=np.float32)
vec_reward += cost

vec_reward += healthy_reward
vec_reward += info["reward_contact"] # Do not treat contact forces as a separate objective

return observation, vec_reward, terminated, truncated, info
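
To illustrate the scalarization mentioned in the docstring, a minimal sketch, assuming the LinearReward wrapper is available from mo_gymnasium.wrappers in this release:

    import numpy as np
    import mo_gymnasium as mo_gym
    from mo_gymnasium.wrappers import LinearReward  # assumed import path for this release

    # With the cost folded into both objectives (the 2-objective variant), weighting only
    # the x-velocity component reproduces the scalar reward of Gymnasium's Ant-v5.
    env = LinearReward(mo_gym.make("mo-ant-2obj-v5"), weight=np.array([1.0, 0.0]))
    obs, info = env.reset(seed=0)
    obs, scalar_reward, terminated, truncated, info = env.step(env.action_space.sample())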
File renamed without changes.
40 changes: 40 additions & 0 deletions mo_gymnasium/envs/mujoco/half_cheetah_v5.py
@@ -0,0 +1,40 @@
import numpy as np
from gymnasium.envs.mujoco.half_cheetah_v5 import HalfCheetahEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOHalfCheehtahEnv(HalfCheetahEnv, EzPickle):
"""
## Description
Multi-objective version of the HalfCheetahEnv environment.
See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/half_cheetah/) for more information.
The original Gymnasium's 'HalfCheetah-v5' is recovered by the following linear scalarization:
env = mo_gym.make('mo-halfcheetah-v5')
LinearReward(env, weight=np.array([1.0, 0.1]))
## Reward Space
The reward is 2-dimensional:
- 0: Reward for running forward
- 1: Control cost of the action
## Version History
- v5: The scale of the control cost has changed from v4.
See https://gymnasium.farama.org/environments/mujoco/half_cheetah/#version-history for other changes.
"""

def __init__(self, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, **kwargs)
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
self.reward_dim = 2

def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
x_velocity = info["x_velocity"]
neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight # Revert the scale applied in the original environment
vec_reward = np.array([x_velocity, neg_energy_cost], dtype=np.float32)
return observation, vec_reward, terminated, truncated, info
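
As a sanity check of the weights quoted in the docstring, a small sketch assuming the default ctrl_cost_weight of 0.1 and forward_reward_weight of 1.0:

    import numpy as np
    import mo_gymnasium as mo_gym

    env = mo_gym.make("mo-halfcheetah-v5")
    obs, info = env.reset(seed=1)
    obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())
    # vec_reward = [x_velocity, reward_ctrl / ctrl_cost_weight]; with the default weight
    # of 0.1 the dot product below matches Gymnasium's scalar HalfCheetah-v5 reward.
    scalarized = float(np.dot(np.array([1.0, 0.1]), vec_reward))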
mo_gymnasium/envs/mujoco/hopper_v4.py
@@ -27,7 +27,7 @@ class MOHopperEnv(HopperEnv, EzPickle):
def __init__(self, cost_objective=True, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, cost_objective, **kwargs)
self.cost_objetive = cost_objective
self._cost_objective = cost_objective
self.reward_dim = 3 if cost_objective else 2
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

@@ -53,7 +53,7 @@ def step(self, action):
height = 10 * (z - self.init_qpos[1])
energy_cost = np.sum(np.square(action))

if self.cost_objetive:
if self._cost_objective:
vec_reward = np.array([x_velocity, height, -energy_cost], dtype=np.float32)
else:
vec_reward = np.array([x_velocity, height], dtype=np.float32)
55 changes: 55 additions & 0 deletions mo_gymnasium/envs/mujoco/hopper_v5.py
@@ -0,0 +1,55 @@
import numpy as np
from gymnasium.envs.mujoco.hopper_v5 import HopperEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOHopperEnv(HopperEnv, EzPickle):
"""
## Description
Multi-objective version of the HopperEnv environment.
See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/hopper/) for more information.
The original Gymnasium's 'Hopper-v5' is recovered by the following linear scalarization:
env = mo_gym.make('mo-hopper-v5')
LinearReward(env, weight=np.array([1.0, 0.0, 1e-3]))
## Reward Space
The reward is 3-dimensional:
- 0: Reward for going forward on the x-axis
- 1: Reward for jumping high on the z-axis
- 2: Control cost of the action
If the cost_objective flag is set to False, the reward is 2-dimensional, and the cost is added to the other objectives.
A 2-objective version (without the cost objective as a separate objective) can be instantiated via:
env = mo_gym.make('mo-hopper-2obj-v5')
## Version History
- v5: The 2-objective version now has the id 'mo-hopper-2obj-v5' instead of 'mo-hopper-2d-v4'.
See https://gymnasium.farama.org/environments/mujoco/hopper/#version-history
"""

def __init__(self, cost_objective=True, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, cost_objective, **kwargs)
self._cost_objective = cost_objective
self.reward_dim = 3 if cost_objective else 2
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
x_velocity = info["x_velocity"]
height = 10 * info["z_distance_from_origin"]
neg_energy_cost = info["reward_ctrl"]
if self._cost_objective:
neg_energy_cost /= self._ctrl_cost_weight # Revert the scale applied in the original environment
vec_reward = np.array([x_velocity, height, neg_energy_cost], dtype=np.float32)
else:
vec_reward = np.array([x_velocity, height], dtype=np.float32)
vec_reward += neg_energy_cost

vec_reward += info["reward_survive"]

return observation, vec_reward, terminated, truncated, info
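
For reference, a short sketch contrasting the default three-objective id with the two-objective registration mentioned above (reward_dim is set in __init__):

    import mo_gymnasium as mo_gym

    env3 = mo_gym.make("mo-hopper-v5")       # objectives: x-velocity, height, control cost
    env2 = mo_gym.make("mo-hopper-2obj-v5")  # cost folded into the other objectives
    assert env3.unwrapped.reward_dim == 3
    assert env2.unwrapped.reward_dim == 2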
File renamed without changes.
44 changes: 44 additions & 0 deletions mo_gymnasium/envs/mujoco/humanoid_v5.py
@@ -0,0 +1,44 @@
import numpy as np
from gymnasium.envs.mujoco.humanoid_v5 import HumanoidEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOHumanoidEnv(HumanoidEnv, EzPickle):
"""
## Description
Multi-objective version of the HumanoidEnv environment.
See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/humanoid/) for more information.
The original Gymnasium's 'Humanoid-v5' is recovered by the following linear scalarization:
env = mo_gym.make('mo-humanoid-v5')
LinearReward(env, weight=np.array([1.25, 0.1]))
## Reward Space
The reward is 2-dimensional:
- 0: Reward for running forward (x-velocity)
- 1: Control cost of the action
## Version History
- v5: Now includes contact forces. The scale of the control cost has changed from v4.
See https://gymnasium.farama.org/environments/mujoco/humanoid/#version-history for other changes.
"""

def __init__(self, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, **kwargs)
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
self.reward_dim = 2

def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
velocity = info["x_velocity"]
neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight # Revert the scale applied in the original environment
vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)

vec_reward += self.healthy_reward # All objectives are penalized when the agent falls
vec_reward += info["reward_contact"] # Do not treat contact forces as a separate objective

return observation, vec_reward, terminated, truncated, info
File renamed without changes.
