Skip to content

Commit

Permalink
Hopper and HalfCheetah v5
Browse files Browse the repository at this point in the history
  • Loading branch information
LucasAlegre committed Feb 18, 2024
1 parent 9603038 commit 531aa68
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 4 deletions.
27 changes: 23 additions & 4 deletions mo_gymnasium/envs/mujoco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,44 @@

# Environment registrations for the multi-objective MuJoCo envs.
# NOTE: the diff view interleaved old and new `entry_point=` lines, which as
# Python would be duplicate keyword arguments (SyntaxError); this is the clean
# post-commit state with one entry_point per call.
# NOTE(review): "MOHalfCheehtahEnv" is misspelled but matches the actual class
# name in half_cheetah_*.py — do not "fix" it here alone or registration breaks.

register(
    id="mo-halfcheetah-v4",
    entry_point="mo_gymnasium.envs.mujoco.half_cheetah_v4:MOHalfCheehtahEnv",
    max_episode_steps=1000,
)

register(
    id="mo-halfcheetah-v5",
    entry_point="mo_gymnasium.envs.mujoco.half_cheetah_v5:MOHalfCheehtahEnv",
    max_episode_steps=1000,
)

register(
    id="mo-hopper-v4",
    entry_point="mo_gymnasium.envs.mujoco.hopper_v4:MOHopperEnv",
    max_episode_steps=1000,
)

register(
    id="mo-hopper-v5",
    entry_point="mo_gymnasium.envs.mujoco.hopper_v5:MOHopperEnv",
    max_episode_steps=1000,
)

# 2-objective variant: drops the control-cost objective (folded into the others).
register(
    id="mo-hopper-2d-v4",
    entry_point="mo_gymnasium.envs.mujoco.hopper_v4:MOHopperEnv",
    max_episode_steps=1000,
    kwargs={"cost_objective": False},
)

register(
    id="mo-hopper-2d-v5",
    entry_point="mo_gymnasium.envs.mujoco.hopper_v5:MOHopperEnv",
    max_episode_steps=1000,
    kwargs={"cost_objective": False},
)

register(
    id="mo-reacher-v4",
    entry_point="mo_gymnasium.envs.mujoco.reacher_v4:MOReacherEnv",
    max_episode_steps=50,
)
File renamed without changes.
29 changes: 29 additions & 0 deletions mo_gymnasium/envs/mujoco/half_cheetah_v5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import numpy as np
from gymnasium.envs.mujoco.half_cheetah_v5 import HalfCheetahEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOHalfCheehtahEnv(HalfCheetahEnv, EzPickle):
    """
    ## Description
    Multi-objective wrapper around Gymnasium's HalfCheetah environment.
    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/half_cheetah/) for more information.
    ## Reward Space
    The reward is 2-dimensional:
    - 0: Reward for running forward
    - 1: Control cost of the action
    """

    def __init__(self, **kwargs):
        """Build the base HalfCheetah env and expose a 2-dimensional reward space."""
        super().__init__(**kwargs)
        EzPickle.__init__(self, **kwargs)
        self.reward_dim = 2
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))

    def step(self, action):
        """Step the base env, replacing the scalar reward with the reward vector."""
        observation, reward, terminated, truncated, info = super().step(action)
        # Reward components are taken from the info dict populated by the base env.
        forward_reward = info["reward_forward"]
        ctrl_reward = info["reward_ctrl"]
        vec_reward = np.array([forward_reward, ctrl_reward], dtype=np.float32)
        return observation, vec_reward, terminated, truncated, info
File renamed without changes.
42 changes: 42 additions & 0 deletions mo_gymnasium/envs/mujoco/hopper_v5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import numpy as np
from gymnasium.envs.mujoco.hopper_v5 import HopperEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOHopperEnv(HopperEnv, EzPickle):
    """
    ## Description
    Multi-objective version of the HopperEnv environment.
    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/hopper/) for more information.
    ## Reward Space
    The reward is 3-dimensional:
    - 0: Reward for going forward on the x-axis
    - 1: Reward for jumping high on the z-axis
    - 2: Control cost of the action
    If the cost_objective flag is set to False, the reward is 2-dimensional, and
    the (weighted) control cost is subtracted from the other objectives instead.
    """

    def __init__(self, cost_objective=True, **kwargs):
        """Build the base Hopper env.

        Args:
            cost_objective: if True, the control cost is a separate (third)
                objective; if False, it is folded into the two other objectives.
            **kwargs: forwarded to HopperEnv.
        """
        super().__init__(**kwargs)
        EzPickle.__init__(self, cost_objective, **kwargs)
        # Fixed spelling of the flag attribute; the misspelled alias is kept so
        # any external code reading the old name keeps working.
        self.cost_objective = cost_objective
        self.cost_objetive = cost_objective  # deprecated misspelling, kept for backward compatibility
        self.reward_dim = 3 if cost_objective else 2
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

    def step(self, action):
        """Step the base env, replacing the scalar reward with the reward vector."""
        observation, reward, terminated, truncated, info = super().step(action)
        x_velocity = info["x_velocity"]
        # Height objective: scaled z-displacement reported by the base env.
        height = 10 * info["z_distance_from_origin"]
        energy_cost = np.sum(np.square(action))
        if self.cost_objective:
            vec_reward = np.array([x_velocity, height, -energy_cost], dtype=np.float32)
        else:
            vec_reward = np.array([x_velocity, height], dtype=np.float32)
            # Fold the weighted control cost into both remaining objectives.
            vec_reward -= self._ctrl_cost_weight * energy_cost

        # The survival bonus is added to every objective.
        vec_reward += info["reward_survive"]

        return observation, vec_reward, terminated, truncated, info
File renamed without changes.

0 comments on commit 531aa68

Please sign in to comment.