Skip to content

Commit

Permalink
Merge branch 'gymnasium-v5' into mujoco-v5
Browse files Browse the repository at this point in the history
  • Loading branch information
Lucas Alegre committed Aug 14, 2024
2 parents cf081b0 + dbddf3a commit 57870fe
Show file tree
Hide file tree
Showing 22 changed files with 787 additions and 621 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ obs, info = env.reset()
next_obs, vector_reward, terminated, truncated, info = env.step(your_agent.act(obs))

# Optionally, you can scalarize the reward function with the LinearReward wrapper
env = mo_gym.LinearReward(env, weight=np.array([0.8, 0.2, 0.2]))
env = mo_gym.wrappers.LinearReward(env, weight=np.array([0.8, 0.2, 0.2]))
```
For details on multi-objective MDPs (MOMDPs) and other MORL definitions, see [A practical guide to multi-objective reinforcement learning and planning](https://link.springer.com/article/10.1007/s10458-022-09552-y).

Expand Down
1 change: 1 addition & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ lastpage:
introduction/install
introduction/api
wrappers/wrappers
wrappers/vector_wrappers
examples/morl_baselines
```

Expand Down
20 changes: 20 additions & 0 deletions docs/wrappers/vector_wrappers.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
title: "Vector Wrappers"
---

# Vector Wrappers

Similar to the normal wrappers, MO-Gymnasium provides a few wrappers that are specifically designed to work with vectorized environments. They are all available directly from the `mo_gymnasium.wrappers.vector` module.


## `MOSyncVectorEnv`

```{eval-rst}
.. autoclass:: mo_gymnasium.wrappers.vector.MOSyncVectorEnv
```

## `MORecordEpisodeStatistics`

```{eval-rst}
.. autoclass:: mo_gymnasium.wrappers.vector.MORecordEpisodeStatistics
```
16 changes: 8 additions & 8 deletions docs/wrappers/wrappers.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,36 @@ title: "Wrappers"

# Wrappers

A few wrappers inspired from Gymnasium's wrappers are available in MO-Gymnasium. They are all available directly from the `mo_gymnasium` module.
A few wrappers inspired by Gymnasium's wrappers are available in MO-Gymnasium. They are all available directly from the `mo_gymnasium.wrappers` module.


## `LinearReward`


```{eval-rst}
.. autoclass:: mo_gymnasium.LinearReward
.. autoclass:: mo_gymnasium.wrappers.LinearReward
```

## `MONormalizeReward`

```{eval-rst}
.. autoclass:: mo_gymnasium.MONormalizeReward
.. autoclass:: mo_gymnasium.wrappers.MONormalizeReward
```

## `MOClipReward`

```{eval-rst}
.. autoclass:: mo_gymnasium.MOClipReward
.. autoclass:: mo_gymnasium.wrappers.MOClipReward
```

## `MOSyncVectorEnv`
## `MORecordEpisodeStatistics`

```{eval-rst}
.. autoclass:: mo_gymnasium.MOSyncVectorEnv
.. autoclass:: mo_gymnasium.wrappers.MORecordEpisodeStatistics
```

## `MORecordEpisodeStatistics`
## `MOMaxAndSkipObservation`

```{eval-rst}
.. autoclass:: mo_gymnasium.MORecordEpisodeStatistics
.. autoclass:: mo_gymnasium.wrappers.MOMaxAndSkipObservation
```
12 changes: 3 additions & 9 deletions mo_gymnasium/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,10 @@

# Envs
import mo_gymnasium.envs
from mo_gymnasium import wrappers

# Utils
from mo_gymnasium.utils import (
LinearReward,
MOClipReward,
MONormalizeReward,
MORecordEpisodeStatistics,
MOSyncVectorEnv,
make,
)
from mo_gymnasium.utils import make


__version__ = "1.1.0"
__version__ = "1.2.0"
1 change: 0 additions & 1 deletion mo_gymnasium/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,5 @@
import mo_gymnasium.envs.minecart
import mo_gymnasium.envs.mountain_car
import mo_gymnasium.envs.mujoco
import mo_gymnasium.envs.reacher
import mo_gymnasium.envs.resource_gathering
import mo_gymnasium.envs.water_reservoir
4 changes: 2 additions & 2 deletions mo_gymnasium/envs/lunar_lander/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@


register(
id="mo-lunar-lander-v2",
id="mo-lunar-lander-v3",
entry_point="mo_gymnasium.envs.lunar_lander.lunar_lander:MOLunarLander",
max_episode_steps=1000,
)

register(
id="mo-lunar-lander-continuous-v2",
id="mo-lunar-lander-continuous-v3",
entry_point="mo_gymnasium.envs.lunar_lander.lunar_lander:MOLunarLander",
max_episode_steps=1000,
kwargs={"continuous": True},
Expand Down
8 changes: 5 additions & 3 deletions mo_gymnasium/envs/mario/mario.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from gymnasium.utils import EzPickle, seeding

# from stable_baselines3.common.atari_wrappers import MaxAndSkipEnv
from gymnasium.wrappers import GrayScaleObservation, ResizeObservation
from nes_py.nes_env import SCREEN_SHAPE_24_BIT

import mo_gymnasium as mo_gym
Expand All @@ -16,7 +15,7 @@
from mo_gymnasium.envs.mario.joypad_space import JoypadSpace


class MOSuperMarioBros(SuperMarioBrosEnv, EzPickle):
class MOSuperMarioBros(SuperMarioBrosEnv, gym.Env, EzPickle):
"""
## Description
Multi-objective version of the SuperMarioBros environment.
Expand Down Expand Up @@ -202,11 +201,14 @@ def step(self, action):


if __name__ == "__main__":
from gymnasium.wrappers import ResizeObservation
from gymnasium.wrappers.transform_observation import GrayscaleObservation

env = MOSuperMarioBros()
env = JoypadSpace(env, SIMPLE_MOVEMENT)
# env = MaxAndSkipEnv(env, 4)
env = ResizeObservation(env, (84, 84))
env = GrayScaleObservation(env)
env = GrayscaleObservation(env)
# env = FrameStack(env, 4)
env = mo_gym.LinearReward(env)

Expand Down
21 changes: 21 additions & 0 deletions mo_gymnasium/envs/mountain_car/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,24 @@
entry_point="mo_gymnasium.envs.mountain_car.mountain_car:MOMountainCar",
max_episode_steps=200,
)

register(
id="mo-mountaincar-3d-v0",
entry_point="mo_gymnasium.envs.mountain_car.mountain_car:MOMountainCar",
max_episode_steps=200,
kwargs={"add_speed_objective": True, "merge_move_penalty": True},
)

register(
id="mo-mountaincar-timemove-v0",
entry_point="mo_gymnasium.envs.mountain_car.mountain_car:MOMountainCar",
max_episode_steps=200,
kwargs={"merge_move_penalty": True},
)

register(
id="mo-mountaincar-timespeed-v0",
entry_point="mo_gymnasium.envs.mountain_car.mountain_car:MOMountainCar",
max_episode_steps=200,
kwargs={"remove_move_penalty": True, "add_speed_objective": True},
)
56 changes: 48 additions & 8 deletions mo_gymnasium/envs/mountain_car/mountain_car.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,50 @@ class MOMountainCar(MountainCarEnv, EzPickle):
See [Gymnasium's env](https://gymnasium.farama.org/environments/classic_control/mountain_car_continuous/) for more information.
## Reward space:
The reward space is a 3D vector containing the time penalty, and penalties for reversing and going forward.
By default, the reward space is a 3D vector containing the time penalty, and penalties for reversing and going forward.
- time penalty: -1.0 for each time step
- reverse penalty: -1.0 for each time step the action is 0 (reverse)
- forward penalty: -1.0 for each time step the action is 2 (forward)
Alternatively, the reward can be changed with the following options:
- add_speed_objective: Add an extra objective corresponding to the speed of the car.
- remove_move_penalty: Remove the reverse and forward objectives.
- merge_move_penalty: Merge reverse and forward penalties into a single penalty.
"""

def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
def __init__(
self,
render_mode: Optional[str] = None,
add_speed_objective: bool = False,
remove_move_penalty: bool = False,
merge_move_penalty: bool = False,
goal_velocity=0,
):
super().__init__(render_mode, goal_velocity)
EzPickle.__init__(self, render_mode, goal_velocity)
EzPickle.__init__(self, render_mode, add_speed_objective, remove_move_penalty, merge_move_penalty, goal_velocity)
self.add_speed_objective = add_speed_objective
self.remove_move_penalty = remove_move_penalty
self.merge_move_penalty = merge_move_penalty

self.reward_space = spaces.Box(low=np.array([-1, -1, -1]), high=np.array([-1, 0, 0]), shape=(3,), dtype=np.float32)
self.reward_dim = 3

if self.add_speed_objective:
self.reward_dim += 1

if self.remove_move_penalty:
self.reward_dim -= 2
elif self.merge_move_penalty:
self.reward_dim -= 1

low = np.array([-1] * self.reward_dim)
high = np.zeros(self.reward_dim)
high[0] = -1 # Time penalty is always -1
if self.add_speed_objective:
low[-1] = 0.0
high[-1] = 1.1

self.reward_space = spaces.Box(low=low, high=high, shape=(self.reward_dim,), dtype=np.float32)

def step(self, action: int):
assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid"

Expand All @@ -39,11 +70,20 @@ def step(self, action: int):
velocity = 0

terminated = bool(position >= self.goal_position and velocity >= self.goal_velocity)
# reward = -1.0
reward = np.zeros(3, dtype=np.float32)

reward = np.zeros(self.reward_dim, dtype=np.float32)

reward[0] = 0.0 if terminated else -1.0 # time penalty
reward[1] = 0.0 if action != 0 else -1.0 # reverse penalty
reward[2] = 0.0 if action != 2 else -1.0 # forward penalty

if not self.remove_move_penalty:
if self.merge_move_penalty:
reward[1] = 0.0 if action == 1 else -1.0
else:
reward[1] = 0.0 if action != 0 else -1.0 # reverse penalty
reward[2] = 0.0 if action != 2 else -1.0 # forward penalty

if self.add_speed_objective:
reward[-1] = 15 * abs(velocity)

self.state = (position, velocity)
if self.render_mode == "human":
Expand Down
2 changes: 1 addition & 1 deletion mo_gymnasium/envs/mujoco/reacher_v4.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
class MOReacherEnv(ReacherEnv):
"""
## Description
Mujoco version of `mo-reacher-v0`, based on [`Reacher-v4` environment](https://gymnasium.farama.org/environments/mujoco/reacher/).
Multi-objective version of the [`Reacher-v4` environment](https://gymnasium.farama.org/environments/mujoco/reacher/).
## Observation Space
The observation is 6-dimensional and contains:
Expand Down
9 changes: 0 additions & 9 deletions mo_gymnasium/envs/reacher/__init__.py

This file was deleted.

Loading

0 comments on commit 57870fe

Please sign in to comment.