diff --git a/CITATION.bib b/CITATION.bib
index acdab6f3..85bae8e6 100644
--- a/CITATION.bib
+++ b/CITATION.bib
@@ -1,6 +1,6 @@
-@inproceedings{Alegre+2022bnaic,
- author = {Lucas N. Alegre and Florian Felten and El-Ghazali Talbi and Gr{\'e}goire Danoy and Ann Now{\'e} and Ana L. C. Bazzan and Bruno C. da Silva},
- title = {{MO-Gym}: A Library of Multi-Objective Reinforcement Learning Environments},
- booktitle = {Proceedings of the 34th Benelux Conference on Artificial Intelligence BNAIC/Benelearn 2022},
- year = {2022}
+@inproceedings{felten_toolkit_2023,
+ author = {Felten, Florian and Alegre, Lucas N. and Now{\'e}, Ann and Bazzan, Ana L. C. and Talbi, El Ghazali and Danoy, Gr{\'e}goire and Silva, Bruno C. {\relax da}},
+ title = {A Toolkit for Reliable Benchmarking and Research in Multi-Objective Reinforcement Learning},
+ booktitle = {Proceedings of the 37th Conference on Neural Information Processing Systems ({NeurIPS} 2023)},
+ year = {2023}
}
diff --git a/README.md b/README.md
index aaf999e4..fb5f7885 100644
--- a/README.md
+++ b/README.md
@@ -82,11 +82,11 @@ Maintenance for this project is also contributed by the broader Farama team: [fa
If you use this repository in your research, please cite:
```bibtex
-@inproceedings{Alegre+2022bnaic,
- author = {Lucas N. Alegre and Florian Felten and El-Ghazali Talbi and Gr{\'e}goire Danoy and Ann Now{\'e} and Ana L. C. Bazzan and Bruno C. da Silva},
- title = {{MO-Gym}: A Library of Multi-Objective Reinforcement Learning Environments},
- booktitle = {Proceedings of the 34th Benelux Conference on Artificial Intelligence BNAIC/Benelearn 2022},
- year = {2022}
+@inproceedings{felten_toolkit_2023,
+ author = {Felten, Florian and Alegre, Lucas N. and Now{\'e}, Ann and Bazzan, Ana L. C. and Talbi, El Ghazali and Danoy, Gr{\'e}goire and Silva, Bruno C. {\relax da}},
+ title = {A Toolkit for Reliable Benchmarking and Research in Multi-Objective Reinforcement Learning},
+ booktitle = {Proceedings of the 37th Conference on Neural Information Processing Systems ({NeurIPS} 2023)},
+ year = {2023}
}
```
diff --git a/docs/_static/videos/minecart-rgb.gif b/docs/_static/videos/minecart-rgb.gif
new file mode 100644
index 00000000..39ad4172
Binary files /dev/null and b/docs/_static/videos/minecart-rgb.gif differ
diff --git a/docs/_static/videos/mo-ant.gif b/docs/_static/videos/mo-ant.gif
new file mode 100644
index 00000000..9397b4ff
Binary files /dev/null and b/docs/_static/videos/mo-ant.gif differ
diff --git a/docs/_static/videos/mo-humanoid.gif b/docs/_static/videos/mo-humanoid.gif
new file mode 100644
index 00000000..625a40f8
Binary files /dev/null and b/docs/_static/videos/mo-humanoid.gif differ
diff --git a/docs/_static/videos/mo-lunar-lander-continuous.gif b/docs/_static/videos/mo-lunar-lander-continuous.gif
new file mode 100644
index 00000000..2051d754
Binary files /dev/null and b/docs/_static/videos/mo-lunar-lander-continuous.gif differ
diff --git a/docs/_static/videos/mo-swimmer.gif b/docs/_static/videos/mo-swimmer.gif
new file mode 100644
index 00000000..f1dffd63
Binary files /dev/null and b/docs/_static/videos/mo-swimmer.gif differ
diff --git a/docs/_static/videos/mo-walker2d.gif b/docs/_static/videos/mo-walker2d.gif
new file mode 100644
index 00000000..6a2a2e1e
Binary files /dev/null and b/docs/_static/videos/mo-walker2d.gif differ
diff --git a/docs/citing/citing.md b/docs/citing/citing.md
index b90bd5d1..64b21646 100644
--- a/docs/citing/citing.md
+++ b/docs/citing/citing.md
@@ -7,6 +7,17 @@ title: "Citing"
:end-before:
```
+MO-Gymnasium (formerly MO-Gym) first appeared in the following workshop publication:
+
+```bibtex
+@inproceedings{Alegre+2022bnaic,
+ author = {Lucas N. Alegre and Florian Felten and El-Ghazali Talbi and Gr{\'e}goire Danoy and Ann Now{\'e} and Ana L. C. Bazzan and Bruno C. {\relax da} Silva},
+ title = {{MO-Gym}: A Library of Multi-Objective Reinforcement Learning Environments},
+ booktitle = {Proceedings of the 34th Benelux Conference on Artificial Intelligence BNAIC/Benelearn 2022},
+ year = {2022}
+}
+```
+
```{toctree}
:hidden:
:glob:
diff --git a/docs/environments/all-environments.md b/docs/environments/all-environments.md
deleted file mode 100644
index 0975216a..00000000
--- a/docs/environments/all-environments.md
+++ /dev/null
@@ -1,39 +0,0 @@
----
-title: "Environments"
----
-
-# Available environments
-
-
-MO-Gymnasium includes environments taken from the MORL literature, as well as multi-objective version of classical environments, such as Mujoco.
-
-| Env | Obs/Action spaces | Objectives | Description |
-|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| [`deep-sea-treasure-v0`](https://mo-gymnasium.farama.org/environments/deep-sea-treasure/) | Discrete / Discrete | `[treasure, time_penalty]` | Agent is a submarine that must collect a treasure while taking into account a time penalty. Treasures values taken from [Yang et al. 2019](https://arxiv.org/pdf/1908.08342.pdf). |
-| [`deep-sea-treasure-concave-v0`](https://mo-gymnasium.farama.org/environments/deep-sea-treasure-concave/) | Discrete / Discrete | `[treasure, time_penalty]` | Agent is a submarine that must collect a treasure while taking into account a time penalty. Treasures values taken from [Vamplew et al. 2010](https://link.springer.com/article/10.1007/s10994-010-5232-5). |
-| [`deep-sea-treasure-mirrored-v0`](https://mo-gymnasium.farama.org/environments/deep-sea-treasure-mirrored/) | Discrete / Discrete | `[treasure, time_penalty]` | Harder version of the concave DST [Felten et al. 2022](https://www.scitepress.org/Papers/2022/109891/109891.pdf). |
-| [`resource-gathering-v0`](https://mo-gymnasium.farama.org/environments/resource-gathering/) | Discrete / Discrete | `[enemy, gold, gem]` | Agent must collect gold or gem. Enemies have a 10% chance of killing the agent. From [Barret & Narayanan 2008](https://dl.acm.org/doi/10.1145/1390156.1390162). |
-| [`fishwood-v0`](https://mo-gymnasium.farama.org/environments/fishwood/) | Discrete / Discrete | `[fish_amount, wood_amount]` | ESR environment, the agent must collect fish and wood to light a fire and eat. From [Roijers et al. 2018](https://www.researchgate.net/publication/328718263_Multi-objective_Reinforcement_Learning_for_the_Expected_Utility_of_the_Return). |
-| [`breakable-bottles-v0`](https://mo-gymnasium.farama.org/environments/breakable-bottles/) | Discrete (Dictionary) / Discrete | `[time_penalty, bottles_delivered, potential]` | Gridworld with 5 cells. The agents must collect bottles from the source location and deliver to the destination. From [Vamplew et al. 2021](https://www.sciencedirect.com/science/article/pii/S0952197621000336). |
-| [`fruit-tree-v0`](https://mo-gymnasium.farama.org/environments/fruit-tree/) | Discrete / Discrete | `[nutri1, ..., nutri6]` | Full binary tree of depth d=5,6 or 7. Every leaf contains a fruit with a value for the nutrients Protein, Carbs, Fats, Vitamins, Minerals and Water. From [Yang et al. 2019](https://arxiv.org/pdf/1908.08342.pdf). |
-| [`water-reservoir-v0`](https://mo-gymnasium.farama.org/environments/water-reservoir/) | Continuous / Continuous | `[cost_flooding, deficit_water]` | A Water reservoir environment. The agent executes a continuous action, corresponding to the amount of water released by the dam. From [Pianosi et al. 2013](https://iwaponline.com/jh/article/15/2/258/3425/Tree-based-fitted-Q-iteration-for-multi-objective). |
-| [`four-room-v0`](https://mo-gymnasium.farama.org/environments/four-room/) | Discrete / Discrete | `[item1, item2, item3]` | Agent must collect three different types of items in the map and reach the goal. From [Alegre et al. 2022](https://proceedings.mlr.press/v162/alegre22a.html). |
-| [`mo-mountaincar-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) | Continuous / Discrete | `[time_penalty, reverse_penalty, forward_penalty]` | Classic Mountain Car env, but with extra penalties for the forward and reverse actions. From [Vamplew et al. 2011](https://www.researchgate.net/publication/220343783_Empirical_evaluation_methods_for_multiobjective_reinforcement_learning_algorithms). |
-| [`mo-mountaincarcontinuous-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincarcontinuous/) | Continuous / Continuous | `[time_penalty, fuel_consumption_penalty]` | Continuous Mountain Car env, but with penalties for fuel consumption. |
-| [`mo-lunar-lander-v2`](https://mo-gymnasium.farama.org/environments/mo-lunar-lander/) | Continuous / Discrete or Continuous | `[landed, shaped_reward, main_engine_fuel, side_engine_fuel]` | MO version of the `LunarLander-v2` [environment](https://gymnasium.farama.org/environments/box2d/lunar_lander/). Objectives defined similarly as in [Hung et al. 2022](https://openreview.net/forum?id=AwWaBXLIJE). |
-| [`minecart-v0`](https://mo-gymnasium.farama.org/environments/minecart/) | Continuous or Image / Discrete | `[ore1, ore2, fuel]` | Agent must collect two types of ores and minimize fuel consumption. From [Abels et al. 2019](https://arxiv.org/abs/1809.07803v2). |
-| [`mo-highway-v0`](https://mo-gymnasium.farama.org/environments/mo-highway/) and `mo-highway-fast-v0` | Continuous / Discrete | `[speed, right_lane, collision]` | The agent's objective is to reach a high speed while avoiding collisions with neighbouring vehicles and staying on the rightest lane. From [highway-env](https://github.com/eleurent/highway-env). |
-| [`mo-supermario-v0`](https://mo-gymnasium.farama.org/environments/mo-supermario/) | Image / Discrete | `[x_pos, time, death, coin, enemy]` | [:warning: SuperMarioBrosEnv support is limited.] Multi-objective version of [SuperMarioBrosEnv](https://github.com/Kautenja/gym-super-mario-bros). Objectives are defined similarly as in [Yang et al. 2019](https://arxiv.org/pdf/1908.08342.pdf). |
-| [`mo-reacher-v4`](https://mo-gymnasium.farama.org/environments/mo-reacher/) | Continuous / Discrete | `[target_1, target_2, target_3, target_4]` | Mujoco version of `mo-reacher-v0`, based on `Reacher-v4` [environment](https://gymnasium.farama.org/environments/mujoco/reacher/). |
-| [`mo-hopper-v4`](https://mo-gymnasium.farama.org/environments/mo-hopper/) | Continuous / Continuous | `[velocity, height, energy]` | Multi-objective version of [Hopper-v4](https://gymnasium.farama.org/environments/mujoco/hopper/) env. |
-| [`mo-halfcheetah-v4`](https://mo-gymnasium.farama.org/environments/mo-halfcheetah/) | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [HalfCheetah-v4](https://gymnasium.farama.org/environments/mujoco/half_cheetah/) env. Similar to [Xu et al. 2020](https://github.com/mit-gfx/PGMORL). |
-
-
-```{toctree}
-:hidden:
-:glob:
-:caption: MO-Gymnasium Environments
-
-./*
-
-```
diff --git a/docs/environments/classical.md b/docs/environments/classical.md
new file mode 100644
index 00000000..3f80ce85
--- /dev/null
+++ b/docs/environments/classical.md
@@ -0,0 +1,25 @@
+---
+title: "Classic Control"
+---
+
+# Classic Control
+
+Multi-objective versions of classic Gymnasium environments.
+
+| Env | Obs/Action spaces | Objectives | Description |
+|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| [`mo-mountaincar-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) | Continuous / Discrete | `[time_penalty, reverse_penalty, forward_penalty]` | Classic Mountain Car env, but with extra penalties for the forward and reverse actions. From [Vamplew et al. 2011](https://www.researchgate.net/publication/220343783_Empirical_evaluation_methods_for_multiobjective_reinforcement_learning_algorithms). |
+| [`mo-mountaincarcontinuous-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincarcontinuous/) | Continuous / Continuous | `[time_penalty, fuel_consumption_penalty]` | Continuous Mountain Car env, but with penalties for fuel consumption. |
+| [`mo-lunar-lander-v2`](https://mo-gymnasium.farama.org/environments/mo-lunar-lander/) | Continuous / Discrete or Continuous | `[landed, shaped_reward, main_engine_fuel, side_engine_fuel]` | MO version of the `LunarLander-v2` [environment](https://gymnasium.farama.org/environments/box2d/lunar_lander/). Objectives defined similarly as in [Hung et al. 2022](https://openreview.net/forum?id=AwWaBXLIJE). |
+
+```{toctree}
+:hidden:
+:glob:
+:caption: MO-Gymnasium Environments
+
+./mo-mountaincar
+./mo-mountaincarcontinuous
+./mo-lunar-lander
+./mo-lunar-lander-continuous
+
+```
diff --git a/docs/environments/grid-world.md b/docs/environments/grid-world.md
new file mode 100644
index 00000000..e9d25520
--- /dev/null
+++ b/docs/environments/grid-world.md
@@ -0,0 +1,36 @@
+---
+title: "Grid-World"
+---
+
+# Grid-World
+
+Environments with discrete observation spaces, e.g., grid-worlds.
+
+| Env | Obs/Action spaces | Objectives | Description |
+|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| [`deep-sea-treasure-v0`](https://mo-gymnasium.farama.org/environments/deep-sea-treasure/) | Discrete / Discrete | `[treasure, time_penalty]` | Agent is a submarine that must collect a treasure while taking into account a time penalty. Treasure values taken from [Yang et al. 2019](https://arxiv.org/pdf/1908.08342.pdf). |
+| [`deep-sea-treasure-concave-v0`](https://mo-gymnasium.farama.org/environments/deep-sea-treasure-concave/) | Discrete / Discrete | `[treasure, time_penalty]` | Agent is a submarine that must collect a treasure while taking into account a time penalty. Treasure values taken from [Vamplew et al. 2010](https://link.springer.com/article/10.1007/s10994-010-5232-5). |
+| [`deep-sea-treasure-mirrored-v0`](https://mo-gymnasium.farama.org/environments/deep-sea-treasure-mirrored/) | Discrete / Discrete | `[treasure, time_penalty]` | Harder version of the concave DST [Felten et al. 2022](https://www.scitepress.org/Papers/2022/109891/109891.pdf). |
+| [`resource-gathering-v0`](https://mo-gymnasium.farama.org/environments/resource-gathering/) | Discrete / Discrete | `[enemy, gold, gem]` | Agent must collect gold or gem. Enemies have a 10% chance of killing the agent. From [Barrett & Narayanan 2008](https://dl.acm.org/doi/10.1145/1390156.1390162). |
+| [`fishwood-v0`](https://mo-gymnasium.farama.org/environments/fishwood/) | Discrete / Discrete | `[fish_amount, wood_amount]` | ESR environment: the agent must collect fish and wood to light a fire and eat. From [Roijers et al. 2018](https://www.researchgate.net/publication/328718263_Multi-objective_Reinforcement_Learning_for_the_Expected_Utility_of_the_Return). |
+| [`breakable-bottles-v0`](https://mo-gymnasium.farama.org/environments/breakable-bottles/) | Discrete (Dictionary) / Discrete | `[time_penalty, bottles_delivered, potential]` | Gridworld with 5 cells. The agent must collect bottles from the source location and deliver them to the destination. From [Vamplew et al. 2021](https://www.sciencedirect.com/science/article/pii/S0952197621000336). |
+| [`fruit-tree-v0`](https://mo-gymnasium.farama.org/environments/fruit-tree/) | Discrete / Discrete | `[nutri1, ..., nutri6]` | Full binary tree of depth d=5, 6 or 7. Every leaf contains a fruit with a value for the nutrients Protein, Carbs, Fats, Vitamins, Minerals and Water. From [Yang et al. 2019](https://arxiv.org/pdf/1908.08342.pdf). |
+| [`four-room-v0`](https://mo-gymnasium.farama.org/environments/four-room/) | Discrete / Discrete | `[item1, item2, item3]` | Agent must collect three different types of items in the map and reach the goal. From [Alegre et al. 2022](https://proceedings.mlr.press/v162/alegre22a.html). |
+
+
+```{toctree}
+:hidden:
+:glob:
+:caption: MO-Gymnasium Environments
+
+./deep-sea-treasure
+./deep-sea-treasure-concave
+./deep-sea-treasure-mirrored
+./resource-gathering
+./four-room
+./fruit-tree
+./breakable-bottles
+./fishwood
+
+
+```
diff --git a/docs/environments/misc.md b/docs/environments/misc.md
new file mode 100644
index 00000000..d9cc2295
--- /dev/null
+++ b/docs/environments/misc.md
@@ -0,0 +1,28 @@
+---
+title: "Misc"
+---
+
+# Miscellaneous
+
+MO-Gymnasium also includes other miscellaneous multi-objective environments:
+
+| Env | Obs/Action spaces | Objectives | Description |
+|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| [`water-reservoir-v0`](https://mo-gymnasium.farama.org/environments/water-reservoir/) | Continuous / Continuous | `[cost_flooding, deficit_water]` | A water reservoir environment. The agent executes a continuous action, corresponding to the amount of water released by the dam. From [Pianosi et al. 2013](https://iwaponline.com/jh/article/15/2/258/3425/Tree-based-fitted-Q-iteration-for-multi-objective). |
+| [`minecart-v0`](https://mo-gymnasium.farama.org/environments/minecart/) | Continuous or Image / Discrete | `[ore1, ore2, fuel]` | Agent must collect two types of ores and minimize fuel consumption. From [Abels et al. 2019](https://arxiv.org/abs/1809.07803v2). |
+| [`mo-highway-v0`](https://mo-gymnasium.farama.org/environments/mo-highway/) and `mo-highway-fast-v0` | Continuous / Discrete | `[speed, right_lane, collision]` | The agent's objective is to reach a high speed while avoiding collisions with neighbouring vehicles and staying on the rightmost lane. From [highway-env](https://github.com/eleurent/highway-env). |
+| [`mo-supermario-v0`](https://mo-gymnasium.farama.org/environments/mo-supermario/) | Image / Discrete | `[x_pos, time, death, coin, enemy]` | [:warning: SuperMarioBrosEnv support is limited.] Multi-objective version of [SuperMarioBrosEnv](https://github.com/Kautenja/gym-super-mario-bros). Objectives are defined similarly as in [Yang et al. 2019](https://arxiv.org/pdf/1908.08342.pdf). |
+
+
+```{toctree}
+:hidden:
+:glob:
+:caption: MO-Gymnasium Environments
+
+./water-reservoir
+./minecart
+./minecart-deterministic
+./minecart-rgb
+./mo-highway
+./mo-supermario
+```
diff --git a/docs/environments/mujoco.md b/docs/environments/mujoco.md
new file mode 100644
index 00000000..70272d24
--- /dev/null
+++ b/docs/environments/mujoco.md
@@ -0,0 +1,32 @@
+---
+title: "MuJoCo"
+---
+
+# MuJoCo
+
+Multi-objective versions of MuJoCo environments.
+
+| Env | Obs/Action spaces | Objectives | Description |
+|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| [`mo-reacher-v4`](https://mo-gymnasium.farama.org/environments/mo-reacher/) | Continuous / Discrete | `[target_1, target_2, target_3, target_4]` | Mujoco version of `mo-reacher-v0`, based on `Reacher-v4` [environment](https://gymnasium.farama.org/environments/mujoco/reacher/). |
+| [`mo-hopper-v4`](https://mo-gymnasium.farama.org/environments/mo-hopper/) | Continuous / Continuous | `[velocity, height, energy]` | Multi-objective version of [Hopper-v4](https://gymnasium.farama.org/environments/mujoco/hopper/) env. |
+| [`mo-halfcheetah-v4`](https://mo-gymnasium.farama.org/environments/mo-halfcheetah/) | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [HalfCheetah-v4](https://gymnasium.farama.org/environments/mujoco/half_cheetah/) env. Similar to [Xu et al. 2020](https://github.com/mit-gfx/PGMORL). |
+| [`mo-walker2d-v4`](https://mo-gymnasium.farama.org/environments/mo-walker2d/) | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Walker2d-v4](https://gymnasium.farama.org/environments/mujoco/walker2d/) env. |
+| [`mo-ant-v4`](https://mo-gymnasium.farama.org/environments/mo-ant/) | Continuous / Continuous | `[x_velocity, y_velocity, energy]` | Multi-objective version of [Ant-v4](https://gymnasium.farama.org/environments/mujoco/ant/) env. |
+| [`mo-swimmer-v4`](https://mo-gymnasium.farama.org/environments/mo-swimmer/) | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Swimmer-v4](https://gymnasium.farama.org/environments/mujoco/swimmer/) env. |
+| [`mo-humanoid-v4`](https://mo-gymnasium.farama.org/environments/mo-humanoid/) | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Humanoid-v4](https://gymnasium.farama.org/environments/mujoco/humanoid/) env. |
+
+
+```{toctree}
+:hidden:
+:glob:
+:caption: MO-Gymnasium Environments
+
+./mo-reacher
+./mo-hopper
+./mo-halfcheetah
+./mo-walker2d
+./mo-ant
+./mo-swimmer
+./mo-humanoid
+```
diff --git a/docs/examples/publications.md b/docs/examples/publications.md
index c2739d12..23176746 100644
--- a/docs/examples/publications.md
+++ b/docs/examples/publications.md
@@ -10,7 +10,14 @@ MO-Gymnasium (formerly MO-Gym) was first published in:
List of publications & submissions using MO-Gymnasium (please open a pull request to add missing entries):
+
- [Sample-Efficient Multi-Objective Learning via Generalized Policy Improvement Prioritization](https://arxiv.org/abs/2301.07784) (Alegre et al., AAMAS 2023)
- [Hyperparameter Optimization for Multi-Objective Reinforcement Learning](https://arxiv.org/abs/2310.16487v1) (Felten et al., MODeM Workshop 2023)
- [Multi-Step Generalized Policy Improvement by Leveraging Approximate Models](https://openreview.net/forum?id=KFj0Q1EXvU) (Alegre et al., NeurIPS 2023)
- [A Toolkit for Reliable Benchmarking and Research in Multi-Objective Reinforcement Learning](https://openreview.net/forum?id=jfwRLudQyj) (Felten et al., NeurIPS 2023)
+- [Distributional Pareto-Optimal Multi-Objective Reinforcement Learning](https://proceedings.neurips.cc/paper_files/paper/2023/hash/32285dd184dbfc33cb2d1f0db53c23c5-Abstract-Conference.html) (Cai et al., NeurIPS 2023)
+- [Welfare and Fairness in Multi-objective Reinforcement Learning](https://arxiv.org/abs/2212.01382) (Fan et al., AAMAS 2023)
+- [Personalized Reinforcement Learning with a Budget of Policies](https://arxiv.org/abs/2401.06514) (Ivanov et al., 2024)
+- [Multi-Objective Reinforcement Learning Based on Decomposition: A Taxonomy and Framework](https://arxiv.org/abs/2311.12495) (Felten et al., 2024)
+- [Multi-objective reinforcement learning for guaranteeing alignment with multiple values](https://alaworkshop2023.github.io/papers/ALA2023_paper_15.pdf) (Rodriguez-Soto et al., 2023)
+- [MOFL/D: A Federated Multi-objective Learning Framework with Decomposition](https://neurips.cc/virtual/2023/79018) (Hartmann et al., 2023)
diff --git a/docs/index.md b/docs/index.md
index a5069a60..fb6d56ff 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -10,11 +10,21 @@ lastpage:
introduction/install
introduction/api
-environments/all-environments
wrappers/wrappers
examples/morl_baselines
```
+```{toctree}
+:hidden:
+:caption: Environments
+
+environments/grid-world
+environments/classical
+environments/misc
+environments/mujoco
+```
+
+
```{toctree}
:hidden:
:caption: Tutorials
diff --git a/mo_gymnasium/__init__.py b/mo_gymnasium/__init__.py
index bb596e25..23201d0c 100644
--- a/mo_gymnasium/__init__.py
+++ b/mo_gymnasium/__init__.py
@@ -14,4 +14,4 @@
)
-__version__ = "1.0.1"
+__version__ = "1.1.0"
diff --git a/mo_gymnasium/envs/breakable_bottles/breakable_bottles.py b/mo_gymnasium/envs/breakable_bottles/breakable_bottles.py
index 0a6cbe72..93c33821 100644
--- a/mo_gymnasium/envs/breakable_bottles/breakable_bottles.py
+++ b/mo_gymnasium/envs/breakable_bottles/breakable_bottles.py
@@ -25,9 +25,13 @@ class BreakableBottles(Env, EzPickle):
The observation space is a dictionary with 4 keys:
- location: the current location of the agent
- bottles_carrying: the number of bottles the agent is currently carrying (0, 1 or 2)
- - bottles_delivered: the number of bottles the agent has delivered (0 or 1)
+ - bottles_delivered: the number of bottles the agent has delivered (0, 1 or 2)
- bottles_dropped: for each location, a boolean flag indicating if that location currently contains a bottle
+ Note that this observation space is different from that listed in the paper above. In the paper, bottles_delivered's possible values are listed as (0 or 1),
+ rather than (0, 1 or 2). This is because the paper did not take the terminal state, in which 2 bottles have been delivered, into account when calculating
+ the observation space. As such, the observation space of this implementation is larger than specified in the paper, having 360 possible states instead of 240.
+
## Reward Space
The reward space has 3 dimensions:
- time penalty: -1 for each time step
@@ -96,11 +100,11 @@ def __init__(
{
"location": Discrete(self.size),
"bottles_carrying": Discrete(3),
- "bottles_delivered": Discrete(2),
+ "bottles_delivered": Discrete(3),
"bottles_dropped": MultiBinary(self.size - 2),
}
)
- self.num_observations = 240
+ self.num_observations = 360
self.action_space = Discrete(3) # LEFT, RIGHT, PICKUP
self.num_actions = 3
@@ -220,7 +224,7 @@ def get_obs_idx(self, obs):
*[[bd > 0] for bd in obs["bottles_dropped"]],
]
)
- return np.ravel_multi_index(multi_index, tuple([self.size, 3, 2, *([2] * (self.size - 2))]))
+ return np.ravel_multi_index(multi_index, tuple([self.size, 3, 3, *([2] * (self.size - 2))]))
def _get_obs(self):
return {
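For reference, the enlarged observation space can be sanity-checked with a short NumPy sketch that mirrors `get_obs_idx` above; the observation values below are made up purely for illustration:

```python
import numpy as np

size = 5  # default number of cells
dims = (size, 3, 3, *([2] * (size - 2)))  # location, carrying, delivered, dropped flags
print(np.prod(dims))  # 360 joint states (with the old Discrete(2) it was 5 * 3 * 2 * 2**3 = 240)

# Flatten one (made-up) observation into a unique index, the same way get_obs_idx does.
obs = {
    "location": 2,
    "bottles_carrying": 1,
    "bottles_delivered": 0,
    "bottles_dropped": np.array([1, 0, 0]),
}
multi_index = np.array(
    [obs["location"], obs["bottles_carrying"], obs["bottles_delivered"], *[int(b > 0) for b in obs["bottles_dropped"]]]
)
print(np.ravel_multi_index(multi_index, dims))  # index in [0, 360)
```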
diff --git a/mo_gymnasium/envs/lunar_lander/lunar_lander.py b/mo_gymnasium/envs/lunar_lander/lunar_lander.py
index 091190a9..c8d94d6a 100644
--- a/mo_gymnasium/envs/lunar_lander/lunar_lander.py
+++ b/mo_gymnasium/envs/lunar_lander/lunar_lander.py
@@ -6,6 +6,7 @@
FPS,
LEG_DOWN,
MAIN_ENGINE_POWER,
+ MAIN_ENGINE_Y_LOCATION,
SCALE,
SIDE_ENGINE_AWAY,
SIDE_ENGINE_HEIGHT,
@@ -46,7 +47,7 @@ def __init__(self, *args, **kwargs):
def step(self, action):
assert self.lander is not None
- # Update wind
+ # Update wind and apply to the lander
assert self.lander is not None, "You forgot to call reset()"
if self.enable_wind and not (self.legs[0].ground_contact or self.legs[1].ground_contact):
# the function used for wind is tanh(sin(2 k x) + sin(pi k x)),
@@ -60,12 +61,13 @@ def step(self, action):
# the function used for torque is tanh(sin(2 k x) + sin(pi k x)),
# which is proven to never be periodic, k = 0.01
- torque_mag = math.tanh(math.sin(0.02 * self.torque_idx) + (math.sin(math.pi * 0.01 * self.torque_idx))) * (
- self.turbulence_power
+ torque_mag = (
+ math.tanh(math.sin(0.02 * self.torque_idx) + (math.sin(math.pi * 0.01 * self.torque_idx)))
+ * self.turbulence_power
)
self.torque_idx += 1
self.lander.ApplyTorque(
- (torque_mag),
+ torque_mag,
True,
)
@@ -74,9 +76,15 @@ def step(self, action):
else:
assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid "
- # Engines
+ # Apply Engine Impulses
+
+ # Tip is the (X and Y) components of the rotation of the lander.
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
+
+ # Side is the (-Y and X) components of the rotation of the lander.
side = (-tip[1], tip[0])
+
+ # Generate two random numbers between -1/SCALE and 1/SCALE.
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
m_power = 0.0
@@ -87,21 +95,29 @@ def step(self, action):
assert m_power >= 0.5 and m_power <= 1.0
else:
m_power = 1.0
+
# 4 is move a bit downwards, +-2 for randomness
- ox = tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1]
- oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
+ # The components of the impulse to be applied by the main engine.
+ ox = tip[0] * (MAIN_ENGINE_Y_LOCATION / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1]
+ oy = -tip[1] * (MAIN_ENGINE_Y_LOCATION / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
+
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
- p = self._create_particle(
- 3.5, # 3.5 is here to make particle speed adequate
- impulse_pos[0],
- impulse_pos[1],
- m_power,
- ) # particles are just a decoration
- p.ApplyLinearImpulse(
- (ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
- impulse_pos,
- True,
- )
+ if self.render_mode is not None:
+ # particles are just a decoration, with no impact on the physics, so don't add them when not rendering
+ p = self._create_particle(
+ 3.5, # 3.5 is here to make particle speed adequate
+ impulse_pos[0],
+ impulse_pos[1],
+ m_power,
+ )
+ p.ApplyLinearImpulse(
+ (
+ ox * MAIN_ENGINE_POWER * m_power,
+ oy * MAIN_ENGINE_POWER * m_power,
+ ),
+ impulse_pos,
+ True,
+ )
self.lander.ApplyLinearImpulse(
(-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
@@ -110,26 +126,39 @@ def step(self, action):
s_power = 0.0
if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1, 3]):
- # Orientation engines
+ # Orientation/Side engines
if self.continuous:
direction = np.sign(action[1])
s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
assert s_power >= 0.5 and s_power <= 1.0
else:
+ # action = 1 is left, action = 3 is right
direction = action - 2
s_power = 1.0
+
+ # The components of the impulse to be applied by the side engines.
ox = tip[0] * dispersion[0] + side[0] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE)
oy = -tip[1] * dispersion[0] - side[1] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE)
+
+            # The constant 17 is presumably meant to be SIDE_ENGINE_HEIGHT.
+            # However, SIDE_ENGINE_HEIGHT is defined as 14.
+ # This causes the position of the thrust on the body of the lander to change, depending on the orientation of the lander.
+ # This in turn results in an orientation dependent torque being applied to the lander.
impulse_pos = (
self.lander.position[0] + ox - tip[0] * 17 / SCALE,
self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE,
)
- p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
- p.ApplyLinearImpulse(
- (ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
- impulse_pos,
- True,
- )
+ if self.render_mode is not None:
+ # particles are just a decoration, with no impact on the physics, so don't add them when not rendering
+ p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
+ p.ApplyLinearImpulse(
+ (
+ ox * SIDE_ENGINE_POWER * s_power,
+ oy * SIDE_ENGINE_POWER * s_power,
+ ),
+ impulse_pos,
+ True,
+ )
self.lander.ApplyLinearImpulse(
(-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
@@ -140,6 +169,7 @@ def step(self, action):
pos = self.lander.position
vel = self.lander.linearVelocity
+
state = [
(pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
(pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),
diff --git a/mo_gymnasium/envs/mujoco/__init__.py b/mo_gymnasium/envs/mujoco/__init__.py
index 5b605ff5..4415d577 100644
--- a/mo_gymnasium/envs/mujoco/__init__.py
+++ b/mo_gymnasium/envs/mujoco/__init__.py
@@ -39,6 +39,37 @@
kwargs={"cost_objective": False},
)
+register(
+ id="mo-walker2d-v4",
+ entry_point="mo_gymnasium.envs.mujoco.walker2d:MOWalker2dEnv",
+ max_episode_steps=1000,
+)
+
+register(
+ id="mo-ant-v4",
+ entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
+ max_episode_steps=1000,
+)
+
+register(
+ id="mo-ant-2d-v4",
+ entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
+ max_episode_steps=1000,
+ kwargs={"cost_objective": False},
+)
+
+register(
+ id="mo-swimmer-v4",
+ entry_point="mo_gymnasium.envs.mujoco.swimmer:MOSwimmerEnv",
+ max_episode_steps=1000,
+)
+
+register(
+ id="mo-humanoid-v4",
+ entry_point="mo_gymnasium.envs.mujoco.humanoid:MOHumanoidEnv",
+ max_episode_steps=1000,
+)
+
register(
id="mo-reacher-v4",
entry_point="mo_gymnasium.envs.mujoco.reacher_v4:MOReacherEnv",
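A minimal usage sketch for the newly registered environments, assuming the top-level `mo_gymnasium.make` helper used throughout this repository; printed values are illustrative only:

```python
import mo_gymnasium as mo_gym

# Instantiate one of the newly registered multi-objective MuJoCo environments.
env = mo_gym.make("mo-walker2d-v4")
print(env.unwrapped.reward_dim)          # 2
print(env.unwrapped.reward_space.shape)  # (2,) -> [velocity, energy]

obs, info = env.reset(seed=42)
obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())
print(vec_reward)  # a length-2 numpy array, one entry per objective
```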
diff --git a/mo_gymnasium/envs/mujoco/ant.py b/mo_gymnasium/envs/mujoco/ant.py
new file mode 100644
index 00000000..cc2ba7ed
--- /dev/null
+++ b/mo_gymnasium/envs/mujoco/ant.py
@@ -0,0 +1,51 @@
+import numpy as np
+from gymnasium.envs.mujoco.ant_v4 import AntEnv
+from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
+
+
+class MOAntEnv(AntEnv, EzPickle):
+ """
+ ## Description
+ Multi-objective version of the AntEnv environment.
+
+ See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/ant/) for more information.
+
+    The original Gymnasium 'Ant-v4' reward can be recovered with the following linear scalarization:
+
+ env = mo_gym.make('mo-ant-v4', cost_objective=False)
+ LinearReward(env, weight=np.array([1.0, 0.0]))
+
+ ## Reward Space
+ The reward is 2- or 3-dimensional:
+ - 0: x-velocity
+ - 1: y-velocity
+ - 2: Control cost of the action
+    If the cost_objective flag is set to False, the reward is 2-dimensional, and the cost is added to the other objectives.
+ A healthy reward is added to all objectives.
+ """
+
+ def __init__(self, cost_objective=True, **kwargs):
+ super().__init__(**kwargs)
+ EzPickle.__init__(self, cost_objective, **kwargs)
+        self.cost_objective = cost_objective
+ self.reward_dim = 3 if cost_objective else 2
+ self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))
+
+ def step(self, action):
+ observation, reward, terminated, truncated, info = super().step(action)
+ x_velocity = info["x_velocity"]
+ y_velocity = info["y_velocity"]
+ cost = info["reward_ctrl"]
+ healthy_reward = info["reward_survive"]
+
+        if self.cost_objective:
+ cost /= self._ctrl_cost_weight # Ignore the weight in the original AntEnv
+ vec_reward = np.array([x_velocity, y_velocity, cost], dtype=np.float32)
+ else:
+ vec_reward = np.array([x_velocity, y_velocity], dtype=np.float32)
+ vec_reward += cost
+
+ vec_reward += healthy_reward
+
+ return observation, vec_reward, terminated, truncated, info
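A hedged, runnable version of the scalarization snippet from the docstring above, assuming the top-level `LinearReward` wrapper also used in the tests below:

```python
import numpy as np
import mo_gymnasium as mo_gym

# Recover a scalar, Ant-v4-like reward from the 2-objective variant,
# as suggested by the docstring (control cost folded into both objectives).
env = mo_gym.make("mo-ant-v4", cost_objective=False)
env = mo_gym.LinearReward(env, weight=np.array([1.0, 0.0]))

obs, info = env.reset(seed=0)
obs, scalar_reward, terminated, truncated, info = env.step(env.action_space.sample())
print(scalar_reward)  # scalarized reward, comparable to the original Ant-v4 reward
```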
diff --git a/mo_gymnasium/envs/mujoco/half_cheetah_v4.py b/mo_gymnasium/envs/mujoco/half_cheetah_v4.py
index 8427cc52..0dac5308 100644
--- a/mo_gymnasium/envs/mujoco/half_cheetah_v4.py
+++ b/mo_gymnasium/envs/mujoco/half_cheetah_v4.py
@@ -11,6 +11,11 @@ class MOHalfCheehtahEnv(HalfCheetahEnv, EzPickle):
See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/half_cheetah/) for more information.
+    The original Gymnasium 'HalfCheetah-v4' reward can be recovered with the following linear scalarization:
+
+ env = mo_gym.make('mo-halfcheetah-v4')
+ LinearReward(env, weight=np.array([1.0, 1.0]))
+
## Reward Space
The reward is 2-dimensional:
- 0: Reward for running forward
diff --git a/mo_gymnasium/envs/mujoco/hopper_v4.py b/mo_gymnasium/envs/mujoco/hopper_v4.py
index 7d35b408..6fe0ed3c 100644
--- a/mo_gymnasium/envs/mujoco/hopper_v4.py
+++ b/mo_gymnasium/envs/mujoco/hopper_v4.py
@@ -11,6 +11,11 @@ class MOHopperEnv(HopperEnv, EzPickle):
See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/hopper/) for more information.
+    The original Gymnasium 'Hopper-v4' reward can be recovered with the following linear scalarization:
+
+ env = mo_gym.make('mo-hopper-v4', cost_objective=False)
+ LinearReward(env, weight=np.array([1.0, 0.0]))
+
## Reward Space
The reward is 3-dimensional:
- 0: Reward for going forward on the x-axis
diff --git a/mo_gymnasium/envs/mujoco/humanoid.py b/mo_gymnasium/envs/mujoco/humanoid.py
new file mode 100644
index 00000000..12518cd8
--- /dev/null
+++ b/mo_gymnasium/envs/mujoco/humanoid.py
@@ -0,0 +1,34 @@
+import numpy as np
+from gymnasium.envs.mujoco.humanoid_v4 import HumanoidEnv
+from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
+
+
+class MOHumanoidEnv(HumanoidEnv, EzPickle):
+ """
+ ## Description
+ Multi-objective version of the HumanoidEnv environment.
+
+ See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/humanoid/) for more information.
+
+ ## Reward Space
+ The reward is 2-dimensional:
+ - 0: Reward for running forward (x-velocity)
+ - 1: Control cost of the action
+ """
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ EzPickle.__init__(self, **kwargs)
+ self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
+ self.reward_dim = 2
+
+ def step(self, action):
+ observation, reward, terminated, truncated, info = super().step(action)
+ velocity = info["x_velocity"]
+ negative_cost = 10 * info["reward_quadctrl"]
+ vec_reward = np.array([velocity, negative_cost], dtype=np.float32)
+
+        vec_reward += self.healthy_reward  # All objectives are penalized when the agent falls
+
+ return observation, vec_reward, terminated, truncated, info
diff --git a/mo_gymnasium/envs/mujoco/swimmer.py b/mo_gymnasium/envs/mujoco/swimmer.py
new file mode 100644
index 00000000..72e5b59e
--- /dev/null
+++ b/mo_gymnasium/envs/mujoco/swimmer.py
@@ -0,0 +1,38 @@
+import numpy as np
+from gymnasium.envs.mujoco.swimmer_v4 import SwimmerEnv
+from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
+
+
+class MOSwimmerEnv(SwimmerEnv, EzPickle):
+ """
+ ## Description
+ Multi-objective version of the SwimmerEnv environment.
+
+ See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/swimmer/) for more information.
+
+    The original Gymnasium 'Swimmer-v4' reward can be recovered with the following linear scalarization:
+
+ env = mo_gym.make('mo-swimmer-v4')
+ LinearReward(env, weight=np.array([1.0, 1e-4]))
+
+ ## Reward Space
+ The reward is 2-dimensional:
+ - 0: Reward for moving forward (x-velocity)
+ - 1: Control cost of the action
+ """
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ EzPickle.__init__(self, **kwargs)
+ self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
+ self.reward_dim = 2
+
+ def step(self, action):
+ observation, reward, terminated, truncated, info = super().step(action)
+ velocity = info["x_velocity"]
+ energy = -np.sum(np.square(action))
+
+ vec_reward = np.array([velocity, energy], dtype=np.float32)
+
+ return observation, vec_reward, terminated, truncated, info
diff --git a/mo_gymnasium/envs/mujoco/walker2d.py b/mo_gymnasium/envs/mujoco/walker2d.py
new file mode 100644
index 00000000..e3806810
--- /dev/null
+++ b/mo_gymnasium/envs/mujoco/walker2d.py
@@ -0,0 +1,35 @@
+import numpy as np
+from gymnasium.envs.mujoco.walker2d_v4 import Walker2dEnv
+from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
+
+
+class MOWalker2dEnv(Walker2dEnv, EzPickle):
+ """
+ ## Description
+ Multi-objective version of the Walker2dEnv environment.
+
+ See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/walker2d/) for more information.
+
+ ## Reward Space
+ The reward is 2-dimensional:
+ - 0: Reward for running forward (x-velocity)
+ - 1: Control cost of the action
+ """
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ EzPickle.__init__(self, **kwargs)
+ self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
+ self.reward_dim = 2
+
+ def step(self, action):
+ observation, reward, terminated, truncated, info = super().step(action)
+ velocity = info["x_velocity"]
+ energy = -np.sum(np.square(action))
+
+ vec_reward = np.array([velocity, energy], dtype=np.float32)
+
+    vec_reward += self.healthy_reward  # All objectives are penalized when the agent falls
+
+ return observation, vec_reward, terminated, truncated, info
diff --git a/tests/test_envs.py b/tests/test_envs.py
index 7e338be4..28af4b0c 100644
--- a/tests/test_envs.py
+++ b/tests/test_envs.py
@@ -5,6 +5,7 @@
import pytest
from gymnasium.envs.registration import EnvSpec
from gymnasium.utils.env_checker import check_env, data_equivalence
+from gymnasium.utils.env_match import check_environments_match
import mo_gymnasium as mo_gym
@@ -40,6 +41,32 @@ def test_all_env_passive_env_checker(spec):
env.close()
+@pytest.mark.parametrize(
+ "gym_id, mo_gym_id",
+ [
+ ("MountainCar-v0", "mo-mountaincar-v0"),
+ ("MountainCarContinuous-v0", "mo-mountaincarcontinuous-v0"),
+ ("LunarLander-v2", "mo-lunar-lander-v2"),
+ # ("Reacher-v4", "mo-reacher-v4"), # use a different model and action space
+ ("Hopper-v4", "mo-hopper-v4"),
+ ("HalfCheetah-v4", "mo-halfcheetah-v4"),
+ ("Walker2d-v4", "mo-walker2d-v4"),
+ ("Ant-v4", "mo-ant-v4"),
+ ("Swimmer-v4", "mo-swimmer-v4"),
+ ("Humanoid-v4", "mo-humanoid-v4"),
+ ],
+)
+def test_gymnasium_equivalence(gym_id, mo_gym_id, num_steps=100, seed=123):
+ env = gym.make(gym_id)
+ mo_env = mo_gym.LinearReward(mo_gym.make(mo_gym_id))
+
+ # for float rewards, then precision becomes an issue
+    # with float rewards, precision becomes an issue, so round them before comparing
+ mo_env = gym.wrappers.TransformReward(mo_env, lambda reward: round(reward, 4))
+
+ check_environments_match(env, mo_env, num_steps=num_steps, seed=seed, skip_rew=True, info_comparison="keys-superset")
+
+
# Note that this precludes running this test in multiple threads.
# However, we probably already can't do multithreading due to some environments.
SEED = 0