diff --git a/docs/_static/screenshots/breakable-bottles.jpg b/docs/_static/screenshots/breakable-bottles.jpg deleted file mode 100644 index 851fc8d0..00000000 Binary files a/docs/_static/screenshots/breakable-bottles.jpg and /dev/null differ diff --git a/docs/_static/screenshots/breakable-bottles.png b/docs/_static/screenshots/breakable-bottles.png new file mode 100644 index 00000000..b4fe5b3e Binary files /dev/null and b/docs/_static/screenshots/breakable-bottles.png differ diff --git a/docs/_static/videos/breakable-bottles.gif b/docs/_static/videos/breakable-bottles.gif new file mode 100644 index 00000000..4f901d79 Binary files /dev/null and b/docs/_static/videos/breakable-bottles.gif differ diff --git a/docs/environments/all-environments.md b/docs/environments/all-environments.md index b39b3ee7..802b6cc2 100644 --- a/docs/environments/all-environments.md +++ b/docs/environments/all-environments.md @@ -13,7 +13,7 @@ MO-Gymnasium includes environments taken from the MORL literature, as well as mu | [`deep-sea-treasure-concave-v0`](https://mo-gymnasium.farama.org/environments/deep-sea-treasure-concave/)
| Discrete / Discrete | `[treasure, time_penalty]` | Agent is a submarine that must collect a treasure while taking into account a time penalty. Treasures values taken from [Vamplew et al. 2010](https://link.springer.com/article/10.1007/s10994-010-5232-5). | | [`resource-gathering-v0`](https://mo-gymnasium.farama.org/environments/resource-gathering/)
| Discrete / Discrete | `[enemy, gold, gem]` | Agent must collect gold or gem. Enemies have a 10% chance of killing the agent. From [Barret & Narayanan 2008](https://dl.acm.org/doi/10.1145/1390156.1390162). | | [`fishwood-v0`](https://mo-gymnasium.farama.org/environments/fishwood/)
| Discrete / Discrete | `[fish_amount, wood_amount]` | ESR environment, the agent must collect fish and wood to light a fire and eat. From [Roijers et al. 2018](https://www.researchgate.net/publication/328718263_Multi-objective_Reinforcement_Learning_for_the_Expected_Utility_of_the_Return). | -| [`breakable-bottles-v0`](https://mo-gymnasium.farama.org/environments/breakable-bottles/)
| Discrete (Dictionary) / Discrete | `[time_penalty, bottles_delivered, potential]` | Gridworld with 5 cells. The agents must collect bottles from the source location and deliver to the destination. From [Vamplew et al. 2021](https://www.sciencedirect.com/science/article/pii/S0952197621000336). | +| [`breakable-bottles-v0`](https://mo-gymnasium.farama.org/environments/breakable-bottles/)
| Discrete (Dictionary) / Discrete | `[time_penalty, bottles_delivered, potential]` | Gridworld with 5 cells. The agents must collect bottles from the source location and deliver to the destination. From [Vamplew et al. 2021](https://www.sciencedirect.com/science/article/pii/S0952197621000336). | | [`fruit-tree-v0`](https://mo-gymnasium.farama.org/environments/fruit-tree/)
| Discrete / Discrete | `[nutri1, ..., nutri6]` | Full binary tree of depth d=5,6 or 7. Every leaf contains a fruit with a value for the nutrients Protein, Carbs, Fats, Vitamins, Minerals and Water. From [Yang et al. 2019](https://arxiv.org/pdf/1908.08342.pdf). | | [`water-reservoir-v0`](https://mo-gymnasium.farama.org/environments/water-reservoir/)
| Continuous / Continuous | `[cost_flooding, deficit_water]` | A Water reservoir environment. The agent executes a continuous action, corresponding to the amount of water released by the dam. From [Pianosi et al. 2013](https://iwaponline.com/jh/article/15/2/258/3425/Tree-based-fitted-Q-iteration-for-multi-objective). | | [`four-room-v0`](https://mo-gymnasium.farama.org/environments/four-room/)
| Discrete / Discrete | `[item1, item2, item3]` | Agent must collect three different types of items in the map and reach the goal. From [Alegre et al. 2022](https://proceedings.mlr.press/v162/alegre22a.html). | diff --git a/mo_gymnasium/envs/breakable_bottles/assets/Minecraft.ttf b/mo_gymnasium/envs/breakable_bottles/assets/Minecraft.ttf new file mode 100644 index 00000000..85c14725 Binary files /dev/null and b/mo_gymnasium/envs/breakable_bottles/assets/Minecraft.ttf differ diff --git a/mo_gymnasium/envs/breakable_bottles/assets/bottle.png b/mo_gymnasium/envs/breakable_bottles/assets/bottle.png new file mode 100644 index 00000000..b1ff5983 Binary files /dev/null and b/mo_gymnasium/envs/breakable_bottles/assets/bottle.png differ diff --git a/mo_gymnasium/envs/breakable_bottles/assets/elf_left.png b/mo_gymnasium/envs/breakable_bottles/assets/elf_left.png new file mode 100644 index 00000000..bc9e22ea Binary files /dev/null and b/mo_gymnasium/envs/breakable_bottles/assets/elf_left.png differ diff --git a/mo_gymnasium/envs/breakable_bottles/assets/elf_right.png b/mo_gymnasium/envs/breakable_bottles/assets/elf_right.png new file mode 100644 index 00000000..83640315 Binary files /dev/null and b/mo_gymnasium/envs/breakable_bottles/assets/elf_right.png differ diff --git a/mo_gymnasium/envs/breakable_bottles/assets/home.png b/mo_gymnasium/envs/breakable_bottles/assets/home.png new file mode 100644 index 00000000..565d371b Binary files /dev/null and b/mo_gymnasium/envs/breakable_bottles/assets/home.png differ diff --git a/mo_gymnasium/envs/breakable_bottles/assets/mountain_bg1.png b/mo_gymnasium/envs/breakable_bottles/assets/mountain_bg1.png new file mode 100644 index 00000000..e5872ceb Binary files /dev/null and b/mo_gymnasium/envs/breakable_bottles/assets/mountain_bg1.png differ diff --git a/mo_gymnasium/envs/breakable_bottles/assets/mountain_bg2.png b/mo_gymnasium/envs/breakable_bottles/assets/mountain_bg2.png new file mode 100644 index 00000000..8cadf7df Binary files 
/dev/null and b/mo_gymnasium/envs/breakable_bottles/assets/mountain_bg2.png differ diff --git a/mo_gymnasium/envs/breakable_bottles/breakable_bottles.py b/mo_gymnasium/envs/breakable_bottles/breakable_bottles.py index 8c8abcbd..0a6cbe72 100644 --- a/mo_gymnasium/envs/breakable_bottles/breakable_bottles.py +++ b/mo_gymnasium/envs/breakable_bottles/breakable_bottles.py @@ -1,6 +1,9 @@ +from os import path from typing import Optional +import gymnasium as gym import numpy as np +import pygame from gymnasium import Env from gymnasium.spaces import Box, Dict, Discrete, MultiBinary from gymnasium.utils import EzPickle @@ -47,9 +50,12 @@ class BreakableBottles(Env, EzPickle): ## Credits This environment was originally a contribution of Robert Klassert + The home asset is from https://limezu.itch.io/serenevillagerevamped + The gold, enemy and gem assets are from https://ninjikin.itch.io/treasure + The bottles pixel art was created with the assistance of DALL·E 2. """ - metadata = {"render_modes": ["human"]} + metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4} # actions LEFT = 0 @@ -103,7 +109,25 @@ def __init__( self.reward_space = Box(np.array([-np.inf, 0, -1]), np.array([0, self.bottle_reward * 2, 0])) self.reward_dim = 3 + # pygame + self.cell_size = (64, 64) + self.window_size = ( + self.size * self.cell_size[1], + 1 * self.cell_size[0], + ) + self.clock = None + self.elf_images = [] + self.home_img = None + self.bottle_img = None + self.mountain_bg_img = [] + self.window = None + self.last_action = None + self.direction = self.RIGHT + def step(self, action): + self.last_action = action + if self.last_action != self.PICKUP: + self.direction = self.last_action observation_old = self._get_obs() old_potential = self.potential(observation_old) terminal = False @@ -207,12 +231,80 @@ def _get_obs(self): } def render(self): - if self.render_mode == "human": - print("-----") - print( - f"Location: {self.location}\nCarrying {self.bottles_carrying} 
bottles.\nDelivered {self.bottles_delivered} so far.\nBottles have been dropped at tiles {'1' if self.bottles_dropped[0] > 0 else ''} {'2' if self.bottles_dropped[1] > 0 else ''} {'3' if self.bottles_dropped[2] > 0 else ''}" + if self.render_mode is None: + assert self.spec is not None + gym.logger.warn( + "You are calling render method without specifying any render mode. " + "You can specify the render_mode at initialization, " + f'e.g. mo_gym.make("{self.spec.id}", render_mode="rgb_array")' ) - print("-----") + return + + if self.window is None: + pygame.init() + + if self.render_mode == "human": + pygame.display.init() + pygame.display.set_caption("Breakable Bottles") + self.window = pygame.display.set_mode(self.window_size) + else: + self.window = pygame.Surface(self.window_size) + + if self.clock is None: + self.clock = pygame.time.Clock() + + if not self.elf_images: + hikers = [ + path.join(path.dirname(__file__), "assets/elf_left.png"), + path.join(path.dirname(__file__), "assets/elf_right.png"), + ] + self.elf_images = [pygame.transform.scale(pygame.image.load(f_name), self.cell_size) for f_name in hikers] + if not self.mountain_bg_img: + bg_imgs = [ + path.join(path.dirname(__file__), "assets/mountain_bg1.png"), + path.join(path.dirname(__file__), "assets/mountain_bg2.png"), + ] + self.mountain_bg_img = [ + pygame.transform.scale(pygame.image.load(f_name), self.cell_size) for f_name in bg_imgs + ] + if self.home_img is None: + self.home_img = pygame.transform.scale( + pygame.image.load(path.join(path.dirname(__file__), "assets/home.png")), + self.cell_size, + ) + if self.bottle_img is None: + self.bottle_img = pygame.transform.scale( + pygame.image.load(path.join(path.dirname(__file__), "assets/bottle.png")), + (32, 32), + ) + self.font = pygame.font.Font(path.join(path.dirname(__file__), "assets", "Minecraft.ttf"), 10) + + for i in range(self.size): + self.window.blit( + self.mountain_bg_img[i % 2], + np.array([i, 0]) * self.cell_size[0], + ) + if i == 0: 
+ for k in range(4): + self.window.blit(self.bottle_img, np.array([i, 0]) * self.cell_size[0] + np.array([k * 10, 0])) + if i == self.size - 1: + self.window.blit(self.home_img, np.array([i, 0]) * self.cell_size[0]) + if i == self.location: + self.window.blit(self.elf_images[self.direction], np.array([i, 0]) * self.cell_size[0]) + if i in range(1, self.size - 1): + if self.bottles_dropped[i - 1] > 0: + self.window.blit(self.bottle_img, np.array([i, 0]) * self.cell_size[0]) + img = self.font.render(f"Carrying: {self.bottles_carrying}", True, (0, 0, 0)) + self.window.blit(img, np.array([self.location, 0]) * self.cell_size + np.array([5, 50])) + img = self.font.render(f"Delivered: {self.bottles_delivered}", True, (0, 0, 0)) + self.window.blit(img, np.array([self.size - 1, 0]) * self.cell_size + np.array([4, 5])) + + if self.render_mode == "human": + pygame.event.pump() + pygame.display.update() + self.clock.tick(self.metadata["render_fps"]) + elif self.render_mode == "rgb_array": # rgb_array + return np.transpose(np.array(pygame.surfarray.pixels3d(self.window)), axes=(1, 0, 2)) def close(self): pass @@ -224,16 +316,13 @@ def potential(self, obs): if __name__ == "__main__": - from gymnasium.spaces.utils import flatdim + import mo_gymnasium as mo_gym - env = BreakableBottles(size=5, prob_drop=0.1) - assert flatdim(env.action_space) == 3 - assert flatdim(env.observation_space) == 13 + env = mo_gym.make("breakable-bottles-v0", render_mode="human") done = False - obs = env.reset() + obs, info = env.reset() while True: - env.render() - obs, r, done, info = env.step(env.action_space.sample()) - if done: + obs, r, terminated, truncated, info = env.step(env.action_space.sample()) + if terminated or truncated: break