diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index dc3b097fc..008339d4b 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -24,7 +24,7 @@ If applicable, add screenshots to help explain your problem. **Desktop (please complete the following information):** - OS: [e.g. mac/linux/windows] - - Version [e.g. 0.3.2] + - Version [e.g. 0.3.3] **Additional context** Add any other context about the problem here. diff --git a/CMakeLists.txt b/CMakeLists.txt index 563beb28b..8e9b38f4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.10.0) -project(Griddly VERSION 0.3.2) +project(Griddly VERSION 0.3.3) set(BINARY ${CMAKE_PROJECT_NAME}) diff --git a/bindings/python.cpp b/bindings/python.cpp index 776ad6be6..d662770bf 100644 --- a/bindings/python.cpp +++ b/bindings/python.cpp @@ -12,7 +12,7 @@ namespace griddly { PYBIND11_MODULE(python_griddly, m) { m.doc() = "Griddly python bindings"; - m.attr("version") = "0.3.2"; + m.attr("version") = "0.3.3"; #ifndef NDEBUG spdlog::set_level(spdlog::level::debug); @@ -35,6 +35,9 @@ PYBIND11_MODULE(python_griddly, m) { gdy.def("get_avatar_object", &Py_GDYWrapper::getAvatarObject); gdy.def("create_game", &Py_GDYWrapper::createGame); + // Get list of objects in the order of their assigned ID + gdy.def("get_object_names", &Py_GDYWrapper::getObjectNames); + py::class_> game_process(m, "GameProcess"); @@ -55,6 +58,8 @@ PYBIND11_MODULE(python_griddly, m) { // Get available actions for objects in the current game game_process.def("get_available_actions", &Py_GameWrapper::getAvailableActionNames); game_process.def("get_available_action_ids", &Py_GameWrapper::getAvailableActionIds); + + // Width and height of the game grid game_process.def("get_width", &Py_GameWrapper::getWidth); diff --git a/bindings/wrapper/GDYWrapper.cpp b/bindings/wrapper/GDYWrapper.cpp index 2be9fc232..0e6a71bb1 100644 --- 
a/bindings/wrapper/GDYWrapper.cpp +++ b/bindings/wrapper/GDYWrapper.cpp @@ -35,6 +35,10 @@ class Py_GDYWrapper { return gdyFactory_->getExternalActionNames(); } + std::vector getObjectNames() const { + return gdyFactory_->getObjectGenerator()->getObjectNames(); + } + py::dict getActionInputMappings() const { auto actionInputsDefinitions = gdyFactory_->getActionInputsDefinitions(); py::dict py_actionInputsDefinitions; diff --git a/docs/about/artwork.rst b/docs/about/artwork.rst index dfecd4444..066721cc0 100644 --- a/docs/about/artwork.rst +++ b/docs/about/artwork.rst @@ -1,6 +1,6 @@ -======= +####### Artwork -======= +####### The Artwork is provided by the `Oryx Design Lab `_. diff --git a/docs/about/community.rst b/docs/about/community.rst index 0721cada8..03fa954d7 100644 --- a/docs/about/community.rst +++ b/docs/about/community.rst @@ -1,6 +1,6 @@ -========= +######### Community -========= +######### Come join the `Griddly Discord `_ community, get support and share game levels that you have created. diff --git a/docs/about/faq.rst b/docs/about/faq.rst index fa6e36f75..f8fcc1fc9 100644 --- a/docs/about/faq.rst +++ b/docs/about/faq.rst @@ -1,7 +1,7 @@ .. _doc_about_faq: -========================== +########################## Frequently Asked Questions -========================== +########################## Nothing here yet! diff --git a/docs/about/halloffame.rst b/docs/about/halloffame.rst index d513622a9..e6c712e60 100644 --- a/docs/about/halloffame.rst +++ b/docs/about/halloffame.rst @@ -1,14 +1,14 @@ -============ +############ Hall of Fame -============ +############ If you create a project that uses Griddly, please let us know and we will link it here. This includes if you use Griddly in any papers, use the griddly engine in another game project and want to share your work. .. note:: You can Be the first! 
- +******** Academia -======== +******** Please use the following snippet to reference the Griddly project: diff --git a/docs/about/index.rst b/docs/about/index.rst index c772f6047..696f0d8f5 100644 --- a/docs/about/index.rst +++ b/docs/about/index.rst @@ -1,6 +1,6 @@ -===== +##### About -===== +##### .. toctree:: :maxdepth: 1 diff --git a/docs/about/introduction.rst b/docs/about/introduction.rst index e15049b91..aef2218f1 100644 --- a/docs/about/introduction.rst +++ b/docs/about/introduction.rst @@ -1,8 +1,8 @@ .. _doc_about_introduction: -============ +############ Introduction -============ +############ One of the most important things about AI research is data. In many Game Environments the rate of data (rendered frames per second, or state representations per second) is relatively slow meaning very long training times. Researchers can compensate for this problem by parallelising the number of games being played, sometimes on expensive hardward and sometimes on several servers requiring network infrastructure to pass states to the actual learning algorithms. For many researchers and hobbyists who want to learn. This approach is unobtainable and only the research teams with lots of funding and engineers supporting the hardware and infrastrcuture required. @@ -12,26 +12,30 @@ Griddly is an open-source project aimed to be a all-encompassing platform for gr Here are some of the highlighted features: +*********** Flexibility ------------ +*********** Griddly games are defined using a simple configuration language GDY in which you can configure the number of players, how inputs are converted into game mechanics, the objects and how they are rendered and what design of the levels. Read more about :ref:`GDY here` +******************** Speed + Memory Usage --------------------- +******************** The Griddly engine is written entirely in c++ and and uses the `Vulkan API `_ to render observational states. 
This means that all the games have significantly faster frame rates. Griddly also offers lightweight vectorized state rendering, which can render games states at 30k+ FPS in some games. +***************** Pre-Defined Games ------------------ +***************** Visit the :ref:`games section` here to see which games are currently available. Several games have been ported from the GVGAI and MiniGrid RL environments, which can now be run at significantly higher speeds and less memory overhead. .. note:: More games are being added as Griddly is being developed. Feel free to design your own games and let the discord community see what your have built! +******************** OpenAI Gym Interface --------------------- +******************** Griddly provides an open ai gym interface out-of-the-box which wraps the underlying raw API making Reinforcement Learning research significantly easier. diff --git a/docs/conf.py b/docs/conf.py index e7b0f366d..216887a77 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'Chris Bamford' # The full version, including alpha/beta/rc tags -release = '0.3.2' +release = '0.3.3' # -- General configuration --------------------------------------------------- @@ -33,9 +33,12 @@ extensions = [ 'recommonmark', 'sphinx_rtd_theme', - 'sphinxcontrib.images' + 'sphinxcontrib.images', + 'sphinx.ext.autosectionlabel', ] +autosectionlabel_prefix_document=True + # Add any paths that contain templates here, relative to this directory. 
templates_path = ['_templates'] diff --git a/docs/games/GriddlyRTS/img/GriddlyRTS-level-Block2D-0.png b/docs/games/GriddlyRTS/img/GriddlyRTS-level-Block2D-0.png index f4ec970e5..543350561 100644 Binary files a/docs/games/GriddlyRTS/img/GriddlyRTS-level-Block2D-0.png and b/docs/games/GriddlyRTS/img/GriddlyRTS-level-Block2D-0.png differ diff --git a/docs/games/GriddlyRTS/img/GriddlyRTS-level-Block2D-1.png b/docs/games/GriddlyRTS/img/GriddlyRTS-level-Block2D-1.png index 365e536a9..04031561a 100644 Binary files a/docs/games/GriddlyRTS/img/GriddlyRTS-level-Block2D-1.png and b/docs/games/GriddlyRTS/img/GriddlyRTS-level-Block2D-1.png differ diff --git a/docs/games/GriddlyRTS/img/GriddlyRTS-tile-harvester-Sprite2D.png b/docs/games/GriddlyRTS/img/GriddlyRTS-tile-harvester-Sprite2D.png index c0fda0b40..b20750d00 100644 Binary files a/docs/games/GriddlyRTS/img/GriddlyRTS-tile-harvester-Sprite2D.png and b/docs/games/GriddlyRTS/img/GriddlyRTS-tile-harvester-Sprite2D.png differ diff --git a/docs/games/GriddlyRTS/img/GriddlyRTS-tile-puncher-Sprite2D.png b/docs/games/GriddlyRTS/img/GriddlyRTS-tile-puncher-Sprite2D.png index 88f120874..0c3816092 100644 Binary files a/docs/games/GriddlyRTS/img/GriddlyRTS-tile-puncher-Sprite2D.png and b/docs/games/GriddlyRTS/img/GriddlyRTS-tile-puncher-Sprite2D.png differ diff --git a/docs/games/GriddlyRTS/img/GriddlyRTS-tile-pusher-Sprite2D.png b/docs/games/GriddlyRTS/img/GriddlyRTS-tile-pusher-Sprite2D.png index e047db70c..6cd64873a 100644 Binary files a/docs/games/GriddlyRTS/img/GriddlyRTS-tile-pusher-Sprite2D.png and b/docs/games/GriddlyRTS/img/GriddlyRTS-tile-pusher-Sprite2D.png differ diff --git a/docs/games/Heal_Or_Die/img/Heal_Or_Die-tile-healer-Sprite2D.png b/docs/games/Heal_Or_Die/img/Heal_Or_Die-tile-healer-Sprite2D.png index 19b1c435f..407587998 100644 Binary files a/docs/games/Heal_Or_Die/img/Heal_Or_Die-tile-healer-Sprite2D.png and b/docs/games/Heal_Or_Die/img/Heal_Or_Die-tile-healer-Sprite2D.png differ diff --git 
a/docs/games/Heal_Or_Die/img/Heal_Or_Die-tile-warrior-Sprite2D.png b/docs/games/Heal_Or_Die/img/Heal_Or_Die-tile-warrior-Sprite2D.png index 4c555b33b..c4a859553 100644 Binary files a/docs/games/Heal_Or_Die/img/Heal_Or_Die-tile-warrior-Sprite2D.png and b/docs/games/Heal_Or_Die/img/Heal_Or_Die-tile-warrior-Sprite2D.png differ diff --git a/docs/games/Kill_The_King/img/Kill_The_King-level-Sprite2D-0.png b/docs/games/Kill_The_King/img/Kill_The_King-level-Sprite2D-0.png index a56f8e9db..f0853f560 100644 Binary files a/docs/games/Kill_The_King/img/Kill_The_King-level-Sprite2D-0.png and b/docs/games/Kill_The_King/img/Kill_The_King-level-Sprite2D-0.png differ diff --git a/docs/games/Kill_The_King/img/Kill_The_King-tile-archer-Sprite2D.png b/docs/games/Kill_The_King/img/Kill_The_King-tile-archer-Sprite2D.png index bde64c8e3..ff200345c 100644 Binary files a/docs/games/Kill_The_King/img/Kill_The_King-tile-archer-Sprite2D.png and b/docs/games/Kill_The_King/img/Kill_The_King-tile-archer-Sprite2D.png differ diff --git a/docs/games/Kill_The_King/img/Kill_The_King-tile-forest-Sprite2D.png b/docs/games/Kill_The_King/img/Kill_The_King-tile-forest-Sprite2D.png index 482908b11..3c956d05d 100644 Binary files a/docs/games/Kill_The_King/img/Kill_The_King-tile-forest-Sprite2D.png and b/docs/games/Kill_The_King/img/Kill_The_King-tile-forest-Sprite2D.png differ diff --git a/docs/games/Kill_The_King/img/Kill_The_King-tile-healer-Sprite2D.png b/docs/games/Kill_The_King/img/Kill_The_King-tile-healer-Sprite2D.png index faa743f3a..407587998 100644 Binary files a/docs/games/Kill_The_King/img/Kill_The_King-tile-healer-Sprite2D.png and b/docs/games/Kill_The_King/img/Kill_The_King-tile-healer-Sprite2D.png differ diff --git a/docs/games/Kill_The_King/img/Kill_The_King-tile-king-Sprite2D.png b/docs/games/Kill_The_King/img/Kill_The_King-tile-king-Sprite2D.png index a6dc009ef..bc99cebfa 100644 Binary files a/docs/games/Kill_The_King/img/Kill_The_King-tile-king-Sprite2D.png and 
b/docs/games/Kill_The_King/img/Kill_The_King-tile-king-Sprite2D.png differ diff --git a/docs/games/Kill_The_King/img/Kill_The_King-tile-warrior-Sprite2D.png b/docs/games/Kill_The_King/img/Kill_The_King-tile-warrior-Sprite2D.png index 4c555b33b..c4a859553 100644 Binary files a/docs/games/Kill_The_King/img/Kill_The_King-tile-warrior-Sprite2D.png and b/docs/games/Kill_The_King/img/Kill_The_King-tile-warrior-Sprite2D.png differ diff --git a/docs/games/Push_Mania/img/Push_Mania-level-Sprite2D-0.png b/docs/games/Push_Mania/img/Push_Mania-level-Sprite2D-0.png index d7de8f5e6..80c3d03a5 100644 Binary files a/docs/games/Push_Mania/img/Push_Mania-level-Sprite2D-0.png and b/docs/games/Push_Mania/img/Push_Mania-level-Sprite2D-0.png differ diff --git a/docs/games/Push_Mania/img/Push_Mania-level-Sprite2D-1.png b/docs/games/Push_Mania/img/Push_Mania-level-Sprite2D-1.png index 4e92ccf0a..a71521984 100644 Binary files a/docs/games/Push_Mania/img/Push_Mania-level-Sprite2D-1.png and b/docs/games/Push_Mania/img/Push_Mania-level-Sprite2D-1.png differ diff --git a/docs/games/Push_Mania/img/Push_Mania-tile-hole-Sprite2D.png b/docs/games/Push_Mania/img/Push_Mania-tile-hole-Sprite2D.png index d4a7ccc74..607687709 100644 Binary files a/docs/games/Push_Mania/img/Push_Mania-tile-hole-Sprite2D.png and b/docs/games/Push_Mania/img/Push_Mania-tile-hole-Sprite2D.png differ diff --git a/docs/games/Push_Mania/img/Push_Mania-tile-pusher-Sprite2D.png b/docs/games/Push_Mania/img/Push_Mania-tile-pusher-Sprite2D.png index 37d47b533..ac61a81f7 100644 Binary files a/docs/games/Push_Mania/img/Push_Mania-tile-pusher-Sprite2D.png and b/docs/games/Push_Mania/img/Push_Mania-tile-pusher-Sprite2D.png differ diff --git a/docs/getting-started/action spaces/index.rst b/docs/getting-started/action spaces/index.rst new file mode 100644 index 000000000..86ecacae3 --- /dev/null +++ b/docs/getting-started/action spaces/index.rst @@ -0,0 +1,251 @@ +.. 
_doc_action_spaces: + +############# +Action Spaces +############# + +******** +Overview +******** + +Griddly provides a common interface for action spaces in python which can be access using: + +.. code-block:: python + + env = gym.make('GDY-[your game here]-v0') + + # This contains a description of the action space + env.action_space + +All actions follow the following format: + +.. code-block:: python + + action = [ + + # (Only required if there is no avatar) + x, # X coordinate of action to perform. + y, # Y coordinate of action to perform. + + # (Only required if there is more than one action type) + action_type, # The type of action to perform (move, gather, attack etc...., + + # (Always required) + action_id, # The ID of the action (These are defined by InputMapping in GDY) + ] + + env.step(action) + +All values in this array are integers. + + +:x, y: + These coordinates are required when the environment does not specify that there is an avatar to control. The coordinates chosen become the location of the action that will be performed. + + For example in a game like chess, or checkers, the coordinates would correspond to the piece that the player wants to move. + +:action_type: + The action type refers to the index of the action type as defined in the GDY. For example `move`, `gather`, `push` etc... + + A list of the registered (and correctly ordered for use in actions) types can be found using ``env.gdy.get_action_names()``. + +:action_id: + The action id is commonly used for the "direction" component of the action. The action_id directly corresponds to the ``InputMapping`` of the action. + +.. note:: if no ``InputMapping`` is set for an action, a default of 4 action ids is applied. These action ids resolve to "UP", "DOWN", "LEFT" and "RIGHT" + +.. 
note:: All action types include action_id `0` which corresponds to a no-op + + +Sampling +======== + +Sampling the action space is the same as any other environment: + +:env.action_space.sample(): + This will always produce the correct format of actions for the environment that is loaded. + + +Sampling Valid Actions +====================== + +In many environment, certain actions may have no effects at all, for example moving an avatar into an immovable object such as a wall. Or attacking a tile that has no objects. + +Griddly provides some helper methods for reducing the action spaces to only sample valid actions and produce masks for calculating valid policies + +:env.game.get_available_actions(player_id): + Returns a dict of locations of objects that can be controlled and the actions that can be used at those locations + +.. warning:: player_id=0 is reserved for NPCs and internal actions + +:env.game.get_available_action_ids(location, action_names): + Returns a dict of available action_ids at the given location for the given action_names. + +ValidActionSpaceWrapper +----------------------- + +In order to easily support games with large action spaces such as RTS games, several helper functions are included a wrapper ``ValidActionSpaceWrapper``. The ``ValidActionSpaceWrapper`` has two functions: + +- Sampling actions using this wrapper only returns valid actions in the environment. +- Two helper functions are available to create action masks which can be applied during neural network training to force the network to choose only valid actions. + +:env.get_unit_location_mask(player_id, mask_type='full'): + Returns a mask of all the locations in the grid which can be selected by a particular player. + + If ``mask_type == 'full'`` then a mask of dimensions (grid_height, grid_width) is returned. This mask can be used in the case where a one-hot representation of the entire grid is used for location selection. 
+ + If ``mask_type == 'reduced'`` then two masks are returned. One for ``grid_height`` and one for ``grid_width``. This mask can be used when two seperate one-hot representations are used for ``x`` and ``y`` selection. + +.. warning:: player_id=0 is reserved for NPCs and internal actions + +:env.get_unit_action_mask(location, action_names, padded=True): + Returns a mask for the ``action_type`` and and ``action_id`` + + If ``padded == True`` all masks will be returned with the length padded to the size of the largest number of action ids across all the actions. + + If ``padded == False`` all masks are returned with the length of the number of action ids per action. + +.. code-block:: python + + env.reset() # Wrapper must be applied after the reset + + env = ValidActionSpaceWrapper(env) + + unit_location_mask = env.get_unit_location_mask(player_id, mask_type='full') + unit_action_mask = env.get_unit_action_mask(location, action_names, padded=True) + + + + +.. seealso:: A Closer Look at Action Masking in Policy Gradient Algorithms: https://arxiv.org/abs/2006.14171 + + + +******** +Examples +******** + +In this section we break down some example action spaces. In all Griddly environments, ``env.action_space.sample()`` can be used to see what valid action spaces look like. + +Here are some explanations of valid actions in different environments are and how to use them. + +Single Player +============= + +Single Action Type +------------------ + +If the environment has a single action type then only the ``action_id`` needs to be sent to ``env.step``. + +This is usually the case in environments where there is an avatar that can only be moved and there are no special actions defined like ``attack`` or ``pick_up``. + +Assuming that our only ``action_type`` in the environment is ``move`` then the following code can be used to move the avatar in a particular direction: + +.. 
code-block:: python + + # env.step(action_id) + # OR env.step([action_id]) + + env.step(3) # Move the avatar right + env.step(1) # Move the avatar left + + +Multiple Action Types +--------------------- + +In the case where there may be a more complicated action space, for example if there is an avatar that can "move", but also "attack" in any direction around it, the ``action_type`` and ``action_id`` must both be supplied. + +For example: + +.. code-block:: python + + # env.step([action_type, action_id]) + + env.step([0, 3]) # Move the avatar right + env.step([1, 1]) # Attack to the left of the avatar + +Multi-Agent +=========== + +Multiple Player Actions +----------------------- + +In multi-agent environments, ``env.step`` expects a list of actions for all players. To send actions to individual players in a call to ``env.step``, set ``action_id = 0`` for any of the players that are not performing an action. + +for example: + +.. code-block:: python + + env.step([ + 1, # Action for player 1 + 0 # Action for player 2 (which is a no-op) + ]) + + +Single Action Type +------------------ + +If there is only a single action type available, a list of ``action_id`` values can be sent directly to ``env.step`` + +.. code-block:: python + + env.step([ + 1, # Action for player 1 + 2 # Action for player 2 + ]) + +Multiple Action Types +--------------------- + +If there are multiple action types available, ``env.step`` must contain a list of values for each player giving the ``action_type`` and ``action_id``: + +Given that there are two action types "move" and "attack" and each action type has default ``InputMapping``, the following code can be used to send "move left" to player 1 and "attack forward" to player 2. + +.. 
code-block:: python + + env.step([ + [0, 1], # Action for player 1 (move left) + [1, 2] # Action for player 2 (attack forward) + ]) + + +Real Time Strategy (RTS) +======================== + +Multiple players, Multiple Action Types, Action Coordinates +----------------------------------------------------------- + +In RTS games, multiple actions for multiple players can be performed in single time-steps. + +Lets say our RTS game has units that have an action ``move`` and an action ``gather`` (to gather resources). Leta also say that there are three units for each player. We can control them in one call to ``env.step``. + +.. code-block:: python + + # env.step([ + # [ # List of actions for player 1 + # [x1, y1, action_type1, action_id1], + # [x2, y2, action_type2, action_id2], + # ... + # ], + # [ # List of actions for player 2 + # [x1, y1, action_type1, action_id1], + # [x2, y2, action_type2, action_id2], + # .. + # ], + # ]) + + env.step([ + # Player 1 + [ + [3, 10, 0, 3], # Move the unit at [3,10] right + [4, 7, 1, 1], # The unit at [4,7] will gather resources in front of it + [4, 4, 0, 0] # The unit at [4, 4] will do nothing. (this can also be ommitted with the same effect) + ], + + # Player 2 + [ + [10, 4, 1, 3], # The unit at [10,4] will gather resources to the right + [13, 2, 1, 1] # The unit at [13,2] will gather resources to the left + ] + ]) + diff --git a/docs/getting-started/gdy/index.rst b/docs/getting-started/gdy/index.rst index 2b296625c..230b66b2d 100644 --- a/docs/getting-started/gdy/index.rst +++ b/docs/getting-started/gdy/index.rst @@ -1,7 +1,8 @@ .. _doc_getting_started_gdy: +######################## Griddly Description YAML -======================== +######################## Griddly Description YAML (GDY) is is the description language the Griddly uses to create environments and configure how to control the objects within it. @@ -135,9 +136,9 @@ This is all thats needed to define a game of Sokoban that has two levels which l .. 
image:: img/getting-started-level-0.png .. image:: img/getting-started-level-1.png - +********** GDY Schema ----------- +********** To make GDY slightly less scary, there is a GDY json schema which can be integrated with most IDEs to help write GDY files. diff --git a/docs/getting-started/gym/img/block2D.png b/docs/getting-started/gym/img/block2D.png deleted file mode 100644 index b6edff82e..000000000 Binary files a/docs/getting-started/gym/img/block2D.png and /dev/null differ diff --git a/docs/getting-started/gym/img/sprite2D.png b/docs/getting-started/gym/img/sprite2D.png deleted file mode 100644 index 378e118f8..000000000 Binary files a/docs/getting-started/gym/img/sprite2D.png and /dev/null differ diff --git a/docs/getting-started/gym/index.rst b/docs/getting-started/gym/index.rst index b2c5420f0..712ea6a05 100644 --- a/docs/getting-started/gym/index.rst +++ b/docs/getting-started/gym/index.rst @@ -1,7 +1,8 @@ .. _doc_getting_started_gym_advanced: +#################### OpenAI Gym Interface -==================== +#################### Games defined with :ref:`GDY ` files can easily be wrapped by OpenAI's gym interface. @@ -17,11 +18,25 @@ The simplest way to use a pre-made environment is to just use the following code env = gym.make('GDY-Sokoban-v0') env.reset() -.. note:: More examples and a full listing of all the games can be found on the page for each game in :ref:`Games ` +The ``gym.make`` command can also send several Griddly specific parameters to the environment: +.. code-block:: python + + env = gym.make( + 'GDY-Sokoban-v0' + player_observer_type=gd.ObserverType.VECTOR, + global_observer_type=gd.ObserverType.VECTOR, + level=0, + max_steps=None, + ) + +.. 
seealso:: More examples and a full listing of all the games can be found on the page for each game in :ref:`Games ` -To use the Advanced OpenAI Gym wrapper there's a slightly longer route to set up all the options: +******************************** +Registering Your Own Environment +******************************** +It's easy to register your own GDY files and wrap them with OpenAI gym using the ``GymWrapperFactory``: .. code-block:: python @@ -31,101 +46,29 @@ To use the Advanced OpenAI Gym wrapper there's a slightly longer route to set up if __name__ == '__main__': wrapper = GymWrapperFactory() - wrapper.build_gym_from_yaml( - 'Sokoban-Adv', - 'Single-Player/GVGAI/sokoban.yaml', - player_observer_type=gd.ObserverType.SPRITE_2D, - level=2 - ) + wrapper.build_gym_from_yaml('MyNewEnvironment', 'my_new_env_gdy.yaml') - env = gym.make('GDY-Sokoban-Adv-v0') + env = gym.make('GDY-MyNewEnvironment-v0') env.reset() ------------- - -Lets break this down a bit: - -.. code-block:: python - - wrapper = GymWrapperFactory() - -The wrapper factory is a utility provided by griddly that wraps the native Griddly environment in an OpenAI gym interface. This is always required to create gym environments. - ------------- - -.. code-block:: python - - wrapper.build_gym_from_yaml( - 'Sokoban-Adv', - 'Single-Player/GVGAI/sokoban.yaml', - player_observer_type=gd.ObserverType.SPRITE_2D, - level=2 - ) - -This command builds and loads the gym environment with the information supplied. This particular command will load the mechanics of the sokoban environment and the second level defined in the GDY file. -States will be generated by the SPRITE_2D observer. - ------------- - -.. code-block:: python - - env = gym.make(f'GDY-Sokoban-Adv-v0') - env.reset() - -This is the familiar gym make command that create the env reference. This can now be used as you would use any other gym environment. 
- - +************** Observer Types ---------------- - -In Griddly when generating an environment you can specify how you want the environment to be rendered. You can do this by telling the ``build_gym_from_yaml`` function which observer type to use. - -There are three supported observer types in Griddly: - -.. note:: for SPRITE_2D and BLOCK_2D observers, each object must define how it should be rendered with an :ref:`Observer Configuration <#/properties/Objects/items/properties/Observers>` - -gd.ObserverType.SPRITE_2D -^^^^^^^^^^^^^^^^^^^^^^^^^ - -Renders sprites defined by the GDY object `Sprite2D` descriptions. - -.. image:: img/sprite2D.png - -gd.ObserverType.BLOCK_2D -^^^^^^^^^^^^^^^^^^^^^^^^^ +************** -Renders sprites defined by the GDY object `Block2D` descriptions. - -If the `player_observer_type` argument is set to `player_observer_type=gd.ObserverType.BLOCK_2D` you should see this: - -.. image:: img/block2D.png - - -gd.ObserverType.VECTOR -^^^^^^^^^^^^^^^^^^^^^^ - -The vector observer is the fastest observer type in Griddly. It can reach tens of thousands of states per second. - -Vector observers will return a tensor of shape [*objects*, *width*, *height*] where each value is either 0 or 1 denoting that there is an object of that type in a particular location. +When generating an environment you can specify how you want the environment to be rendered. You can do this by setting the ``player_observer_type`` and ``global_observer_type`` parameters in the ``gym.make`` function, or the ``build_gym_from_yaml`` function. +.. seealso:: For more information about observation spaces, states and event history see :ref:`Observation Spaces ` +******************* The Global Observer -------------------- +******************* The global observer can be used alongside any of the other observers and will always render the entire environment regardless of how other observers are defined. 
This means that you can pass vector observations to your agents and then render with sprites or blocks to make awesome demos! .. code-block:: python - wrapper.build_gym_from_yaml( - 'Sokoban-Adv', - 'Single-Player/GVGAI/sokoban.yaml', - player_observer_type=gd.ObserverType.VECTOR, - global_observer_type=gd.ObserverType.SPRITE_2D, - level=2 - ) - - env = gym.make(f'GDY-Sokoban-Adv-v0') + env = gym.make(f'GDY-Sokoban-Adv-v0', global_observer_type=gd.ObserverType.SPRITE_2D) env.reset() env.render(observer='global') \ No newline at end of file diff --git a/docs/getting-started/installation/index.rst b/docs/getting-started/installation/index.rst index 7787e8eae..c8d53ed69 100644 --- a/docs/getting-started/installation/index.rst +++ b/docs/getting-started/installation/index.rst @@ -1,10 +1,12 @@ .. _doc_getting_started_installation: +############ Installation -============ +############ +****** Python ------- +****** Griddly supports versions of python 3.6+. @@ -24,17 +26,17 @@ To create a conda environment with Griddly installed: conda activate griddly pip install griddly - +************* Prerequisites -------------- +************* Griddly uses `Vulkan `_ to render environments. Most modern hardware will support vulkan and the required libraries should be pre-installed on your system. If you are using docker, you can find `images with vulkan `_ pre-installed which may be helpful. - +*************** Other Languages ---------------- +*************** There is no support currently for languages other than python. A java version may be supported in the future. 
diff --git a/docs/getting-started/observation spaces/img/GriddlyPO.png b/docs/getting-started/observation spaces/img/GriddlyPO.png new file mode 100644 index 000000000..0012c1622 Binary files /dev/null and b/docs/getting-started/observation spaces/img/GriddlyPO.png differ diff --git a/docs/getting-started/observation spaces/img/Spiders-level-Block2D-2.png b/docs/getting-started/observation spaces/img/Spiders-level-Block2D-2.png new file mode 100644 index 000000000..1d46118d2 Binary files /dev/null and b/docs/getting-started/observation spaces/img/Spiders-level-Block2D-2.png differ diff --git a/docs/getting-started/observation spaces/img/Spiders-level-Isometric-2.png b/docs/getting-started/observation spaces/img/Spiders-level-Isometric-2.png new file mode 100644 index 000000000..d92b04773 Binary files /dev/null and b/docs/getting-started/observation spaces/img/Spiders-level-Isometric-2.png differ diff --git a/docs/getting-started/observation spaces/img/Spiders-level-Sprite2D-2.png b/docs/getting-started/observation spaces/img/Spiders-level-Sprite2D-2.png new file mode 100644 index 000000000..bb2661e34 Binary files /dev/null and b/docs/getting-started/observation spaces/img/Spiders-level-Sprite2D-2.png differ diff --git a/docs/getting-started/observation spaces/index.rst b/docs/getting-started/observation spaces/index.rst new file mode 100644 index 000000000..349bf9a7a --- /dev/null +++ b/docs/getting-started/observation spaces/index.rst @@ -0,0 +1,252 @@ +.. _doc_observation_spaces: + +################## +Observation Spaces +################## + +******** +Overview +******** + +Observation spaces in Griddly are highly configurable. In addition to providing pixel-based and vector-based states of environments, Griddly also provides methods of accessing semantic information about the game state itself, such as state data and event history. For pixel and vector-based representations Griddly provides different **observers**. + +What is an **Observer**? 
+======================== + +An **observer** in Griddly converts the state of an environment to something that is consumable by a machine learning algorithm. It effectively `creates` the observations. In Griddly there are many different options for creating observations. This page will outline how all of these methods can be used. + +Every environment in Griddly has at least two configurable observers; the **player** observer(s) and the **global** observer. + +.. figure:: img/GriddlyPO.png + :align: center + + The "Spider Nest" environment with an isometric player observer (inset) and isometric global observer configured. + +The **player** observer(s) are what the agent in the environment `sees`. The agent might be able to see only a small area around itself (this is known as egocentric partial observability), or it might be able to see the entire environment. + +The **global** observer always gives a view of the entire environment regardless of the number of players and and size of the environment. + +Both observers have a standard OpenAI Gym observation space which can be accessed with ``env.observation_space`` describing the player observation space and ``env.global_observation_space`` describing the global observation space. + + +Single Agent Observations +========================= + +The observations for environments where a single avatar is being controlled are returned in the following way: + +.. code-block:: python + + obs = env.reset() + + # obs = np.array([ ... ]) # Player observation + + obs, reward, done, info = env.step( ... ) + + # obs = np.array([ ... ]) # Player observation + +Multi-Agent Observations +======================== + +When there are multiple agents in an environment the ``env.reset(...)`` and ``env.step(...)`` functions will return the observations of all the of agents as an array of ``np.array`` objects. Each observation in the array will be consistent with the shape of ``env.observation_shape``. 
+ +As an example in an environment with 2 players, the result of ``env.reset(...)`` and ``env.step(...)`` will be: + +.. code-block:: python + + obs = env.reset() + + # obs = [ + # np.array([ ... ]), # Player 1 observation + # np.array([ ... ]) # Player 2 observation + # ] + + obs, reward, done, info = env.step([ ... ]) + + # obs = [ + # np.array([ ... ]), # Player 1 observation + # np.array([ ... ]) # Player 2 observation + # ] + +The global observer can also be returned in the ``env.reset()`` function by setting the ``global_observations`` parameter to ``True``. In this case a dictionary is returned with the ``global`` and ``player`` keys for both observation types. + +.. code-block:: python + + obs = env.reset(global_observations=True) + + # obs = { + # 'global': np.array([ ... ]), # Global observation + # 'player': [ + # np.array([ ... ]), # Player 1 observation + # np.array([ ... ]) # Player 2 observation + # ] + # } + +****** +Pixels +****** + +.. note:: For pixel-based observers, each object must define how it should be rendered with an :ref:`Observer Configuration <#/properties/Objects/items/properties/Observers>` + +The observer type can be chosen when creating the environment with ``gym.make()`` + +.. code-block:: python + + env = gym.make( + 'GDY-MyEnvironmentName-v0', + player_observer_type=XXX, + global_observer_type=YYY + ... + ) + +The options for both the ``player_observer_type`` and ``global_observer_type`` are given below. + +:gd.ObserverType.BLOCK_2D: + Renders sprites defined by the GDY object `Block2D` descriptions. + +.. image:: img/Spiders-level-Block2D-2.png + +:gd.ObserverType.SPRITE_2D: + Renders sprites defined by the GDY object `Sprite2D` descriptions. + +.. image:: img/Spiders-level-Sprite2D-2.png + + +:gd.ObserverType.ISOMETRIC: + Renders sprites defined by the GDY object `Isometric` descriptions. + +.. 
image:: img/Spiders-level-Isometric-2.png + +****** +Vector +****** + +Vector observers will return a tensor of shape [*objects*, *width*, *height*] where each value is either 0 or 1 denoting that there is an object of that type in a particular location. + +The order of the object index in each [x,y] location can be retrieved by calling ``env.gdy.get_object_names()``. + +As an example in an 5x5 environment that has three types of object: `avatar`, `wall` and `goal`: + +.. code-block:: python + + obs_shape = env.observation_space.shape + + # obs_shape == (3,5,5) + + obs, reward, done, info = env.step( ... ) + + # obs = [ + [ # avatar in these locations + [0,0,0,0,0], + [0,1,0,0,0], + [0,0,0,0,0], + [0,0,0,0,0], + [0,0,0,0,0] + ], + [ # wall in these locations + [1,1,1,1,1], + [1,0,0,0,1], + [1,0,0,0,1], + [1,0,0,0,1], + [1,1,1,1,1] + ], + [ # goal in these locations + [0,0,0,0,0], + [0,0,0,0,0], + [0,0,0,0,0], + [0,0,0,1,0], + [0,0,0,0,0] + ] + ] + + +************** +Semantic State +************** + +A breakdown of the entire environment including internal variable values that the objects may have can be recovered using ``env.get_state()``. + +:env.get_state(): + This function will return data in the following format: + + .. code-block:: javascript + + { + 'GameTicks': 1, + 'GlobalVariables': {}, + 'Objects': [ + { + 'Name': 'avatar', + 'Location': [1, 3], + 'Orientation': 'NONE', + 'PlayerId': 1, + 'Variables': { + '_y': 3, + '_playerId': 1, + '_x': 1 + } + }, + { + 'Name': 'goal', + 'Location': [1, 3], + 'Orientation': 'NONE', + 'PlayerId': 1, + 'Variables': { + '_y': 3, + '_playerId': 1, + '_x': 1 + } + }, + { + + ... + + } + ] + } + + +************* +Event History +************* + +Event history in Griddly contains all the information about actions that have been made by the player, any internal actions that have been executed by any game mechanics, or any delayed actions that have been performed. 
+ +Event history is gathered during ``env.step()`` and returned in the ``info['History']`` variable, but it must be enabled first. + +To enable event history, ``env.enable_history(True)`` can be called after ``gym.make()`` + +The format of event history looks like this: + +.. code-block:: javascript + + [ + { + 'PlayerId': 1, + 'ActionName': 'move', + 'Tick': 0, + 'Reward': 0, + 'Delay': 0, + 'SourceObjectName': 'avatar', + 'DestinationObjectName': '_empty', + 'SourceObjectPlayerId': 1, + 'DestinationObjectPlayerId': 0, + 'SourceLocation': [2.0, 3.0], + 'DestinationLocation': [1.0, 3.0] + }, + { + 'PlayerId': 1, + 'ActionName': 'move', + 'Tick': 0, + 'Reward': 0, + 'Delay': 0, + 'SourceObjectName': 'ball', + 'DestinationObjectName': '_empty', + 'SourceObjectPlayerId': 1, + 'DestinationObjectPlayerId': 0, + 'SourceLocation': [1.0, 3.0], + 'DestinationLocation': [0.0, 3.0] + }, + ... + ] + + diff --git a/docs/getting-started/visualization/img/griddly_rts_global.png b/docs/getting-started/visualization/img/griddly_rts_global.png new file mode 100644 index 000000000..d33f360f9 Binary files /dev/null and b/docs/getting-started/visualization/img/griddly_rts_global.png differ diff --git a/docs/getting-started/visualization/img/griddly_rts_p1.png b/docs/getting-started/visualization/img/griddly_rts_p1.png new file mode 100644 index 000000000..06b56f296 Binary files /dev/null and b/docs/getting-started/visualization/img/griddly_rts_p1.png differ diff --git a/docs/getting-started/visualization/img/griddly_rts_p2.png b/docs/getting-started/visualization/img/griddly_rts_p2.png new file mode 100644 index 000000000..66aec9e1f Binary files /dev/null and b/docs/getting-started/visualization/img/griddly_rts_p2.png differ diff --git a/docs/getting-started/visualization/index.rst b/docs/getting-started/visualization/index.rst new file mode 100644 index 000000000..6a2aee944 --- /dev/null +++ b/docs/getting-started/visualization/index.rst @@ -0,0 +1,100 @@ +.. 
_doc_visualization: + +############# +Visualization +############# + +To make it easy for you to create high quality and interesting demonstrations of AIs, or to analyse the behaviour of trained agents, Griddly provides many visualization tools. + + +************** +Live Rendering +************** + +OpenAI gym allows gym environments to be rendered using the ``env.render()`` function. However in many Griddly environments you may have a choice between different players and global observations to render. + +Rendering Different Observers +============================= + +To render global observations you simply need to add the parameter ``observer='global'`` to your render function. Additionally if you want to render a particular player you can use ``observer=P`` where P is the 0-indexed player id. + +.. code-block:: python + + env.render(observer='global') # Will render the global observer + + env.render(observer=0) # Will render the observer for player 1 + + env.render(observer=1) # Will render the observer for player 2 + + +Player Highlighting +------------------- + +In games with multiple players, Griddly will highlight all the objects in the environment based on which observer is being used. + +- The global observer will highlight player owned objects with colours which are automatically picked +- The player observer will highlight objects belonging to themselves in green + +.. list-table:: Observer Highlighting + :header-rows: 1 + + * - Global + - Player 1 + - Player 2 + * - .. thumbnail:: img/griddly_rts_global.png + - .. thumbnail:: img/griddly_rts_p1.png + - .. thumbnail:: img/griddly_rts_p2.png + + + +************* +Saving Images +************* + +Griddly includes a tool ``RenderToFile`` that can directly save observations to disk as png files. + +.. code-block:: python + + from griddly.RenderTools import RenderToFile + + render_to_file = RenderToFile() + + ... 
+ + visualization = env.render(observer=.., mode='rgb_array') # Get the observation as an array + + render_to_file.render(visualization, 'my_observation.png') # save the image to disk + +************* +Saving Videos +************* + +Also saving videos with Griddly is simple using the ``VideoRecorder``. + +.. note:: you will need to make sure ffmpeg is installed in your system. + +.. code-block:: python + + from griddly.RenderTools import VideoRecorder + + video_recorder = VideoRecorder() + + ... + + # Start the video recording + observation = env.reset() + video_recorder.start("video_test.mp4", env.observation_space.shape) + + ... + + # Step the environment and record the next frame + obs, reward, done, env = env.step( ... ) + video_recorder.add_frame(obs) + + ... + + # Clean up + video_recorder.close() + + + diff --git a/docs/index.rst b/docs/index.rst index d02ee3775..0dc655aed 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -29,6 +29,8 @@ Griddly documentation. getting-started/installation/index getting-started/gym/index getting-started/gdy/index + getting-started/action spaces/index + getting-started/observation spaces/index .. toctree:: :maxdepth: 2 @@ -37,13 +39,6 @@ Griddly documentation. games/index -.. toctree:: - :maxdepth: 2 - :caption: Griddly Description YAML - :name: sec-gdy - - GDY/index - .. toctree:: :maxdepth: 10 :caption: Tutorials diff --git a/docs/modules/java/java.rst b/docs/modules/java/java.rst deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/modules/python/python.rst b/docs/modules/python/python.rst deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/tutorials/GDY Schema/index.rst b/docs/tutorials/GDY Schema/index.rst index 981ebfa35..d4aa6e0e8 100644 --- a/docs/tutorials/GDY Schema/index.rst +++ b/docs/tutorials/GDY Schema/index.rst @@ -1,7 +1,8 @@ .. 
_doc_tutorials_gdy_schema: +################### GDY Schema Tutorial -=================== +################### This tutorial will show you how to set up your IDE to help write GDY files. @@ -10,8 +11,9 @@ The GDY Schema defines all the required and optional properties that can be used You can either download the `schema `_ locally or reference it from the github repository +****************** Visual Studio Code ------------------- +****************** * Install the ``redhat.vscode-yaml`` extension from the visual studio code marketplace. @@ -37,8 +39,9 @@ This extension allows json schema files to be used to add syntax support for yam } } +******* PyCharm -------- +******* PyCharm has a feature for YAML validation with JSON schema files built in. diff --git a/docs/tutorials/GDY/actions.rst b/docs/tutorials/GDY/actions.rst index d45cd47c9..a4cde633c 100644 --- a/docs/tutorials/GDY/actions.rst +++ b/docs/tutorials/GDY/actions.rst @@ -1,7 +1,8 @@ .. _doc_tutorials_gdy_actions: +####### Actions -======= +####### Actions are the "mechanics" of any Griddly game. @@ -12,8 +13,9 @@ The **destination** of an action is the object that is affected by the action. Lets look at a few examples to make these ideas more concrete! +***************** Step 1 - Movement --------------------- +***************** We are building the game "Sokoban" so we will first define that our ``avatar`` object which we defined before can move around in empty space. To do that we can define our "move" action as follows: @@ -38,9 +40,9 @@ Finally we have ``Commands`` object in the ``Src`` key. 
The ``Commands`` object For more information about possible commands that can be run on either the ``Src`` or ``Dst`` objects go `here <#/properties/Actions/items/properties/Behaviours/definitions/behaviourDefinitionCommandList>`_ - +********************** Step 2 - Pushing boxes -------------------------- +********************** To define that we want ``box`` objects to move when the ``avatar`` object moves into them we can add the following code to our ``Behaviours`` list: @@ -70,8 +72,9 @@ The second behaviour we define allows the ``avatar`` object to interact with the .. note:: We have only allowed the ``box`` object to "move" into empty space. If the ``_dest`` location is not empty, i.e. it contains a ``wall`` object or a ``hole`` object, the command will not be executed. This will stop the previous action also, stopping the ``avatar`` from moving also. +********************************** Step 3 - Pushing a box into a hole -------------------------------------- +********************************** Now we can push boxes around in empty space, but we have no defined what will happen if we push the ``box`` into a ``hole`` object. We want to reward the player and also remove the ``box`` object. @@ -89,8 +92,9 @@ Now we can push boxes around in empty space, but we have no defined what will ha Most of this behaviour is hopefully self-explanatory by this point. There are two new commands introduced here ``reward: 1`` which gives the player a reward of value 1 and ``remove: true`` which removes the `Src` object. +*********************** Putting It All Together ------------------------ +*********************** In order to put all these actions together, there is only one minor change to make to the first behaviour. We need to add ``hole`` to the ``Dst`` objects. This adds the ability for avatars to be able to walk on top of ``hole`` objects! 
diff --git a/docs/tutorials/GDY/environment.rst b/docs/tutorials/GDY/environment.rst index e5acdf726..38c103cd2 100644 --- a/docs/tutorials/GDY/environment.rst +++ b/docs/tutorials/GDY/environment.rst @@ -1,12 +1,14 @@ .. _doc_tutorials_gdy_environment: +########### Environment -=========== +########### The Environment object contains the definition of how the game will be controlled by the player, the conditions underwhich the game ends, and the levels that are contained in the game. +***************************** Step 1 - Player Configuration ------------------------------ +***************************** The ``Player`` configuration @@ -16,9 +18,9 @@ The ``Player`` configuration AvatarObject: avatar - +******************************* Step 2 - Termination Conditions -------------------------------- +******************************* Termination conditions are the rules which decide when the game episodes are complete, and whether the agent wins or loses. @@ -32,8 +34,9 @@ Rules can be defined seperately for ``Win`` and ``Lose``. In Sokoban, we just ha We define here that the agent wins in the case that the number of ``box`` objects in the environment reaches 0. The ``:count`` option can be appended to any object name to return the number of any of the objects. +*************** Step 3 - Levels ---------------- +*************** In our Sokoban game we are going to define two levels. The layout of each level is defined by a level string which is made up of the ``MapCharacter`` characters that were defined in the :ref:`Objects ` section of this tutorial. @@ -66,9 +69,9 @@ the two defined levels will look like this when rendered: .. image:: img/getting-started-level-1.png - +******************************************** Step 4 - Name, TileSize and Background Image --------------------------------------------- +******************************************** -- code-block:: YAML @@ -80,8 +83,9 @@ Here we specify the size of the tiles in pixels ``TileSize``. 
Also if we want to Additionally every environment must have a unique ``Name`` to differentiate it from other environments. +*********************** Putting it all together ------------------------ +*********************** The environment definition with all the parts described looks like this: diff --git a/docs/tutorials/GDY/gym.rst b/docs/tutorials/GDY/gym.rst index cb9ab7113..04f4d12d9 100644 --- a/docs/tutorials/GDY/gym.rst +++ b/docs/tutorials/GDY/gym.rst @@ -1,13 +1,14 @@ .. _doc_tutorials_gdy_gym: +##################### Playing Griddly Games -===================== +##################### In this short tutorial you will learn how to load a GDY file, convert it to an OpenAI Gym interface and then use the OpenAI Gym interface to play the game with the w,a,s,d keys on your keyboard. - +**************** Step 1 - Imports ----------------- +**************** To play games with the keyboard using the gym interface, the ``play`` function can be used to wrap a gym environment @@ -20,9 +21,9 @@ The only griddly import thats required is the ``GymWrapperFactory``, this is use from griddly import GymWrapperFactory - +********************* Step 2 - Load the GDY ---------------------- +********************* the ``build_gym_from_yaml`` builds the Griddly environment from the GDY file and loads a particular level. @@ -35,8 +36,9 @@ This can then be loaded by OpenAI gym's ``make`` command. The name of the enviro wrapper.build_gym_from_yaml('SokobanTutorial', 'sokoban.yaml', level=0) +************* Step 3 - Play -------------- +************* All thats left is to play the game! diff --git a/docs/tutorials/GDY/index.rst b/docs/tutorials/GDY/index.rst index 9b0444901..45e37350e 100644 --- a/docs/tutorials/GDY/index.rst +++ b/docs/tutorials/GDY/index.rst @@ -1,7 +1,8 @@ .. _doc_tutorials_gdy: +############################# GDY Tutorial - Making Sokoban -============================= +############################# .. 
note:: It's recommended to follow the short :ref:`GDY Schema tutorial ` which will help validate any issues with GDY files as they are being created. diff --git a/docs/tutorials/GDY/objects.rst b/docs/tutorials/GDY/objects.rst index 0369009d6..ebf88123f 100644 --- a/docs/tutorials/GDY/objects.rst +++ b/docs/tutorials/GDY/objects.rst @@ -1,7 +1,8 @@ .. _doc_tutorials_gdy_objects: +####### Objects -======= +####### In this section we define the objects in the game and what they will look like when being rendered. @@ -9,8 +10,9 @@ In the game Sokoban we control a player ``avatar`` which moves around and pushes This means we have 4 objects in our simple Sokoban game. So we need to define them! +********************** Step 1 - Avatar object ----------------------- +********************** Each object we have to give a unique name to. That unique name can then be used to reference that object in other parts of the GDY configuration. In for the avatar object we will call it ``avatar`` unsurprisingly. @@ -36,8 +38,9 @@ We're going to use this image: The ``Observers`` block defines how each observer type will render this particular object. We are defining a ``Sprite2D`` observer here so we need to supply an image to it. +********************* Step 2 - Wall Objects ---------------------- +********************* Wall objects are slightly more complicated because when they are rendered they actually use 15 different images, for example corner peices, T-peices etc.... @@ -89,8 +92,9 @@ Here we define the ``Name`` and the ``MapCharacter`` like we did when the avatar In the ``Sprite2D`` Object there is now a ``TilingMode`` object which can either be ``WALL_2``, or ``WALL_16``. These tiling modes use 2 or 16 images respectively to render the walls in the game environment. The order of the walls is important to render the walls correctly. 
+************************ Step 3 - Boxes and holes ------------------------- +************************ Boxes and holes are very similar to ``avatar`` objects. The only difference is that ``hole`` objects have a different ``Z`` value which allows the ``avatar`` object to move on top of them. @@ -118,9 +122,9 @@ Boxes and holes are very similar to ``avatar`` objects. The only difference is t Sprite2D: Image: images/gvgai/oryx/cspell4.png - +*********************** Putting it all together ------------------------ +*********************** Thats it! We've defined our objects and some properties about how they will look in the game. diff --git a/python/benchmarks/MicrortsMining.yaml b/python/benchmarks/MicrortsMining.yaml index 87940a5f6..4616295f4 100644 --- a/python/benchmarks/MicrortsMining.yaml +++ b/python/benchmarks/MicrortsMining.yaml @@ -30,20 +30,6 @@ Environment: M . . . . . . . . . . . . . . . - - - - - - - - - - - - - - Actions: - Name: gather Behaviours: diff --git a/python/benchmarks/benchmark.py b/python/benchmarks/benchmark.py index 43e2545d5..e10893f30 100755 --- a/python/benchmarks/benchmark.py +++ b/python/benchmarks/benchmark.py @@ -6,6 +6,8 @@ import psutil from gym import register +from griddly.util.wrappers import InvalidMaskingRTSWrapper + process = psutil.Process(os.getpid()) @@ -13,7 +15,7 @@ def griddly(gdy_file): from griddly import GymWrapperFactory, gd wrapper = GymWrapperFactory() - wrapper.build_gym_from_yaml("Griddly", gdy_file, player_observer_type=gd.ObserverType.SPRITE_2D, + wrapper.build_gym_from_yaml("Griddly", gdy_file, player_observer_type=gd.ObserverType.VECTOR, level=0) return gym.make(f'GDY-Griddly-v0') @@ -40,20 +42,21 @@ def gvgai_test_old(): if __name__ == '__main__': - #env = griddly('dmlab_pushbox.yaml') + # env = griddly('dmlab_pushbox.yaml') # env = minigrid_test() # env = griddly('Single-Player/Mini-Grid/minigrid-eyeball.yaml') # env = micro_rts_test() - # env = griddly('MicrortsMining.yaml') + env = 
griddly('MicrortsMining.yaml') # memory usage recorded in these tests is inaccurate because the GVGAI environment is in a different process # env = gvgai_test() # env = gvgai_test_old() - env = griddly('Single-Player/GVGAI/sokoban.yaml') + # env = griddly('Single-Player/GVGAI/sokoban.yaml') env.reset() + #env = InvalidMaskingRTSWrapper(env) start = timer() frames = 0 @@ -61,7 +64,7 @@ def gvgai_test_old(): fps_samples = [] mem_samples = [] - for s in range(10000): + for s in range(100000): frames += 1 obs, reward, done, info = env.step(env.action_space.sample()) @@ -69,6 +72,8 @@ def gvgai_test_old(): # env.render() # rendered_obs = env.render(mode='rgb_array') + #state = env.get_state() + if frames % 1000 == 0: end = timer() fps = (frames / (end - start)) diff --git a/python/docs/sphinxdocs/game_docs.py b/python/docs/sphinxdocs/game_docs.py index d1b0426fd..c56cf2ec9 100644 --- a/python/docs/sphinxdocs/game_docs.py +++ b/python/docs/sphinxdocs/game_docs.py @@ -1,3 +1,4 @@ +import argparse import logging import os import textwrap @@ -142,7 +143,6 @@ def _generate_code_example(self, game_breakdown): env.render(observer='global') # Renders the entire environment """ - code_example_sphinx += 'The most basic way to create a Griddly Gym Environment. ' \ 'Defaults to level 0 and SPRITE_2D rendering.\n\n' code_example_sphinx += f'.. 
code-block:: python\n\n{textwrap.indent(basic_code_example, " ")}\n\n' @@ -343,7 +343,7 @@ def generate(self): # Save the doc images for doc_image_filename, np_doc_image in doc_images.items(): self._logger.debug(f'Writing image {doc_image_filename}') - renderer.render(np_doc_image, doc_sphinx_root.joinpath(doc_image_filename)) + renderer.render(np_doc_image.swapaxes(0, 2), doc_sphinx_root.joinpath(doc_image_filename)) relative_doc_path = game_doc_root.joinpath('index.rst') @@ -366,7 +366,7 @@ def generate(self): sphinx_string += f' - {taster_sphinx}' # Save the taster images for taster_image_filename, np_taster_image in taster_images.items(): - renderer.render(np_taster_image, self._docs_root.joinpath(taster_image_filename)) + renderer.render(np_taster_image.swapaxes(0, 2), self._docs_root.joinpath(taster_image_filename)) remaining_cols = 2 - col_index @@ -378,7 +378,14 @@ def generate(self): with open(self._docs_root.joinpath('index.rst'), 'w') as f: f.write(sphinx_string) + if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Automatically generate documentation for games.') + parser.add_argument('--filename-suffix', default='.yaml', help='Suffix for filenames to create documentation') + + args = parser.parse_args() + + filename_suffix = args.filename_suffix games_path = Path('../../../resources/games') docs_root = Path('../../../docs/games') @@ -392,7 +399,7 @@ def generate(self): print(f'Directories: {gdy_subdirectory}') for filename in filenames: - if filename.endswith('.yaml'): + if filename.endswith(filename_suffix): category = gdy_subdirectory.parts[0] gdy_file = directory_path.joinpath(filename).resolve() relative_gdy_path = gdy_file.relative_to(games_path.resolve()) diff --git a/python/examples/griddlyrts/griddly_rts_global.png b/python/examples/griddlyrts/griddly_rts_global.png new file mode 100644 index 000000000..92dffddaf Binary files /dev/null and b/python/examples/griddlyrts/griddly_rts_global.png differ diff --git 
a/python/examples/griddlyrts/griddly_rts_p1.png b/python/examples/griddlyrts/griddly_rts_p1.png new file mode 100644 index 000000000..92dffddaf Binary files /dev/null and b/python/examples/griddlyrts/griddly_rts_p1.png differ diff --git a/python/examples/griddlyrts/griddly_rts_p2.png b/python/examples/griddlyrts/griddly_rts_p2.png new file mode 100644 index 000000000..92dffddaf Binary files /dev/null and b/python/examples/griddlyrts/griddly_rts_p2.png differ diff --git a/python/examples/griddlyrts/play_griddlyrts_gym.py b/python/examples/griddlyrts/play_griddlyrts_gym.py index 62a1f891f..06cb7344c 100644 --- a/python/examples/griddlyrts/play_griddlyrts_gym.py +++ b/python/examples/griddlyrts/play_griddlyrts_gym.py @@ -3,16 +3,16 @@ import gym from griddly import GymWrapperFactory, gd -from griddly.RenderTools import VideoRecorder -from griddly.util.wrappers import InvalidMaskingRTSWrapper +from griddly.RenderTools import VideoRecorder, RenderToFile +from griddly.util.wrappers import ValidActionSpaceWrapper if __name__ == '__main__': wrapper = GymWrapperFactory() wrapper.build_gym_from_yaml("GriddlyRTS-Adv", - 'RTS/Stratega/kill-the-king.yaml', - global_observer_type=gd.ObserverType.SPRITE_2D, - player_observer_type=gd.ObserverType.ISOMETRIC, + 'RTS/GriddlyRTS.yaml', + global_observer_type=gd.ObserverType.VECTOR, + player_observer_type=gd.ObserverType.VECTOR, level=0) env_original = gym.make(f'GDY-GriddlyRTS-Adv-v0') @@ -20,7 +20,7 @@ env_original.reset() - env = InvalidMaskingRTSWrapper(env_original) + env = ValidActionSpaceWrapper(env_original) start = timer() @@ -28,17 +28,22 @@ fps_samples = [] + image_output = RenderToFile() + player1_recorder = VideoRecorder() player1_visualization = env.render(observer=0, mode='rgb_array') player1_recorder.start("player1_video_test.mp4", player1_visualization.shape) + image_output.render(player1_visualization, 'griddly_rts_p1.png') player2_recorder = VideoRecorder() player2_visualization = env.render(observer=1, 
mode='rgb_array') player2_recorder.start("player2_video_test.mp4", player2_visualization.shape) + image_output.render(player2_visualization, 'griddly_rts_p2.png') global_recorder = VideoRecorder() global_visualization = env.render(observer='global', mode='rgb_array') global_recorder.start("global_video_test.mp4", global_visualization.shape) + image_output.render(global_visualization, 'griddly_rts_global.png') for s in range(10000): @@ -46,22 +51,27 @@ action = env.action_space.sample() - # player_1_mask = env.get_unit_location_mask(0, mask_type='reduced') - # player_2_mask = env.get_unit_location_mask(1) - # - # action_masks = env.get_unit_action_mask([6, 3], ['gather', 'move'], padded=False) + player_1_mask = env.get_unit_location_mask(1) - env.render(observer='global') - env.render(observer=0) - env.render(observer=1) + valid_locations = np.where(player_1_mask == 1) - obs, reward, done, info = env.step(action) + # get masks for all move functions + for x, y in zip(valid_locations[0], valid_locations[1]): + action_names = env.gdy.get_action_names() + action_mask = env.get_unit_action_mask([x,y], action_names) + print(action_mask) + + action_masks = env.get_unit_action_mask([6, 3], ['gather', 'move'], padded=False) - global_observation = env.render(mode='rgb_array', observer='global') + global_obs = env.render(observer='global') + p1_obs = env.render(observer=0) + p2_obs = env.render(observer=1) + + obs, reward, done, info = env.step(action) - global_recorder.add_frame(global_observation) - player1_recorder.add_frame(obs[0].swapaxes(0, 2)) - player2_recorder.add_frame(obs[1].swapaxes(0, 2)) + global_recorder.add_frame(global_obs) + player1_recorder.add_frame(p1_obs) + player2_recorder.add_frame(p2_obs) if done: state = env.get_state() diff --git a/python/examples/nmmo.yaml b/python/examples/nmmo.yaml new file mode 100644 index 000000000..26868604c --- /dev/null +++ b/python/examples/nmmo.yaml @@ -0,0 +1,282 @@ +Actions: +- Behaviours: + - Dst: + Object: gnome + 
Src: + Commands: + - gt: + Arguments: + - hunger + - 0 + Commands: + - decr: hunger + - incr: health + - eq: + Arguments: + - hunger + - 0 + Commands: + - decr: health + - gt: + Arguments: + - thirst + - 0 + Commands: + - decr: thirst + - incr: health + - eq: + Arguments: + - thirst + - 0 + Commands: + - decr: health + - lt: + Arguments: + - health + - 0 + Commands: + - remove: true + - exec: + Action: tick_state + Delay: 2 + Object: gnome + InputMapping: + Inputs: + 1: + Description: Tick the gnome's internal state + OrientationVector: + - 0 + - 0 + Internal: true + Relative: true + Name: tick_state +- Behaviours: + - Dst: + Object: _empty + Src: + Commands: + - spawn: shrub + Object: shrub + InputMapping: + Inputs: + 1: + Description: Respawn a depleted shrub after some time. + VectorToDest: + - 0 + - 0 + Internal: true + Relative: true + Name: spawn_shrub +- Behaviours: + - Dst: + Commands: + - incr: src.hunger + - incr: src.hunger + - incr: src.hunger + - incr: src.hunger + - incr: src.hunger + - decr: leaves + - exec: + Action: spawn_shrub + Delay: 30 + - remove: true + Object: shrub + Src: + Commands: null + Object: gnome + - Dst: + Object: water + Src: + Commands: + - incr: thirst + - incr: thirst + - incr: thirst + - incr: thirst + - incr: thirst + Object: gnome + Name: gather +- Behaviours: + - Dst: + Commands: + - set_tile: 1 + Object: lava + Src: + Commands: + - reward: -1 + - remove: true + - mov: _dest + - set_tile: 1 + Object: gnome + - Dst: + Object: _empty + Src: + Commands: + - mov: _dest + Object: gnome + Name: move +Environment: + Description: An NMMO-type game. The idea is to set up an open-ended system wherein + agents can progress from hunter\ \ gathering, to agriculture, to basic civilization. + Levels: + - "W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W\ + \ W W W W W W W W W W W \n W S . S . . R L W W . S R . R W . . . . S . . W . .\ + \ . . . . S . . R L . . . L R S W L . . R . S . W \n W . . . R . W . S . 
W . .\ + \ . . L . . L L R W . . W . . S . W R L L W R . L W . W . . R . W S L W L W \n\ + \ W . . . . . S . W . . L L . . H1 . . S . L . R . . L S . S . . . L . . . . .\ + \ . R . . S L . R . . S W \n W . L L S . W R . . . S . L . . . . S W . R . S .\ + \ . R W . . S R . . . . . . L . . . R . L . . . . W \n W . . . . . . . . . . S\ + \ L . . . . . . . S R . . R . . . W . L . . W . . S . . . S . . . . . S . . W\ + \ \n W . . . . . . R . . . . W . . . . S L . . W . W . . . S . L . R W . . . .\ + \ . . . S S . . W . W . . W \n W . . . . . R L . . R R R . W R . . . . S . . .\ + \ . . . R S . . S R . . W . L . . W . . S . . R . W W \n W R L . . . S . . . .\ + \ . W . R W W . . S . W R . . R L L W . . . . . . L S . . . . L . . R S R . .\ + \ W \n W S R W . . . . . W L S . . S . S W . W . . . . W . . . . S S . R . . .\ + \ L . . . . W . S W . . . L W \n W . W L S S S . . . . . . . S . . . . R . . .\ + \ W R . . . . . . L . . . S . S . . . S L R . . R L R W \n W . . . L . . S . W\ + \ . . S . S . S S S . S S . . . R W . . . . . . . S . R . R L . W . W . . . R\ + \ . W \n W . . . W . R W . . . R . W . . . . . L . W L W W W . . W W . . . R .\ + \ . W . . L . . S . S . . R R W \n W S R . L . W . R S . . W . S W . . . . . .\ + \ . L . . . . . . S . . . . . R . . . . S R L . . R . . W \n W R S . . . . R L\ + \ . . . . . . R . . W . W . L . . . . . . W S S . . . W S . . R . . W R . . .\ + \ . . W \n W . . W . R . L R . R . . . . . . W . S R S S . R . . . . . . . . .\ + \ L L . R . . . . . R . . . S W W \n W W S R L . L . R S . . . R L . . . . . S\ + \ . . . S R W . R . W . S . L S . . S S S . . W . R . . . W \n W . . . . L . .\ + \ S . . S S L . . . . W . W R S S S S . . . W . W S S . . . . . . R . R R . W\ + \ . . R W \n W S . W R . L L . R R . S . . . . . . R W R . . S . R L W W R . .\ + \ W . . W R . R . . . W . . W . . W \n W S . W . . R W . . . . L L . S . L W .\ + \ L . . . S . L W R L . . W . R S . W R . W . . R S L . . . W \n W . W . . W .\ + \ . W . S . . R W . . W . 
W . R R . W . . . . S . . . L . . R . . S R . S . .\ + \ . . L L W \n W . W W . L . L W R . W . . . . . . S W . . . . . S . . . . W .\ + \ . . . S . W L . S . . W . W . . . W \n W . . . . . . . . . S . R . . . . . S\ + \ R R . S W . S W . . W R R W . . W S . W . W L . . . . R . . W \n W S . R . .\ + \ S . . S W L . . . . W . R R . R . L . W . . S R . . W . . . S . L R . . . .\ + \ . . . . W W \n W . . . . . . . . W . . R . W R . . . . W L S . R . . S S S .\ + \ . L . L . . S L R . . . . . . . . . W \n W S . L W . S . . . . . . . S W . .\ + \ R . L R W . . . . . L . . R R R . W W . L . S S L . . W . . . W \n W . R L .\ + \ S . . S W L R . . . R . R L . . . . . L . . . . L . L R S . . . . . S W W .\ + \ . . S . W . W \n W . . . . R S . . . . . . . . W . R . S S S . . . . . . R .\ + \ W . L . S S L W . R . . W R . . . L . W \n W . . W . S L . R L . . . . S . L\ + \ . . . W W . . . W . W R L . L . S R . S . . . W . . L R . S . . W \n W L . .\ + \ L . . . L R . . . R . . . H2 . . . R . . . . S . L . . . . W . S . . . . . .\ + \ W . . . S . . W \n W L L S . L L R R . . . . . S . . . . . . S . . . . . . .\ + \ . . L R R L S W . . L L . R S . . L W . W \n W W R . S . L . . S . . . L . L\ + \ S W . L R . W R . . W . . . R S . S . W . . R . . . . S . . . . . W \n W . R\ + \ . S S S . . . . . . . S W L . . . . . S W W . . R . . . S L . S L W . . S S\ + \ . . S . . . . . W \n W R . . . W . . . R W . . S . . . S . . W L R . . W . W\ + \ . . L . . . . . . L . . S . . . . R . L . W \n W S L . S R S S R . W . . W .\ + \ S . W . . . W . . . . . . . . S W . L . W . . . R . . . . W . . S L W \n W .\ + \ W R . L . R S S L . . . . . . . R S S . . . . . L S L . L W S L . L . W W .\ + \ . . . W . . . . . W \n W . . . S R . . . W R L . . L . . . . . . . . W W S .\ + \ . S . . . . R L . L S . . . S . . . . . . . W \n W W . . . W . . . . W . S .\ + \ . L . . W S . . . . S . R . . . S L L . . . . . . . . W . . W S W W . W \n W\ + \ . R . . . . L S . . L . . . R . R . . . . . . R . . S . . . 
L . R . W . R R\ + \ . . . . . . L . . . W \n W R . . . . . . . . . . . L W . . W . . . L S . . .\ + \ S . . . . . . R . . S . . . R S . W W L S . W W \n W . . W R R . . R . . L .\ + \ S S . . W L W . S . S W . . W . S . . W . . . . . . . R . . . . R . . . W \n\ + \ W L . . . . L . . . S . . S . S . . . . . . L . . . . W . . . . R R L W S S\ + \ . L W R L . . . . . S W \n W W R L R . R S . L . . . L L . L . . S L . R . .\ + \ . R R . . L R W R . . . L R . . . . . . . L L . W \n W W S W . . . L . . . .\ + \ R . . . W S W S . . . . . . . R W L . S . W . . . W R S . . . . . . . . . W\ + \ \n W . W W . . . . W . W W R . . . . R . L . L L . R . R . . . . R . L . . .\ + \ W W . R S L L . L . . S W \n W . . . . . R S W . W . . . . . S . . . . . . .\ + \ . L W S . . . . . . . L . . S R . . . . S . . . W W \n W . S W L . . L L . .\ + \ . . . W . . . . . . . L L . . . . . . . S . . . . . S . . . W . . . . . . .\ + \ W \n W L . . . L . . . R W . . . S L S . . R . W . R . L L . . . R W R . W .\ + \ . . . W . . W . R . . . L W \n W L . . W W W W . . L . R . . . . R . L W . .\ + \ . . . . . . S . . R L R . L . . W . . R R . . R W . 
W \n W W W W W W W W W W\ + \ W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W\ + \ W W" + Name: nmmo + Observers: + Isometric: + BackgroundTile: oryx/oryx_iso_dungeon/grass-1.png + IsoTileDepth: 4 + IsoTileHeight: 16 + TileSize: + - 32 + - 48 + Sprite2D: + BackgroundTile: oryx/oryx_tiny_galaxy/tg_sliced/tg_world/tg_world_floor_panel_metal_a.png + TileSize: 16 + Player: + AvatarObject: gnome + Count: 2 + Observer: + Height: 7 + OffsetX: 0 + OffsetY: 0 + RotateWithAvatar: true + TrackAvatar: true + Width: 7 + Termination: + Lose: null + Win: null + Variables: + - InitialValue: 0 + Name: player_resources + - InitialValue: 0 + Name: player_dead +Objects: +- MapCharacter: S + Name: shrub + Observers: + Isometric: + - Image: oryx/oryx_iso_dungeon/bush-1.png + Variables: + - InitialValue: 1 + Name: leaves +- MapCharacter: W + Name: water + Observers: + Isometric: + - Image: oryx/oryx_iso_dungeon/water-1.png + Offset: + - 0 + - 2 +- MapCharacter: R + Name: rock + Observers: + Isometric: + - Image: oryx/oryx_iso_dungeon/wall-grey-1.png + Offset: + - 0 + - 15 +- MapCharacter: L + Name: lava + Observers: + Isometric: + - Image: oryx/oryx_iso_dungeon/lava-1.png + Offset: + - 0 + - 4 + - Image: oryx/oryx_iso_dungeon/lava-1.png + Offset: + - 0 + - 2 +- InitialActions: + - Action: tick_state + Delay: 20 + MapCharacter: H + Name: gnome + Observers: + Block2D: + - Color: + - 0.6 + - 0.2 + - 0.2 + Scale: 0.5 + Shape: square + Isometric: + - Image: oryx/oryx_iso_dungeon/avatars/gnome-1.png + Sprite2D: + - Image: oryx/oryx_tiny_galaxy/tg_sliced/tg_monsters/tg_monsters_jelly_d1.png + Variables: + - InitialValue: 20 + Name: health + - InitialValue: 20 + Name: hunger + - InitialValue: 20 + Name: thirst + - InitialValue: 0 + Name: resources +Version: '0.1' \ No newline at end of file diff --git a/python/examples/rllib_experiment.py b/python/examples/rllib_experiment.py new file mode 100644 index 000000000..0c06847ea --- /dev/null +++ 
b/python/examples/rllib_experiment.py @@ -0,0 +1,94 @@ +from pdb import set_trace as T +import gym +from griddly import GymWrapperFactory, gd +import ray +import ray.rllib.agents.ppo as ppo +from ray.tune.registry import register_env +import copy + +reshp = (1, 2, 0) + + +def unregister(): + for env in copy.deepcopy(gym.envs.registry.env_specs): + if 'GDY' in env: + print("Remove {} from registry".format(env)) + del gym.envs.registry.env_specs[env] + + +class RLLibEnv(gym.Env): + def __init__(self, config=None): + self.env = gym.make('GDY-Spider-Nest-v0') + self.env.reset() + observation_space = self.env.observation_space + self.observation_space = gym.spaces.Box(observation_space.low.transpose(reshp), + observation_space.high.transpose(reshp)) + self.action_space = self.env.action_space + + # TODO: necessary step toward having num_workers > 0, but we are still missing python_griddly after this + + # wrapper = GymWrapperFactory() + # wrapper.build_gym_from_yaml( + # 'Spider-Nest', + # 'Single-Player/GVGAI/spider-nest.yaml', + # player_observer_type=gd.ObserverType.VECTOR, + # level=0 + # ) + + def reset(self): + obs = self.env.reset() + obs = obs.transpose(reshp) + + return obs + + def step(self, act): + obs, rew, done, info = self.env.step(act) + obs = obs.transpose(reshp) + # self.render() + + return obs, rew, done, info + + def render(self): + self.env.render(observer="global") + + +if __name__ == '__main__': + unregister() + wrapper = GymWrapperFactory() + wrapper.build_gym_from_yaml( + 'Spider-Nest', + 'Single-Player/GVGAI/spider-nest.yaml', + player_observer_type=gd.ObserverType.VECTOR, + level=0 + ) + ray.init() + rllib_config = { + "framework": "torch", + "num_workers": 0, + "num_envs_per_worker": 1, + "model": { + # "conv_filters": [[32, (13, 9), 1]], + "conv_filters": [[32, (7, 7), 1]], + }, + "env_config": { + "config": None, + }, + + } + + trainer = ppo.PPOTrainer(config=rllib_config, env=RLLibEnv) + + TEST = False + + if not TEST: + while True: + res = 
trainer.train() + print(res) + + else: + env = RLLibEnv() + env.reset() + + for i in range(1000): + env.step(env.action_space.sample()) + env.render() \ No newline at end of file diff --git a/python/examples/snippet.py b/python/examples/snippet.py index 83bda594a..b90fa5c2b 100644 --- a/python/examples/snippet.py +++ b/python/examples/snippet.py @@ -1,76 +1,28 @@ -from timeit import default_timer as timer -import numpy as np import gym - +import griddly from griddly import GymWrapperFactory, gd -from griddly.RenderTools import VideoRecorder -from griddly.util.wrappers import InvalidMaskingRTSWrapper if __name__ == '__main__': - wrapper = GymWrapperFactory() - - wrapper.build_gym_from_yaml("GriddlyRTS-Adv", - 'RTS/Stratega/kill-the-king.yaml', - global_observer_type=gd.ObserverType.VECTOR, - player_observer_type=gd.ObserverType.VECTOR, - level=0) - - env_original = gym.make(f'GDY-GriddlyRTS-Adv-v0') - # env_original = gym.make(f'GDY-GriddlyRTS-Adv-v0') - - env_original.reset() - - env = InvalidMaskingRTSWrapper(env_original) - - start = timer() - - frames = 0 - - fps_samples = [] - - player1_recorder = VideoRecorder() - player1_visualization = env.render(observer=0, mode='rgb_array') - player1_recorder.start("player1_video_test.mp4", player1_visualization.shape) - - player2_recorder = VideoRecorder() - player2_visualization = env.render(observer=1, mode='rgb_array') - player2_recorder.start("player2_video_test.mp4", player2_visualization.shape) - - global_recorder = VideoRecorder() - global_visualization = env.render(observer='global', mode='rgb_array') - global_recorder.start("global_video_test.mp4", global_visualization.shape) - - for s in range(10000): - - frames += 1 - action = env.action_space.sample() + # wrapper = GymWrapperFactory() + # wrapper.build_gym_from_yaml("Griddly", "nmmo.yaml", + # level=0,player_observer_type=gd.ObserverType.VECTOR, + # global_observer_type=gd.ObserverType.ISOMETRIC) + env = gym.make("GDY-Partially-Observable-Zelda-v0") + 
env.reset() - obs, reward, done, info = env.step(action) - global_observation = env.render(mode='rgb_array', observer='global') - player1_observation = env.render(observer=0, mode='rgb_array') - player2_observation = env.render(observer=1, mode='rgb_array') + # Replace with your own control algorithm! + for s in range(1000): - global_recorder.add_frame(global_observation) - player1_recorder.add_frame(player1_observation) - player2_recorder.add_frame(player2_observation) + available_actions = env.game.get_available_actions(1) + print(available_actions) - if done: - #state = env.get_state() - #print(state) - print(info) + for action_location, action_names in available_actions.items(): + available_action_ids = env.game.get_available_action_ids(action_location, list(action_names)) + print(available_action_ids) - if frames % 1000 == 0: - end = timer() - fps = (frames / (end - start)) - fps_samples.append(fps) - print(f'fps: {fps}') - frames = 0 - start = timer() + obs, reward, done, info = env.step(env.action_space.sample()) + env.render() # Renders the environment from the perspective of a single player - # Have to close the video recorders - player1_recorder.close() - player2_recorder.close() - global_recorder.close() - print(f'mean fps: {np.mean(fps_samples)}') + env.render(observer='global') # Renders the entire environment \ No newline at end of file diff --git a/python/examples/vectorized.py b/python/examples/vectorized.py index 036782273..264c9868f 100644 --- a/python/examples/vectorized.py +++ b/python/examples/vectorized.py @@ -5,6 +5,8 @@ game = "GDY-Partially-Observable-Zelda-v0" +n_envs = 24 + def make_env(): def _monad(): env = gym.make(game) @@ -12,12 +14,12 @@ def _monad(): return _monad if __name__ == '__main__': - raw_list = [make_env() for _ in range(10)] + raw_list = [make_env() for _ in range(n_envs)] envs = SubprocVecEnv(raw_list) init_obs = envs.reset() for i in range(10000): - envs.step(np.zeros((10,2))) + envs.step(np.zeros((n_envs,2))) 
envs.render() \ No newline at end of file diff --git a/python/griddly/GymWrapper.py b/python/griddly/GymWrapper.py index c910ec295..8ad2ca728 100644 --- a/python/griddly/GymWrapper.py +++ b/python/griddly/GymWrapper.py @@ -11,8 +11,8 @@ class GymWrapper(gym.Env): metadata = {'render.modes': ['human', 'rgb_array']} - def __init__(self, yaml_file=None, level=0, global_observer_type=gd.ObserverType.SPRITE_2D, - player_observer_type=gd.ObserverType.SPRITE_2D, max_steps=None, image_path=None, shader_path=None, + def __init__(self, yaml_file=None, level=0, global_observer_type=gd.ObserverType.VECTOR, + player_observer_type=gd.ObserverType.VECTOR, max_steps=None, image_path=None, shader_path=None, gdy=None, game=None): """ Currently only supporting a single player (player 1 as defined in the environment yaml @@ -59,6 +59,8 @@ def __init__(self, yaml_file=None, level=0, global_observer_type=gd.ObserverType self._player_last_observation = [] self._global_last_observation = None + self.num_action_ids = {} + self._enable_history = False self.game.init(self._is_clone) @@ -227,12 +229,12 @@ def _create_action_space(self): self.player_count = self.gdy.get_player_count() self.action_input_mappings = self.gdy.get_action_input_mappings() - grid_width = self.game.get_width() - grid_height = self.game.get_height() + self.grid_width = self.game.get_width() + self.grid_height = self.game.get_height() self.avatar_object = self.gdy.get_avatar_object() - has_avatar = self.avatar_object is not None and len(self.avatar_object) > 0 + self.has_avatar = self.avatar_object is not None and len(self.avatar_object) > 0 self.action_names = self.gdy.get_action_names() self.action_count = len(self.action_names) @@ -240,23 +242,24 @@ def _create_action_space(self): action_space_parts = [] - if not has_avatar: - action_space_parts.extend([grid_width, grid_height]) + if not self.has_avatar: + action_space_parts.extend([self.grid_width, self.grid_height]) if self.action_count > 1: 
action_space_parts.append(self.action_count) - max_action_ids = 0 + self.max_action_ids = 0 for action_name, mapping in sorted(self.action_input_mappings.items()): if not mapping['Internal']: num_action_ids = len(mapping['InputMappings']) + 1 - if max_action_ids < num_action_ids: - max_action_ids = num_action_ids + self.num_action_ids[action_name] = num_action_ids + if self.max_action_ids < num_action_ids: + self.max_action_ids = num_action_ids - action_space_parts.append(max_action_ids) + action_space_parts.append(self.max_action_ids) if len(action_space_parts) == 1: - action_space = gym.spaces.Discrete(max_action_ids) + action_space = gym.spaces.Discrete(self.max_action_ids) else: action_space = gym.spaces.MultiDiscrete(action_space_parts) diff --git a/python/griddly/RenderTools.py b/python/griddly/RenderTools.py index 129e19b94..6fe4714bd 100644 --- a/python/griddly/RenderTools.py +++ b/python/griddly/RenderTools.py @@ -68,7 +68,7 @@ def __init__(self): super().__init__() def render(self, observation, string_filename): - imageio.imwrite(string_filename, observation.swapaxes(0, 2)) + imageio.imwrite(string_filename, observation) class VideoRecorder(): diff --git a/python/griddly/util/action_space.py b/python/griddly/util/action_space.py index 25d636d64..930a179f2 100644 --- a/python/griddly/util/action_space.py +++ b/python/griddly/util/action_space.py @@ -1,6 +1,7 @@ import gym import numpy as np + class MultiAgentActionSpace(list): def __init__(self, agents_action_space): for x in agents_action_space: @@ -13,45 +14,102 @@ def sample(self): """ samples action for each agent from uniform distribution""" return [agent_action_space.sample() for agent_action_space in self.agents_action_space] -class ValidatedMultiAgentActionSpace(MultiAgentActionSpace): + +class ValidatedActionSpace(gym.spaces.space.Space, list): """ - The same action space as MultiAgentActionSpace, however sampling this action space only results in valid actions + Sampling this action space only 
results in valid actions """ - def __init__(self, agents_action_space, masking_wrapper): + def __init__(self, action_space, masking_wrapper): self._masking_wrapper = masking_wrapper - super().__init__(agents_action_space) - def sample(self, player_id=None): + shape = None + dtype = None - sampled_actions = [] - for player_id in range(self.__len__()): + if isinstance(action_space, gym.spaces.Discrete) or isinstance(action_space, gym.spaces.MultiDiscrete): + shape = action_space.shape + dtype = action_space.dtype + elif isinstance(action_space, MultiAgentActionSpace): + shape = action_space[0].shape + dtype = action_space[0].dtype + + self.action_space = action_space + + super().__init__(shape, dtype) + + def __len__(self): + if isinstance(self.action_space, list): + return len(self.action_space) + else: + return 1 + + def __getitem__(self, y): + if isinstance(self.action_space, list): + return self.action_space[y] + else: + raise IndexError() + + def __getattr__(self, name): + if name.startswith('_'): + raise AttributeError("attempted to get missing private attribute '{}'".format(name)) + return getattr(self.action_space, name) + + + def _sample_valid(self, player_id): + # Sample a location with valid actions + + assert player_id <= self._masking_wrapper.player_count, "Player does not exist." + assert player_id > 0, "Player 0 is reserved for internal actions only." 
- # Sample a location with valid actions - available_actions = [a for a in self._masking_wrapper.env.game.get_available_actions(player_id + 1).items()] - num_available = len(available_actions) - if num_available == 0: - return [0, 0, 0, 0] - else: - available_actions_choice = np.random.choice(num_available) + available_actions = [a for a in self._masking_wrapper.game.get_available_actions(player_id).items()] + num_available = len(available_actions) + if num_available == 0: + return [0, 0, 0, 0] + else: + available_actions_choice = self.np_random.choice(num_available) - location, actions = available_actions[available_actions_choice] + location, actions = available_actions[available_actions_choice] - available_action_ids = [aid for aid in self._masking_wrapper.env.game.get_available_action_ids(location, list( - actions)).items() if len(aid[1])>0] + available_action_ids = [aid for aid in + self._masking_wrapper.game.get_available_action_ids(location, list( + actions)).items() if len(aid[1]) > 0] - num_action_ids = len(available_action_ids) + num_action_ids = len(available_action_ids) - # If there are no available actions at all, we do a NOP (which is any action_name with action_id 0) - if num_action_ids == 0: - action_name_idx = 0 - action_id = 0 - else: - available_action_ids_choice = np.random.choice(num_action_ids) - action_name, action_ids = available_action_ids[available_action_ids_choice] - action_name_idx = self._masking_wrapper.action_names.index(action_name) - action_id = np.random.choice(action_ids) - sampled_actions.append([location[0], location[1], action_name_idx, action_id]) + # If there are no available actions at all, we do a NOP (which is any action_name with action_id 0) + if num_action_ids == 0: + action_name_idx = 0 + action_id = 0 + else: + available_action_ids_choice = self.np_random.choice(num_action_ids) + action_name, action_ids = available_action_ids[available_action_ids_choice] + action_name_idx = 
self._masking_wrapper.action_names.index(action_name) + action_id = self.np_random.choice(action_ids) + + sampled_action = [] + + # Build the action based on the action_space info + if not self._masking_wrapper.has_avatar: + sampled_action.extend([location[0], location[1]]) + + if self._masking_wrapper.action_count > 1: + sampled_action.append(action_name_idx) + + sampled_action.append(action_id) + + return sampled_action + + def sample(self, player_id=None): + + if player_id is not None: + return self._sample_valid(player_id) + + if self._masking_wrapper.player_count == 1: + return self._sample_valid(1) + + sampled_actions = [] + for player_id in range(self._masking_wrapper.player_count): + sampled_actions.append(self._sample_valid(player_id+1)) - return sampled_actions \ No newline at end of file + return sampled_actions diff --git a/python/griddly/util/breakdown.py b/python/griddly/util/breakdown.py index 509adda41..bbe380341 100644 --- a/python/griddly/util/breakdown.py +++ b/python/griddly/util/breakdown.py @@ -27,7 +27,8 @@ def __exit__(self, type, value, traceback): def render_rgb(self): - observation = np.array(self.game.observe(), copy=False) + # Set copy to true here because we want to use these later after environment resets + observation = np.array(self.game.observe(), copy=True) if self.observer_type == gd.ObserverType.VECTOR: self._vector2rgb = Vector2RGB(10, observation.shape[0]) diff --git a/python/griddly/util/wrappers.py b/python/griddly/util/wrappers.py index e7ec65cfc..68496a18d 100644 --- a/python/griddly/util/wrappers.py +++ b/python/griddly/util/wrappers.py @@ -1,10 +1,10 @@ import gym import numpy as np -from griddly.util.action_space import ValidatedMultiAgentActionSpace +from griddly.util.action_space import ValidatedActionSpace -class InvalidMaskingRTSWrapper(gym.Wrapper): +class ValidActionSpaceWrapper(gym.Wrapper): """ Creates a wrapper that can produce invalid action masking. 
@@ -28,7 +28,6 @@ def __init__(self, env): raise RuntimeError("Please reset the environment before applying the InvalidMaskingRTSWrapper") super().__init__(env) - self.action_space = self._override_action_space() def get_unit_location_mask(self, player_id, mask_type='full'): @@ -40,10 +39,13 @@ def get_unit_location_mask(self, player_id, mask_type='full'): but allows the agent to still choose invalid actions. """ + assert player_id <= self.player_count, "Player does not exist." + assert player_id > 0, "Player 0 is reserved for internal actions only." + if mask_type == 'full': - grid_mask = np.zeros((self._grid_width, self._grid_height)) - for location, action_names in self.env.game.get_available_actions(player_id + 1).items(): + grid_mask = np.zeros((self.grid_width, self.grid_height)) + for location, action_names in self.env.game.get_available_actions(player_id).items(): grid_mask[[location[0]], [location[1]]] = 1 return grid_mask @@ -51,7 +53,7 @@ def get_unit_location_mask(self, player_id, mask_type='full'): grid_width_mask = np.zeros(self._grid_width) grid_height_mask = np.zeros(self._grid_height) - for location, action_names in self.env.game.get_available_actions(player_id + 1).items(): + for location, action_names in self.env.game.get_available_actions(player_id).items(): grid_width_mask[location[0]] = 1 grid_height_mask[location[1]] = 1 return grid_height_mask, grid_width_mask @@ -67,7 +69,7 @@ def get_unit_action_mask(self, location, action_names, padded=True): """ action_masks = {} for action_name, action_ids in self.env.game.get_available_action_ids(location, action_names).items(): - mask_size = self.max_action_ids if padded else self.valid_action_mappings[action_name] + mask_size = self.max_action_ids if padded else self.num_action_ids[action_name] action_ids_mask = np.zeros(mask_size) # action_id 0 is always a NOP action_ids_mask[0] = 1 @@ -77,8 +79,8 @@ def get_unit_action_mask(self, location, action_names, padded=True): return action_masks def 
_override_action_space(self): - return ValidatedMultiAgentActionSpace(self.action_space, self) + return ValidatedActionSpace(self.action_space, self) def clone(self): - cloned_env = InvalidMaskingRTSWrapper(self.env.clone()) + cloned_env = ValidActionSpaceWrapper(self.env.clone()) return cloned_env diff --git a/python/setup.py b/python/setup.py index dababff2c..c9904a117 100644 --- a/python/setup.py +++ b/python/setup.py @@ -71,7 +71,7 @@ def griddly_package_data(config='Debug'): setup( name='griddly', - version="0.3.2", + version="0.3.3", author_email="chrisbam4d@gmail.com", description="Griddly Python Libraries", long_description=long_description, diff --git a/python/tests/history_test.py b/python/tests/history_test.py index 17f07e0e0..7c3595754 100644 --- a/python/tests/history_test.py +++ b/python/tests/history_test.py @@ -25,7 +25,7 @@ def build_test_env(test_name, yaml_file, enable_history=True): return env -def test_step_SinglePlayer_HasHistory(test_name): +def test_history_SinglePlayer_HasHistory(test_name): """ Assuming there is a single avatar Action is in form env.step(actionId) @@ -55,7 +55,7 @@ def test_step_SinglePlayer_HasHistory(test_name): assert info['History'] == expected_history -def test_step_SinglePlayer_NoHistory(test_name): +def test_history_SinglePlayer_NoHistory(test_name): """ Assuming there is a single avatar Action is in form env.step(actionId) @@ -75,7 +75,7 @@ def test_step_SinglePlayer_NoHistory(test_name): assert isinstance(sample, int) -def test_step_SinglePlayer_MultipleAction(test_name): +def test_history_SinglePlayer_MultipleAction(test_name): """ There is no avatar Player performing multiple actions in a single step @@ -108,7 +108,7 @@ def test_step_SinglePlayer_MultipleAction(test_name): assert info['History'] == expected_history -def test_step_MultiplePlayer_History(test_name): +def test_history_MultiplePlayer_History(test_name): """ There is an avatar Multiple players @@ -139,7 +139,7 @@ def 
test_step_MultiplePlayer_History(test_name): assert info['History'] == expected_history -def test_step_MultiplePlayer_MultipleAction_History(test_name): +def test_history_MultiplePlayer_MultipleAction_History(test_name): """ There no avatar, multiple players diff --git a/python/tests/step_test.py b/python/tests/step_test.py index 2cb1e0c40..78547ee79 100644 --- a/python/tests/step_test.py +++ b/python/tests/step_test.py @@ -43,9 +43,12 @@ def test_step_SinglePlayer_SingleActionType_SingleValue(test_name): ) assert env.observation_space.shape == (1, 5, 6) + assert env.global_observation_space.shape == (1, 5, 6) assert env.action_space.shape == () assert env.action_space.n == 5 + assert env.gdy.get_object_names() == ['avatar'] + obs, reward, done, info = env.step(1) avatar_state = get_object_state(env, 'avatar') @@ -66,9 +69,12 @@ def test_step_SinglePlayer_SingleActionType_ArrayValue(test_name): ) assert env.observation_space.shape == (1, 5, 6) + assert env.global_observation_space.shape == (1, 5, 6) assert env.action_space.shape == () assert env.action_space.n == 5 + assert env.gdy.get_object_names() == ['avatar'] + obs, reward, done, info = env.step([1]) assert obs.shape == (1, 5, 6) @@ -97,9 +103,12 @@ def test_step_SinglePlayer_SelectSource_SingleActionType(test_name): ) assert env.observation_space.shape == (1, 5, 6) + assert env.global_observation_space.shape == (1, 5, 6) assert env.action_space.shape == (3,) assert np.all(env.action_space.nvec == [5, 6, 5]) + assert env.gdy.get_object_names() == ['avatar'] + obs, reward, done, info = env.step([2, 3, 1]) assert obs.shape == (1, 5, 6) @@ -131,9 +140,12 @@ def test_step_SinglePlayer_SelectSource_SingleActionType_MultipleAction(test_nam ) assert env.observation_space.shape == (2, 5, 6) + assert env.global_observation_space.shape == (2, 5, 6) assert env.action_space.shape == (3,) assert np.all(env.action_space.nvec == [5, 6, 5]) + assert env.gdy.get_object_names() == ['avatar1', 'avatar2'] + obs, reward, done, 
info = env.step([ [2, 3, 1], [1, 4, 3], @@ -166,9 +178,12 @@ def test_step_SinglePlayer_MultipleActionType(test_name): ) assert env.observation_space.shape == (1, 5, 6) + assert env.global_observation_space.shape == (1, 5, 6) assert env.action_space.shape == (2,) assert np.all(env.action_space.nvec == [2, 5]) + assert env.gdy.get_object_names() == ['avatar'] + obs, reward, done, info = env.step([0, 1]) assert obs.shape == (1, 5, 6) @@ -205,9 +220,12 @@ def test_step_SinglePlayer_SelectSource_MultipleActionType(test_name): ) assert env.observation_space.shape == (1, 5, 6) + assert env.global_observation_space.shape == (1, 5, 6) assert env.action_space.shape == (4,) assert np.all(env.action_space.nvec == [5, 6, 2, 5]) + assert env.gdy.get_object_names() == ['avatar'] + obs, reward, done, info = env.step([2, 3, 0, 1]) assert obs.shape == (1, 5, 6) @@ -248,9 +266,12 @@ def test_step_SinglePlayer_SelectSource_MultipleActionType_MultipleAction(test_n ) assert env.observation_space.shape == (2, 5, 6) + assert env.global_observation_space.shape == (2, 5, 6) assert env.action_space.shape == (4,) assert np.all(env.action_space.nvec == [5, 6, 2, 5]) + assert env.gdy.get_object_names() == ['avatar1', 'avatar2'] + obs, reward, done, info = env.step([ [2, 3, 0, 1], [1, 4, 0, 1] @@ -291,8 +312,8 @@ def test_step_MultiplePlayer_SingleActionType_SingleValue(test_name): Multiple players env.step([ - [actionId_player1], - [actionId_player2] + actionId_player1, + actionId_player2 ]) """ env = build_test_env( @@ -303,6 +324,9 @@ def test_step_MultiplePlayer_SingleActionType_SingleValue(test_name): assert len(env.observation_space) == 2 assert len(env.action_space) == 2 + assert env.global_observation_space.shape == (1, 5, 6) + assert env.gdy.get_object_names() == ['avatar'] + for p in range(env.player_count): assert env.observation_space[p].shape == (1, 5, 6) assert env.action_space[p].shape == () @@ -346,6 +370,9 @@ def test_step_MultiplePlayer_SingleActionType_ArrayValue(test_name): 
assert len(env.observation_space) == 2 assert len(env.action_space) == 2 + assert env.global_observation_space.shape == (1, 5, 6) + assert env.gdy.get_object_names() == ['avatar'] + for p in range(env.player_count): assert env.observation_space[p].shape == (1, 5, 6) assert env.action_space[p].shape == () @@ -392,6 +419,9 @@ def test_step_MultiplePlayer_MultipleActionType(test_name): assert len(env.observation_space) == 2 assert len(env.action_space) == 2 + assert env.global_observation_space.shape == (1, 5, 6) + assert env.gdy.get_object_names() == ['avatar'] + for p in range(env.player_count): assert env.observation_space[p].shape == (1, 5, 6) assert env.action_space[p].shape == (2,) @@ -421,6 +451,54 @@ def test_step_MultiplePlayer_MultipleActionType(test_name): assert sample[1].shape == (2,) +def test_step_MultiplePlayer_SelectSource_SingleActionType(test_name): + """ + There no avatar, multiple players, single action type + + env.step([ + [x1, y1, actionId1], + [x2, y2, actionId2] + ]) + """ + env = build_test_env( + test_name, + "tests/gdy/test_step_MultiPlayer_SelectSource_SingleActionType.yaml" + ) + + assert len(env.observation_space) == 2 + assert len(env.action_space) == 2 + + assert env.global_observation_space.shape == (1, 5, 6) + assert env.gdy.get_object_names() == ['avatar'] + + for p in range(env.player_count): + assert env.observation_space[p].shape == (1, 5, 6) + assert env.action_space[p].shape == (3,) + assert np.all(env.action_space[p].nvec == [5, 6, 5]) + + obs, reward, done, info = env.step([ + [1, 3, 1], + [3, 3, 3], + ]) + + assert obs[0].shape == (1, 5, 6) + assert reward[0] == 0 + assert obs[1].shape == (1, 5, 6) + assert reward[1] == 0 + assert not done + assert info == {} + + player1_avatar_state = get_object_state(env, 'avatar', player=1) + player2_avatar_state = get_object_state(env, 'avatar', player=2) + + assert player1_avatar_state['Location'] == [0, 3] + assert player2_avatar_state['Location'] == [4, 3] + + sample = 
env.action_space.sample() + assert len(sample) == 2 + assert sample[0].shape == (3,) + assert sample[1].shape == (3,) + def test_step_MultiplePlayer_SelectSource_MultipleActionType(test_name): """ There no avatar, multiple players @@ -438,6 +516,9 @@ def test_step_MultiplePlayer_SelectSource_MultipleActionType(test_name): assert len(env.observation_space) == 2 assert len(env.action_space) == 2 + assert env.global_observation_space.shape == (1, 5, 6) + assert env.gdy.get_object_names() == ['avatar'] + for p in range(env.player_count): assert env.observation_space[p].shape == (1, 5, 6) assert env.action_space[p].shape == (4,) @@ -489,6 +570,9 @@ def test_step_MultiplePlayer_SelectSource_SingleActionType_MultipleAction(test_n assert len(env.observation_space) == 2 assert len(env.action_space) == 2 + assert env.global_observation_space.shape == (2, 5, 6) + assert env.gdy.get_object_names() == ['avatar1', 'avatar2'] + for p in range(env.player_count): assert env.observation_space[p].shape == (2, 5, 6) assert env.action_space[p].shape == (3,) @@ -551,6 +635,9 @@ def test_step_MultiplePlayer_SelectSource_MultipleActionType_MultipleAction(test assert len(env.observation_space) == 2 assert len(env.action_space) == 2 + assert env.global_observation_space.shape == (2, 5, 6) + assert env.gdy.get_object_names() == ['avatar1', 'avatar2'] + for p in range(env.player_count): assert env.observation_space[p].shape == (2, 5, 6) assert env.action_space[p].shape == (4,) diff --git a/python/tests/valid_action_space_wrapper_test.py b/python/tests/valid_action_space_wrapper_test.py new file mode 100644 index 000000000..4ee2aaea0 --- /dev/null +++ b/python/tests/valid_action_space_wrapper_test.py @@ -0,0 +1,485 @@ +import numpy as np +import gym +import pytest +from griddly import GymWrapperFactory, gd +from griddly.util.wrappers import ValidActionSpaceWrapper + + +@pytest.fixture +def test_name(request): + return request.node.name + + +def build_test_env(test_name, yaml_file): + 
wrapper_factory = GymWrapperFactory() + + wrapper_factory.build_gym_from_yaml( + test_name, + yaml_file, + global_observer_type=gd.ObserverType.VECTOR, + player_observer_type=gd.ObserverType.VECTOR, + ) + + env = gym.make(f'GDY-{test_name}-v0') + env.reset() + env = ValidActionSpaceWrapper(env) + return env + + +def get_object_state(env, object_name, player=1): + state = env.get_state() + for object in state['Objects']: + if object['Name'] == object_name and object['PlayerId'] == player: + return object + + +def check_valid_actions(sampled_actions, possible_actions): + for possible in possible_actions: + if np.all(sampled_actions == possible): + return True + + return False + + +def test_vasw_SinglePlayer_SingleActionType(test_name): + env = build_test_env( + test_name, + "tests/gdy/test_step_SinglePlayer_SingleActionType.yaml" + ) + + assert env.action_space.shape == () + assert env.action_space.n == 5 + + # Test masks + unit_location_mask = env.get_unit_location_mask(1) + + expected_location_mask = [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0] + ] + + assert np.all(unit_location_mask == expected_location_mask) + action_mask = env.get_unit_action_mask((2, 3), ["move"]) + + expected_action_id_mask = [1, 1, 1, 1, 1] + + assert np.all(action_mask['move'] == expected_action_id_mask) + + possible_actions = [[0], [1], [2], [3], [4]] + + # Sample a valid action + env.action_space.seed(0) + sampled = env.action_space.sample() + + assert check_valid_actions(sampled, possible_actions) + + +def test_vasw_SinglePlayer_SelectSource_SingleActionType(test_name): + env = build_test_env( + test_name, + "tests/gdy/test_step_SinglePlayer_SelectSource_SingleActionType.yaml" + ) + + assert env.action_space.shape == (3,) + assert np.all(env.action_space.nvec == [5, 6, 5]) + + # Test masks + unit_location_mask = env.get_unit_location_mask(1) + + expected_location_mask = [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 
0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0] + ] + + assert np.all(unit_location_mask == expected_location_mask) + action_mask = env.get_unit_action_mask((2, 3), ["move"]) + + expected_action_id_mask = [1, 1, 1, 1, 1] + + assert np.all(action_mask['move'] == expected_action_id_mask) + + possible_actions = [ + [2, 3, 0], + [2, 3, 1], + [2, 3, 2], + [2, 3, 3], + [2, 3, 4] + ] + + # Sample a valid action + sampled = env.action_space.sample() + + assert check_valid_actions(sampled, possible_actions) + + +def test_vasw_SinglePlayer_MultipleActionType(test_name): + env = build_test_env( + test_name, + "tests/gdy/test_step_SinglePlayer_MultipleActionType.yaml" + ) + + assert env.action_space.shape == (2,) + assert np.all(env.action_space.nvec == [2, 5]) + + # Test masks + unit_location_mask = env.get_unit_location_mask(1) + + expected_location_mask = [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0] + ] + + assert np.all(unit_location_mask == expected_location_mask) + action_mask1 = env.get_unit_action_mask((2, 3), ["move"]) + expected_action_id_mask1 = [1, 1, 1, 1, 1] + assert np.all(action_mask1['move'] == expected_action_id_mask1) + + action_mask2 = env.get_unit_action_mask((2, 3), ["other_move"]) + expected_action_id_mask2 = [1, 1, 1, 1, 1] + assert np.all(action_mask2['other_move'] == expected_action_id_mask2) + + all_action_mask = env.get_unit_action_mask((2, 3), ["move", "other_move"]) + + assert np.all(all_action_mask['move'] == expected_action_id_mask1) + assert np.all(all_action_mask['other_move'] == expected_action_id_mask2) + + possible_actions = [ + [0, 0], [1, 0], + [0, 1], [1, 1], + [0, 2], [1, 2], + [0, 3], [1, 3], + [0, 4], [1, 4], + ] + + # Sample a valid action + sampled = env.action_space.sample() + + assert check_valid_actions(sampled, possible_actions) + + +def test_vasw_SinglePlayer_SelectSource_MultipleActionType(test_name): + env = build_test_env( + test_name, + 
"tests/gdy/test_step_SinglePlayer_SelectSource_MultipleActionType.yaml" + ) + + assert env.action_space.shape == (4,) + assert np.all(env.action_space.nvec == [5, 6, 2, 5]) + + # Test masks + unit_location_mask = env.get_unit_location_mask(1) + + expected_location_mask = [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0] + ] + + assert np.all(unit_location_mask == expected_location_mask) + action_mask1 = env.get_unit_action_mask((2, 3), ["move"]) + expected_action_id_mask1 = [1, 1, 1, 1, 1] + assert np.all(action_mask1['move'] == expected_action_id_mask1) + + action_mask2 = env.get_unit_action_mask((2, 3), ["other_move"]) + expected_action_id_mask2 = [1, 1, 1, 1, 1] + assert np.all(action_mask2['other_move'] == expected_action_id_mask2) + + all_action_mask = env.get_unit_action_mask((2, 3), ["move", "other_move"]) + + assert np.all(all_action_mask['move'] == expected_action_id_mask1) + assert np.all(all_action_mask['other_move'] == expected_action_id_mask2) + + possible_actions = [ + [2, 3, 0, 0], [2, 3, 1, 0], [1, 4, 0, 0], [1, 4, 1, 0], + [2, 3, 0, 1], [2, 3, 1, 1], [1, 4, 0, 1], [1, 4, 1, 1], + [2, 3, 0, 2], [2, 3, 1, 2], [1, 4, 0, 2], [1, 4, 1, 2], + [2, 3, 0, 3], [2, 3, 1, 3], [1, 4, 0, 3], [1, 4, 1, 3], + [2, 3, 0, 4], [2, 3, 1, 4], [1, 4, 0, 4], [1, 4, 1, 4], + ] + + # Sample a valid action + sampled = env.action_space.sample() + + assert check_valid_actions(sampled, possible_actions) + + +def test_vasw_MultiPlayer_SingleActionType(test_name): + env = build_test_env( + test_name, + "tests/gdy/test_step_MultiPlayer_SingleActionType.yaml" + ) + + assert len(env.action_space) == 2 + + for p in range(env.player_count): + assert env.action_space[p].shape == () + assert env.action_space[p].n == 5 + + # Test masks + unit_location_mask1 = env.get_unit_location_mask(1) + + expected_location_mask1 = [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0] 
+ ] + + assert np.all(unit_location_mask1 == expected_location_mask1) + + unit_location_mask2 = env.get_unit_location_mask(2) + + expected_location_mask2 = [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0] + ] + + assert np.all(unit_location_mask2 == expected_location_mask2) + + action_mask = env.get_unit_action_mask((1, 3), ["move"]) + expected_action_id_mask = [1, 1, 1, 1, 1] + assert np.all(action_mask['move'] == expected_action_id_mask) + + action_mask = env.get_unit_action_mask((3, 3), ["move"]) + expected_action_id_mask = [1, 1, 1, 1, 1] + assert np.all(action_mask['move'] == expected_action_id_mask) + + possible_actions = [[0], [1], [2], [3], [4]] + + # Sample a valid action + env.action_space.seed(0) + sampled = env.action_space.sample() + + # Test that we sample both players + assert check_valid_actions(sampled[0], possible_actions) + assert check_valid_actions(sampled[1], possible_actions) + + +def test_vasw_MultiplePlayer_SelectSource_SingleActionType(test_name): + env = build_test_env( + test_name, + "tests/gdy/test_step_MultiPlayer_SelectSource_SingleActionType.yaml" + ) + + assert len(env.action_space) == 2 + + for p in range(env.player_count): + assert env.action_space[p].shape == (3,) + assert np.all(env.action_space[p].nvec == [5, 6, 5]) + + # Test masks + unit_location_mask1 = env.get_unit_location_mask(1) + + expected_location_mask1 = [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0] + ] + + assert np.all(unit_location_mask1 == expected_location_mask1) + + unit_location_mask2 = env.get_unit_location_mask(2) + + expected_location_mask2 = [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0] + ] + + assert np.all(unit_location_mask2 == expected_location_mask2) + + action_mask = env.get_unit_action_mask((1, 3), ["move"]) + expected_action_id_mask = [1, 1, 1, 1, 1] + assert 
np.all(action_mask['move'] == expected_action_id_mask) + + action_mask = env.get_unit_action_mask((3, 3), ["move"]) + expected_action_id_mask = [1, 1, 1, 1, 1] + assert np.all(action_mask['move'] == expected_action_id_mask) + + possible_actions = [ + [ + [1, 3, 0], + [1, 3, 1], + [1, 3, 2], + [1, 3, 3], + [1, 3, 4] + ], + [ + [3, 3, 0], + [3, 3, 1], + [3, 3, 2], + [3, 3, 3], + [3, 3, 4] + ], + + ] + + # Sample a valid action + env.action_space.seed(0) + sampled = env.action_space.sample() + + # Test that we sample both players + assert check_valid_actions(sampled[0], possible_actions[0]) + assert check_valid_actions(sampled[1], possible_actions[1]) + +def test_vasw_MultiplePlayer_MultipleActionType(test_name): + env = build_test_env( + test_name, + "tests/gdy/test_step_MultiPlayer_MultipleActionType.yaml" + ) + + assert len(env.action_space) == 2 + + for p in range(env.player_count): + assert env.action_space[p].shape == (2,) + assert np.all(env.action_space[p].nvec == [2, 5]) + + # Test masks + unit_location_mask1 = env.get_unit_location_mask(1) + + expected_location_mask1 = [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0] + ] + + assert np.all(unit_location_mask1 == expected_location_mask1) + + unit_location_mask2 = env.get_unit_location_mask(2) + + expected_location_mask2 = [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0] + ] + + assert np.all(unit_location_mask2 == expected_location_mask2) + + action_mask = env.get_unit_action_mask((1, 3), ["move"]) + expected_action_id_mask = [1, 1, 1, 1, 1] + assert np.all(action_mask['move'] == expected_action_id_mask) + + action_mask = env.get_unit_action_mask((3, 3), ["move"]) + expected_action_id_mask = [1, 1, 1, 1, 1] + assert np.all(action_mask['move'] == expected_action_id_mask) + + possible_actions = [ + [ + [0, 0], [1, 0], + [0, 1], [1, 1], + [0, 2], [1, 2], + [0, 3], [1, 3], + [0, 4], [1, 4], + 
], + [ + [0, 0], [1, 0], + [0, 1], [1, 1], + [0, 2], [1, 2], + [0, 3], [1, 3], + [0, 4], [1, 4], + ], + + ] + + # Sample a valid action + env.action_space.seed(0) + sampled = env.action_space.sample() + + # Test that we sample both players + assert check_valid_actions(sampled[0], possible_actions[0]) + assert check_valid_actions(sampled[1], possible_actions[1]) + + +def test_vasw_MultiplePlayer_SelectSource_MultipleActionType(test_name): + env = build_test_env( + test_name, + "tests/gdy/test_step_MultiPlayer_SelectSource_MultipleActionType.yaml" + ) + + assert len(env.action_space) == 2 + + for p in range(env.player_count): + assert env.action_space[p].shape == (4,) + assert np.all(env.action_space[p].nvec == [5, 6, 2, 5]) + + # Test masks + unit_location_mask1 = env.get_unit_location_mask(1) + + expected_location_mask1 = [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0] + ] + + assert np.all(unit_location_mask1 == expected_location_mask1) + + unit_location_mask2 = env.get_unit_location_mask(2) + + expected_location_mask2 = [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0] + ] + + assert np.all(unit_location_mask2 == expected_location_mask2) + + action_mask = env.get_unit_action_mask((1, 3), ["move"]) + expected_action_id_mask = [1, 1, 1, 1, 1] + assert np.all(action_mask['move'] == expected_action_id_mask) + + action_mask = env.get_unit_action_mask((3, 3), ["move"]) + expected_action_id_mask = [1, 1, 1, 1, 1] + assert np.all(action_mask['move'] == expected_action_id_mask) + + possible_actions = [ + [ + [1, 3, 0, 0], [1, 3, 1, 0], + [1, 3, 0, 1], [1, 3, 1, 1], + [1, 3, 0, 2], [1, 3, 1, 2], + [1, 3, 0, 3], [1, 3, 1, 3], + [1, 3, 0, 4], [1, 3, 1, 4] + ], + [ + [3, 3, 0, 0], [3, 3, 1, 0], + [3, 3, 0, 1], [3, 3, 1, 1], + [3, 3, 0, 2], [3, 3, 1, 2], + [3, 3, 0, 3], [3, 3, 1, 3], + [3, 3, 0, 4], [3, 3, 1, 4] + ], + + ] + + # Sample a valid action + 
env.action_space.seed(0) + sampled = env.action_space.sample() + + # Test that we sample both players + assert check_valid_actions(sampled[0], possible_actions[0]) + assert check_valid_actions(sampled[1], possible_actions[1]) diff --git a/src/Griddly/Core/GDY/Objects/ObjectGenerator.cpp b/src/Griddly/Core/GDY/Objects/ObjectGenerator.cpp index b8e6c7df7..8ab3ad9e6 100644 --- a/src/Griddly/Core/GDY/Objects/ObjectGenerator.cpp +++ b/src/Griddly/Core/GDY/Objects/ObjectGenerator.cpp @@ -25,7 +25,8 @@ void ObjectGenerator::defineNewObject(std::string objectName, uint32_t zIdx, cha objectDefinitions_.insert({objectName, std::make_shared(objectDefinition)}); objectChars_[mapChar] = objectName; - objectIds_.insert({objectName, objectCount_++}); + objectIds_.insert({objectName, objectNames_.size()}); + objectNames_.push_back(objectName); } void ObjectGenerator::defineActionBehaviour( @@ -230,6 +231,10 @@ std::string &ObjectGenerator::getObjectNameFromMapChar(char character) { return objectCharIt->second; } +const std::vector &ObjectGenerator::getObjectNames() const { + return objectNames_; +} + std::shared_ptr &ObjectGenerator::getObjectDefinition(std::string objectName) { auto objectDefinitionIt = objectDefinitions_.find(objectName); if (objectDefinitionIt == objectDefinitions_.end()) { diff --git a/src/Griddly/Core/GDY/Objects/ObjectGenerator.hpp b/src/Griddly/Core/GDY/Objects/ObjectGenerator.hpp index 2a174d64b..712df031a 100644 --- a/src/Griddly/Core/GDY/Objects/ObjectGenerator.hpp +++ b/src/Griddly/Core/GDY/Objects/ObjectGenerator.hpp @@ -54,11 +54,13 @@ class ObjectGenerator : public std::enable_shared_from_this { virtual std::unordered_map> getObjectDefinitions() const; + virtual const std::vector& getObjectNames() const; + private: std::unordered_map objectChars_; std::unordered_map> objectDefinitions_; std::unordered_map objectIds_; - uint32_t objectCount_ = 0; + std::vector objectNames_; std::string avatarObject_; std::unordered_map actionInputsDefinitions_; diff 
--git a/src/Griddly/Core/GameProcess.cpp b/src/Griddly/Core/GameProcess.cpp index 163b8d247..8b642c16b 100644 --- a/src/Griddly/Core/GameProcess.cpp +++ b/src/Griddly/Core/GameProcess.cpp @@ -211,7 +211,7 @@ std::shared_ptr GameProcess::getObserver() { std::unordered_map> GameProcess::getAvailableActionNames(uint32_t playerId) const { std::unordered_map> availableActionNames; - // TODO: we can cache alot of this if there are many players so it only needs to be created once. + // TODO: we can cache a lot of this if there are many players so it only needs to be created once. std::unordered_set internalActions; auto actionInputsDefinitions = gdyFactory_->getActionInputsDefinitions(); for (auto actionInputDefinition : actionInputsDefinitions) { @@ -221,6 +221,7 @@ std::unordered_map> GameProcess::get } // For every object in the grid return the actions that the object can perform + // TODO: do not iterate over all the objects if we have avatars. for (auto object : grid_->getObjects()) { if (playerId == object->getPlayerId()) { auto actions = object->getAvailableActionNames();