diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a01d4e4..d6497dc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,7 +28,7 @@ jobs: python3 -m pip install --upgrade pip python3 -m pip install pytest python3 -m pip install pytest-cov - python3 -m pip install -e .[tests] + python3 -m pip install -e .[tests, gym] - name: Tests run: | diff --git a/dev/Gym codelet.ipynb b/dev/Gym codelet.ipynb new file mode 100644 index 0000000..ea44177 --- /dev/null +++ b/dev/Gym codelet.ipynb @@ -0,0 +1,360 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Optional, Any\n", + "\n", + "import gymnasium as gym\n", + "from gymnasium.wrappers import TransformAction, TransformObservation\n", + "\n", + "import cst_python as cst\n", + "from cst_python.python.gym import GymCodelet" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "env = gym.make(\"Blackjack-v1\")\n", + "\n", + "env = TransformObservation(env, \n", + " lambda obs:{\"player_sum\":obs[0], \"dealer_card\":obs[1], \"usable_ace\":obs[2]}, \n", + " gym.spaces.Dict({\"player_sum\":env.observation_space[0], \"dealer_card\":env.observation_space[1], \"usable_ace\":env.observation_space[2]}))\n", + "\n", + "env = TransformAction(env, \n", + " lambda action:action[\"hit\"], \n", + " gym.spaces.Dict({\"hit\":env.action_space}))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mind = cst.Mind()\n", + "gym_codelet = GymCodelet(mind, env)\n", + "mind.insert_codelet(gym_codelet)\n", + "\n", + "mind.start()\n", + "gym_codelet.seed_memory.set_info(42)\n", + "gym_codelet.reset_memory.set_info(not gym_codelet.reset_memory.get_info())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732724413462, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732724413462, evaluation=0.0, I=15, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732724413462, evaluation=0.0, I=0, name=usable_ace]},\n", + " MemoryObject [idmemoryobject=6, timestamp=1732724413462, evaluation=0.0, I=False, name=terminated],\n", + " MemoryObject [idmemoryobject=4, timestamp=1732724413462, evaluation=0.0, I=0, name=reward])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Observation {'player_sum': 25, 'dealer_card': 2, 'usable_ace': 0}\n" + ] + }, + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories[\"hit\"].set_info(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732724413492, evaluation=0.0, I=2, 
name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732724413492, evaluation=0.0, I=25, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732724413492, evaluation=0.0, I=0, name=usable_ace]},\n", + " MemoryObject [idmemoryobject=6, timestamp=1732724413492, evaluation=0.0, I=True, name=terminated],\n", + " MemoryObject [idmemoryobject=4, timestamp=1732724413492, evaluation=0.0, I=-1.0, name=reward])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.reset_memory.set_info(True)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732724413554, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732724413554, evaluation=0.0, I=15, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732724413554, evaluation=0.0, I=0, name=usable_ace]},\n", + " MemoryObject [idmemoryobject=6, timestamp=1732724413554, evaluation=0.0, I=False, name=terminated],\n", + " MemoryObject [idmemoryobject=4, timestamp=1732724413554, evaluation=0.0, I=0, name=reward])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Observation {'player_sum': 15, 'dealer_card': 2, 'usable_ace': 0}\n" + ] + }, + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories[\"hit\"].set_info(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732724413580, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732724413580, evaluation=0.0, I=15, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732724413580, evaluation=0.0, I=0, name=usable_ace]},\n", + " MemoryObject [idmemoryobject=6, timestamp=1732724413580, evaluation=0.0, I=True, name=terminated],\n", + " MemoryObject [idmemoryobject=4, timestamp=1732724413580, evaluation=0.0, I=1.0, name=reward])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "env = gym.make(\"Blackjack-v1\")\n", + "mind = cst.Mind()\n", + "\n", + "gym_codelet = GymCodelet(mind, env)\n", + 
"mind.insert_codelet(gym_codelet)\n", + "\n", + "mind.start()\n", + "gym_codelet.seed_memory.set_info(42)\n", + "gym_codelet.reset_memory.set_info(not gym_codelet.reset_memory.get_info())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'observation': MemoryObject [idmemoryobject=0, timestamp=1732724413609, evaluation=0.0, I=(15, 2, 0), name=observation]},\n", + " MemoryObject [idmemoryobject=4, timestamp=1732724413609, evaluation=0.0, I=False, name=terminated1],\n", + " MemoryObject [idmemoryobject=2, timestamp=1732724413609, evaluation=0.0, I=0, name=reward1])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Observation (25, 2, 0)\n" + ] + }, + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories[\"action\"].set_info(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'observation': MemoryObject [idmemoryobject=0, timestamp=1732724413632, evaluation=0.0, I=(25, 2, 0), name=observation]},\n", + " MemoryObject [idmemoryobject=4, timestamp=1732724413632, evaluation=0.0, I=True, name=terminated1],\n", + " MemoryObject [idmemoryobject=2, timestamp=1732724413632, evaluation=0.0, I=-1.0, name=reward1])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/index.rst b/docs/index.rst index 06ffaa8..a8afed6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,6 +19,7 @@ _examples/Implementing a Architecture _examples/Publisher-Subscriber _examples/Activation and Monitoring + _examples/Gymnasium Integration .. 
toctree:: :maxdepth: 4 diff --git a/examples/Gymnasium Integration.ipynb b/examples/Gymnasium Integration.ipynb new file mode 100644 index 0000000..ad81ade --- /dev/null +++ b/examples/Gymnasium Integration.ipynb @@ -0,0 +1,714 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Gymnasium Integration\n", + "\n", + "[![Open in Colab](https://img.shields.io/badge/Open%20in%20Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/H-IAAC/CST-Python/blob/main/examples/Gymnasium%20Integration.ipynb) [![Open in Github](https://img.shields.io/badge/Open%20in%20Github-100000?style=for-the-badge&logo=github&logoColor=white)](https://github.com/H-IAAC/CST-Python/blob/main/examples/Gymnasium%20Integration.ipynb)\n", + "\n", + "[Gymnasium](https://gymnasium.farama.org/) is the library that defines the most widely used interface for reinforcement learning environments. CST-Python provides an interface for interacting with these environments from a cognitive agent." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's start by importing CST-Python and the other required modules:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " import cst_python as cst\n", + " import gymnasium as gym\n", + "except:\n", + " !python3 -m pip install cst_python[gym]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "from gymnasium.wrappers import TransformAction, TransformObservation\n", + "\n", + "from cst_python.python.gym import GymCodelet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The GymCodelet\n", + "\n", + "The GymCodelet is the main interface with environments. Before we use it, we need to create the environment and the agent's mind.\n", + "\n", + "The environment we are going to use is the Blackjack card game. See the [environment documentation](https://gymnasium.farama.org/environments/toy_text/blackjack/) for more details about the game and the environment."
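A compact sketch of the setup that the following cells build step by step (assuming the `cst_python[gym]` extra is installed; the memory names follow `gym_codelet.py` further down in this diff):

```python
# Compact sketch of the setup the following cells build step by step
# (assumes the cst_python[gym] extra is installed).
import gymnasium as gym

import cst_python as cst
from cst_python.python.gym import GymCodelet

env = gym.make("Blackjack-v1")
mind = cst.Mind()

gym_codelet = GymCodelet(mind, env)   # creates observation, action, reward, reset,
mind.insert_codelet(gym_codelet)      # terminated, truncated, info, seed and
mind.start()                          # step_count memories

gym_codelet.seed_memory.set_info(42)     # seed used on the next reset
gym_codelet.reset_memory.set_info(True)  # writing to the reset memory resets the environment

print(gym_codelet.observation_memories["observation"].get_info())  # e.g. (15, 2, 0)
```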
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "env = gym.make(\"Blackjack-v1\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "mind = cst.Mind()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With the mind and the environment, we can create the codelet, insert it into the mind, and start it:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "gym_codelet = GymCodelet(mind, env)\n", + "mind.insert_codelet(gym_codelet)\n", + "\n", + "mind.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "One important detail is that the GymCodelet always runs in [Publisher-Subscriber](https://h-iaac.github.io/CST-Python/_build/html/_examples/Publisher-Subscriber.html) mode.\n", + "\n", + "It creates two important memories for starting the environment: the seed memory and the reset memory.\n", + "\n", + "We will set the environment seed to 42 to show how this works, and reset the environment:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.seed_memory.set_info(42)\n", + "gym_codelet.reset_memory.set_info(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we look at the observation memories, we see a single memory holding the observation provided by the environment, a tuple with the player's current sum, the dealer's showing card and whether the player has a usable ace:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "tags": [ + "observation0" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'observation': MemoryObject [idmemoryobject=0, timestamp=1732730372039, evaluation=0.0, I=(15, 2, 0), name=observation]}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "tags": [ + "observation1" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(15, 2, 0)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories[\"observation\"].get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The step count memory shows the number of steps since the episode started:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "tags": [ + "step_count" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.step_count_memory.get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The action memories also contain a single \"action\" memory:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "tags": [ + "action0" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'action': MemoryObject [idmemoryobject=1, timestamp=1732730372025, evaluation=0.0, I=1, name=action]}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories" + ] + }, + { +
"cell_type": "markdown", + "metadata": {}, + "source": [ + "We will set it to `1` to hit." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories[\"action\"].set_info(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When the action memory changes, the codelet executes a step in the environment. We can see that the step count and the observation change:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "tags": [ + "step_count+observation0" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1, (25, 2, 0))" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.step_count_memory.get_info(), gym_codelet.observation_memories[\"observation\"].get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we busted, the environment terminated:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "tags": [ + "terminated0" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.terminated_memory.get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And the step reward is -1, as we lost:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "tags": [ + "reward0" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "-1.0" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.reward_memory.get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will start a new episode. Note that the codelet resets the environment each time the reset memory's timestamp changes, even if its content is the same.
The first observation is the same as before, since we set the environment seed:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "tags": [ + "observation2" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(15, 2, 0)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.reset_memory.set_info(True)\n", + "gym_codelet.observation_memories[\"observation\"].get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This time, we will choose to stick:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "tags": [ + "observation3" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(15, 2, 0)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories[\"action\"].set_info(0)\n", + "gym_codelet.observation_memories[\"observation\"].get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And we won this game:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "tags": [ + "terminated+reward0" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(True, 1.0)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.terminated_memory.get_info(), gym_codelet.reward_memory.get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dict Spaces\n", + "\n", + "So far, the codelet has mapped the whole environment observation to a single memory with a generic name. However, if the environment has observation and action spaces of type Dict, the codelet will map each observation and each action to its own memory.\n", + "\n", + "Let's see this in practice."
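Before moving on to Dict spaces, the single-memory interaction above can be wrapped in a short loop. The sketch below uses a random policy and assumes, as the PR's own `test_episode` does, that each `set_info` call completes the corresponding reset or step before the next read, keeping at least 1 ms between resets:

```python
import random
import time

# Play a few Blackjack episodes through the codelet's memories (random policy).
for episode in range(3):
    time.sleep(1e-3)  # reset detection is timestamp-based, with 1 ms resolution
    gym_codelet.reset_memory.set_info(True)

    while not (gym_codelet.terminated_memory.get_info()
               or gym_codelet.truncated_memory.get_info()):
        action = random.randint(0, 1)  # 0 = stick, 1 = hit
        gym_codelet.action_memories["action"].set_info(action)

    print(episode, gym_codelet.reward_memory.get_info())
```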
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "env = gym.make(\"Blackjack-v1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Different from before, we will use TransformObservation and TransformAction to transform the original observations and actions into Dict Spaces:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "env = TransformObservation(env, \n", + " lambda obs:{\"player_sum\":obs[0], \"dealer_card\":obs[1], \"usable_ace\":obs[2]}, \n", + " gym.spaces.Dict({\"player_sum\":env.observation_space[0], \"dealer_card\":env.observation_space[1], \"usable_ace\":env.observation_space[2]}))\n", + "\n", + "env = TransformAction(env, \n", + " lambda action:action[\"hit\"], \n", + " gym.spaces.Dict({\"hit\":env.action_space}))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's create and start the agent and environment just like before:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mind = cst.Mind()\n", + "gym_codelet = GymCodelet(mind, env)\n", + "mind.insert_codelet(gym_codelet)\n", + "\n", + "mind.start()\n", + "\n", + "gym_codelet.seed_memory.set_info(42)\n", + "gym_codelet.reset_memory.set_info(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This time, we can see that the observation memories changed, with a single memory for each observation:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "tags": [ + "observation4" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732730372367, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732730372367, evaluation=0.0, I=15, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732730372367, evaluation=0.0, I=0, name=usable_ace]}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "tags": [ + "observation5" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'dealer_card': 2, 'player_sum': 15, 'usable_ace': 0}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "{memory_name:gym_codelet.observation_memories[memory_name].get_info() for memory_name in gym_codelet.observation_memories}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The action memory also changed it's name:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "tags": [ + "action1" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'hit': MemoryObject [idmemoryobject=3, timestamp=1732730372365, evaluation=0.0, I=0, name=hit]}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Just like before, we choose to stick:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories[\"hit\"].set_info(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And won:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "tags": [ + "terminated+reward1" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(True, 1.0)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.terminated_memory.get_info(), gym_codelet.reward_memory.get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Next steps\n", + "\n", + "The idea is not to use the Codelet to manually interface with the environment like this example, but to create a cognitive architecture to perform the environment's task.\n", + "\n", + "Another possibility is to combine GymCodelet with MemoryStorage to use gym environments with a remote cognitive agent or in CST-Java." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/README.md b/examples/README.md index 2116e3e..621060e 100644 --- a/examples/README.md +++ b/examples/README.md @@ -5,4 +5,5 @@ Here we have some examples of how to use the CST-Python: - [Introduction to CST-Python](https://h-iaac.github.io/CST-Python/_build/html/_examples/Introduction%20to%20CST-Python.html): what is CST-Python, and basics about how to use it. - [Implementing a Architecture](https://h-iaac.github.io/CST-Python/_build/html/_examples/Implementing%20a%20Architecture.html): how to implement a cognitive architecture using CST-Python. - [Publisher-Subscriber](https://h-iaac.github.io/CST-Python/_build/html/_examples/Publisher-Subscriber.html): using the publisher-subscriber mechanism for synchronous codelets. -- [Activation and Monitoring](https://h-iaac.github.io/CST-Python/_build/html/_examples/Activation%20and%20Monitoring.html): using codelet's activation value and monitoring the agent. \ No newline at end of file +- [Activation and Monitoring](https://h-iaac.github.io/CST-Python/_build/html/_examples/Activation%20and%20Monitoring.html): using codelet's activation value and monitoring the agent. +- [Gymnasium Integration](https://h-iaac.github.io/CST-Python/_build/html/_examples/Gymnasium%20Integration.html): using gymnasium environments with CST. 
\ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 0f8634c..f5c97c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ Homepage = "https://hiaac.unicamp.br" tests = ["mypy", "testbook", "ipython", "ipykernel", "numpy", "matplotlib"] doc_generation = ["sphinx", "sphinx_rtd_theme", "nbsphinx", "sphinx-mdinclude==0.5.4"] dev = ["cffconvert"] +gym = ["gymnasium"] [tool.setuptools] include-package-data = true diff --git a/src/cst_python/python/gym/__init__.py b/src/cst_python/python/gym/__init__.py new file mode 100644 index 0000000..2f2b3d4 --- /dev/null +++ b/src/cst_python/python/gym/__init__.py @@ -0,0 +1 @@ +from .gym_codelet import GymCodelet \ No newline at end of file diff --git a/src/cst_python/python/gym/gym_codelet.py b/src/cst_python/python/gym/gym_codelet.py new file mode 100644 index 0000000..3c99e50 --- /dev/null +++ b/src/cst_python/python/gym/gym_codelet.py @@ -0,0 +1,278 @@ +from typing import Optional, Any, cast, Mapping + +try: + import gymnasium as gym +except ModuleNotFoundError: + import gym # type: ignore + +from cst_python.core.entities import Codelet, Mind, Memory, MemoryObject + +class GymCodelet(Codelet): + ''' + Codelet to interface with gymnasium/gym environments. Creates memories for the observation, + action, reward, reset, terminated, truncated, info and seed; and updates them stepping the + environment with the action. + ''' + + _last_indexes : dict[str, int] = {"reward":-1, "reset":-1, + "terminated":-1, "truncated":-1, + "info":-1, "seed":-1, + "step_count":-1} + + def __init__(self, mind:Mind, env:gym.Env): + ''' + GymCodelet constructor. + + Always runs automatically in publish-subscribe mode. + + Args: + mind (Mind): agent's mind. + env (gym.Env): environment to interface. + ''' + super().__init__() + + assert mind._raw_memory is not None # RawMemory cannot be None for creating memories + + self.env = env + + self.observation_memories = self.space_to_memories(mind, env.observation_space) + self.action_memories = self.space_to_memories(mind, env.action_space, action=True) + + self._common_memories : dict[str, MemoryObject] = {} + for name in ["reward", "reset", "terminated", "truncated", "info", "seed", "step_count"]: + self._last_indexes[name] += 1 + + memory_name = name + if self._last_indexes[name] != 0: + memory_name += str(self._last_indexes[name]) + + self._common_memories[name] = cast(MemoryObject, mind.create_memory_object(memory_name)) + + self._common_memories["reward"].set_info(0.0) + self._common_memories["reset"].set_info(False) + self._common_memories["terminated"].set_info(False) + self._common_memories["truncated"].set_info(False) + self._common_memories["info"].set_info({}) + self._common_memories["seed"].set_info(None) + self._common_memories["step_count"].set_info(0) + + + self.is_memory_observer = True + for memory_name in self.action_memories: + memory = self.action_memories[memory_name] + memory.add_memory_observer(self) + self._common_memories["reset"].add_memory_observer(self) + + self._last_reset = 0 + + @property + def reward_memory(self) -> MemoryObject: + ''' + Memory that contains the environment reward (float). + ''' + return self._common_memories["reward"] + + @property + def reset_memory(self) -> MemoryObject: + ''' + Memory that contains the environment reset. + + If timestamp changes, the codelet resets the environment. 
+ ''' + return self._common_memories["reset"] + + @property + def terminated_memory(self) -> MemoryObject: + ''' + Memory that contains the environment terminated state. + ''' + return self._common_memories["terminated"] + + @property + def truncated_memory(self) -> MemoryObject: + ''' + Memory that contains the environment truncated state. + ''' + return self._common_memories["truncated"] + + @property + def info_memory(self) -> MemoryObject: + ''' + Memory that contains the environment info. + ''' + return self._common_memories["info"] + + @property + def seed_memory(self) -> MemoryObject: + ''' + Memory that contains the seed to use in the environment reset. + ''' + return self._common_memories["seed"] + + @property + def step_count_memory(self) -> MemoryObject: + ''' + Memory that contains the step count for the current environment + episode. + ''' + return self._common_memories["step_count"] + + def access_memory_objects(self) -> None: #NOSONAR + pass + + def calculate_activation(self) -> None: #NOSONAR + pass + + def proc(self) -> None: + if self._last_reset < self.reset_memory.get_timestamp(): + self._last_reset = self.reset_memory.get_timestamp() + + observation, info = self.env.reset(seed=self.seed_memory.get_info()) + reward = 0.0 + terminated = False + truncated = False + step_count = 0 + + else: + action = self.memories_to_space(self.action_memories, self.env.action_space) + observation, r, terminated, truncated, info = self.env.step(action) + reward = float(r) #SupportsFloat to float + + step_count = self.step_count_memory.get_info()+1 + + self.reward_memory.set_info(reward) + self.terminated_memory.set_info(terminated) + self.truncated_memory.set_info(truncated) + self.info_memory.set_info(info) + self.step_count_memory.set_info(step_count) + + self.sample_to_memories(observation, self.observation_memories) + + @classmethod + def reset_indexes(cls) -> None: + ''' + Reset the indexes for setting the sufix of new memories. + ''' + cls._last_indexes : dict[str, int] = {"reward":-1, "reset":-1, + "terminated":-1, "truncated":-1, + "info":-1, "seed":-1, + "step_count":-1} + + @classmethod + def space_to_memories(cls, mind:Mind, + space:gym.Space, + action:bool=False, + memory_prefix:Optional[str]=None) -> dict[str, MemoryObject]: + ''' + Creates memories from a gym Space definition. + + Args: + mind (Mind): mind to create the memories. + space (gym.Space): space defining the memories to create. + If gym.space.Dict, creates a memory for each element, + creates a single memory otherwise. + action (bool, optional): If True, creates a memory with 'action' + name for non Dict space, uses 'observation' name otherwise. + Defaults to False. + memory_prefix (Optional[str], optional): prefix to memories name. + Defaults to None. + + Returns: + dict[str, MemoryObject]: created memories, indexed by the space + element name or 'action'/'observation'. 
+ ''' + assert mind._raw_memory is not None # RawMemory cannot be None for creating memories + + if memory_prefix is None: + memory_prefix = "" + + memories : dict[str, MemoryObject] = {} + + if isinstance(space, gym.spaces.Dict): + for space_name in space: + subspace = space[space_name] + + name = space_name + if space_name in cls._last_indexes: + cls._last_indexes[space_name] += 1 + name += str(cls._last_indexes[space_name]) + else: + cls._last_indexes[space_name] = 0 + name = memory_prefix+name + + info = subspace.sample() + memory = cast(MemoryObject, mind.create_memory_object(name, info)) + memories[space_name] = memory + + else: + if action: + space_name = "action" + else: + space_name = "observation" + + name = space_name + if space_name in cls._last_indexes: + cls._last_indexes[space_name] += 1 + name += str(cls._last_indexes[space_name]) + else: + cls._last_indexes[space_name] = 0 + + name = memory_prefix+name + + info = space.sample() + memory = cast(MemoryObject, mind.create_memory_object(name, info)) + memories[space_name] = memory + + + return memories + + @classmethod + def sample_to_memories(cls, sample:Mapping[str, Any]|Any, + memories:Mapping[str, Memory]) -> None: + ''' + Writes a gym.Space sample to memories. + + Args: + sample (Mapping[str, Any] | Any): sample to write in the memories. + memories (Mapping[str, Memory]): memories corresponding to + the space elements. + ''' + if isinstance(sample, dict): + for name in sample: + element = sample[name] + memory = memories[name] + + memory.set_info(element) + else: + memory = memories[next(iter(memories))] + memory.set_info(sample) + + + @classmethod + def memories_to_space(cls, memories:Mapping[str, Memory], + space:gym.Space) -> dict[str, Any]|Any: + ''' + Convert the memories info to the space sample. + + Args: + memories (Mapping[str, Memory]): memories to get the sample. + space (gym.Space): space the sample belongs + + Raises: + ValueError: if the generated sample from the memories + doesn't belongs to the space + + Returns: + dict[str, Any]|Any: converted sample. + ''' + if isinstance(space, gym.spaces.Dict): + sample = {} + for memory_name in memories: + sample[memory_name] = memories[memory_name].get_info() + else: + sample = memories[next(iter(memories))].get_info() + + if not space.contains(sample): + raise ValueError("Memories do not correspond to an element of the Space.") + + return sample \ No newline at end of file diff --git a/tests/cst_python/python/__init__.py b/tests/cst_python/python/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cst_python/python/gym/__init__.py b/tests/cst_python/python/gym/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cst_python/python/gym/test_gym_codelet.py b/tests/cst_python/python/gym/test_gym_codelet.py new file mode 100644 index 0000000..5651016 --- /dev/null +++ b/tests/cst_python/python/gym/test_gym_codelet.py @@ -0,0 +1,124 @@ +from contextlib import redirect_stdout +import math +import unittest +import time +import threading +import io + +import gymnasium as gym +from gymnasium.spaces import Box, Dict +import numpy as np +from numpy.testing import assert_array_almost_equal + +from cst_python import MemoryObject, Mind +from cst_python.python.gym import GymCodelet + +class TestGymCodelet(unittest.TestCase): + def setUp(self) -> None: + ... 
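The conversion helpers defined in `gym_codelet.py` above can also be used on their own. A quick round-trip sketch, in the spirit of the tests that follow (the Dict space used here is illustrative):

```python
import gymnasium as gym

from cst_python import Mind
from cst_python.python.gym import GymCodelet

# Round trip: Dict space -> memories -> space sample.
mind = Mind()
space = gym.spaces.Dict({"x": gym.spaces.Box(-1, 1, (2,)), "y": gym.spaces.Discrete(3)})

GymCodelet.reset_indexes()                            # restart the memory-name numbering
memories = GymCodelet.space_to_memories(mind, space)  # one memory per Dict key: "x", "y"

sample = space.sample()
GymCodelet.sample_to_memories(sample, memories)          # write the sample into the memories
rebuilt = GymCodelet.memories_to_space(memories, space)  # read it back as a space sample

assert space.contains(rebuilt)
```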
+ + def test_space_to_memories(self) -> None: + space = Box(-1, 1, (2,)) + mind = Mind() + + GymCodelet.reset_indexes() + + memories = GymCodelet.space_to_memories(mind, space) + keys = list(memories.keys()) + assert len(keys) == 1 + assert keys[0] == "observation" + memory = memories[keys[0]] + assert memory.get_name() == "observation" + assert space.contains(memory.get_info()) + + memories = GymCodelet.space_to_memories(mind, space) + memory = memories[next(iter(memories))] + assert memory.get_name() == "observation1" + + space = Dict({"x":Box(-1, 1, (2,)), "y":Box(-2, 1, (1,))}) + memories = GymCodelet.space_to_memories(mind, space) + keys = list(memories.keys()) + assert len(keys) == 2 + assert "x" in keys + assert "y" in keys + assert memories["x"].get_name() == "x" + assert memories["y"].get_name() == "y" + + memories = GymCodelet.space_to_memories(mind, space) + keys = list(memories.keys()) + assert len(keys) == 2 + assert "x" in keys + assert "y" in keys + assert memories["x"].get_name() == "x1" + assert memories["y"].get_name() == "y1" + + def test_sample_to_memories(self) -> None: + space = Box(-1, 1, (2,)) + sample = space.sample() + memories = {"observation":MemoryObject()} + + GymCodelet.sample_to_memories(sample, memories) + + assert_array_almost_equal(memories["observation"].get_info(), sample) + + + space = Dict({"x":Box(-1, 1, (2,)), "y":Box(-2, 1, (1,))}) + sample = space.sample() + memories = {"x":MemoryObject(), "y":MemoryObject()} + + GymCodelet.sample_to_memories(sample, memories) + + assert_array_almost_equal(memories["x"].get_info(), sample["x"]) + assert_array_almost_equal(memories["y"].get_info(), sample["y"]) + + def test_memories_to_space(self) -> None: + space = Box(-1, 1, (2,)) + sample = space.sample() + memories = {"observation":MemoryObject()} + memories["observation"].set_info(sample) + + reconstruced_sample = GymCodelet.memories_to_space(memories, space) + assert space.contains(reconstruced_sample) + assert_array_almost_equal(reconstruced_sample, sample) + + space = Dict({"x":Box(-1, 1, (2,)), "y":Box(-2, 1, (1,))}) + sample = space.sample() + memories = {"x":MemoryObject(), "y":MemoryObject()} + memories["x"].set_info(sample["x"]) + memories["y"].set_info(sample["y"]) + + reconstruced_sample = GymCodelet.memories_to_space(memories, space) + assert space.contains(reconstruced_sample) + assert_array_almost_equal(reconstruced_sample["x"], sample["x"]) + assert_array_almost_equal(reconstruced_sample["y"], sample["y"]) + + def test_episode(self) -> None: + env = gym.make("MountainCar-v0") + mind = Mind() + gym_codelet = GymCodelet(mind, env) + + mind.start() + + assert gym_codelet.step_count_memory.get_info() == 0 + gym_codelet.reset_memory.set_info(True) + assert gym_codelet.step_count_memory.get_info() == 0 + gym_codelet.action_memories["action"].set_info(1) + assert gym_codelet.step_count_memory.get_info() == 1 + gym_codelet.action_memories["action"].set_info(1) + assert gym_codelet.step_count_memory.get_info() == 2 + time.sleep(1e-3) #Minimum time for memory timestamp comparation is 1 ms + gym_codelet.reset_memory.set_info(True) + assert gym_codelet.step_count_memory.get_info() == 0 + + def test_env_memories(self) -> None: + env = gym.make("Blackjack-v1") + mind = Mind() + gym_codelet = GymCodelet(mind, env) + + assert len(gym_codelet.observation_memories) == 1 + assert "observation" in gym_codelet.observation_memories + assert env.observation_space.contains(gym_codelet.observation_memories["observation"].get_info()) + + assert 
len(gym_codelet.action_memories) == 1 + assert "action" in gym_codelet.action_memories + assert env.action_space.contains(gym_codelet.action_memories["action"].get_info()) \ No newline at end of file diff --git a/tests/examples/test_gym_integration.py b/tests/examples/test_gym_integration.py new file mode 100644 index 0000000..f1f7e55 --- /dev/null +++ b/tests/examples/test_gym_integration.py @@ -0,0 +1,45 @@ +import os +import re + +from testbook import testbook +from testbook.client import TestbookNotebookClient + +from ..utils import get_examples_path + +examples_path = get_examples_path() + +@testbook(os.path.join(examples_path, "Gymnasium Integration.ipynb"), execute=True) +def test_gym_integration(tb :TestbookNotebookClient): + + expected_result = {"observation0":"{'observation': MemoryObject [idmemoryobject=0, timestamp=, evaluation=0.0, I=(15, 2, 0), name=observation]}", + "observation1":"(15, 2, 0)", + "step_count":"0", + "action0":"{'action': MemoryObject [idmemoryobject=1, timestamp=, evaluation=0.0, I=, name=action]}", + "step_count+observation0":"(1, (25, 2, 0))", + "terminated0":"True", + "reward0":"-1.0", + "observation2":"(15, 2, 0)", + "observation3":"(15, 2, 0)", + "terminated+reward0":"(True, 1.0)", + + "observation4":'''{'dealer_card': MemoryObject [idmemoryobject=0, timestamp=, evaluation=0.0, I=2, name=dealer_card], + 'player_sum': MemoryObject [idmemoryobject=1, timestamp=, evaluation=0.0, I=15, name=player_sum], + 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=, evaluation=0.0, I=0, name=usable_ace]}''', + + "observation5":"{'dealer_card': 2, 'player_sum': 15, 'usable_ace': 0}", + "action1":"{'hit': MemoryObject [idmemoryobject=3, timestamp=, evaluation=0.0, I=, name=hit]}", + "terminated+reward1":"(True, 1.0)" + } + + clear_info = ["action0", "action1"] + + for tag in expected_result: + result = tb.cell_output_text(tag) + result = re.sub(r"timestamp=[0-9]+", "timestamp=", result) + + if tag in clear_info: + result = re.sub(r"I=[0-9]+", "I=", result) + + assert result == expected_result[tag] + +
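The example notebook's "Next steps" cell suggests building a cognitive architecture that acts through these memories instead of driving the environment by hand. Below is a minimal sketch of such a policy codelet; the class name and the hit-below-17 rule are illustrative and not part of this PR:

```python
from cst_python.core.entities import Codelet, Memory


class BlackjackPolicyCodelet(Codelet):
    """Illustrative policy codelet (not part of this PR): hit below 17, stick otherwise."""

    def __init__(self, observation_memory: Memory, action_memory: Memory,
                 terminated_memory: Memory) -> None:
        super().__init__()
        self.observation_memory = observation_memory
        self.action_memory = action_memory
        self.terminated_memory = terminated_memory

        # Publish-subscribe: run proc() whenever a new observation is published.
        self.is_memory_observer = True
        observation_memory.add_memory_observer(self)

    def access_memory_objects(self) -> None:
        pass

    def calculate_activation(self) -> None:
        pass

    def proc(self) -> None:
        if self.terminated_memory.get_info():
            return  # episode is over, wait for the next reset

        player_sum, _dealer_card, _usable_ace = self.observation_memory.get_info()
        self.action_memory.set_info(1 if player_sum < 17 else 0)  # 1 = hit, 0 = stick
```

Inserted into the same mind as the GymCodelet, this closes the loop: each reset or step publishes a new observation, the policy reacts by writing the action memory, and the GymCodelet steps the environment again.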