From da47ccbe81dcd345d47a29cfc9f25f78c62698b4 Mon Sep 17 00:00:00 2001 From: Elton Cardoso do Nascimento <43186596+EltonCN@users.noreply.github.com> Date: Tue, 26 Nov 2024 19:30:08 -0300 Subject: [PATCH 1/6] Gym experiment --- dev/Gym codelet.ipynb | 548 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 548 insertions(+) create mode 100644 dev/Gym codelet.ipynb diff --git a/dev/Gym codelet.ipynb b/dev/Gym codelet.ipynb new file mode 100644 index 0000000..8ec8e5e --- /dev/null +++ b/dev/Gym codelet.ipynb @@ -0,0 +1,548 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Optional, Any\n", + "\n", + "import gymnasium as gym\n", + "from gymnasium.wrappers import TransformAction, TransformObservation\n", + "\n", + "import cst_python as cst\n", + "from cst_python.core.entities import Memory, MemoryObject" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class GymCodelet(cst.Codelet):\n", + " _last_indexes = {\"reward\":-1, \"reset\":-1, \"terminated\":-1, \"truncated\":-1, \"info\":-1, \"seed\":-1}\n", + "\n", + " def __init__(self, mind:cst.Mind, env:gym.Env):\n", + " super().__init__()\n", + " \n", + " self.env = env\n", + " \n", + " self.observation_memories = self.space_to_memories(mind, env.observation_space)\n", + " self.action_memories = self.space_to_memories(mind, env.action_space, action=True)\n", + "\n", + " self._common_memories : dict[str, MemoryObject] = {}\n", + " for name in [\"reward\", \"reset\", \"terminated\", \"truncated\", \"info\", \"seed\"]:\n", + " self._last_indexes[name] += 1\n", + "\n", + " memory_name = name\n", + " if self._last_indexes[name] != 0:\n", + " memory_name += str(self._last_indexes[name])\n", + " \n", + " self._common_memories[name] = mind.create_memory_object(memory_name)\n", + "\n", + " self._common_memories[\"reward\"].set_info(0.0)\n", + " 
self._common_memories[\"reset\"].set_info(False)\n", + " self._common_memories[\"terminated\"].set_info(False)\n", + " self._common_memories[\"truncated\"].set_info(False)\n", + " self._common_memories[\"info\"].set_info({})\n", + " self._common_memories[\"seed\"].set_info(None)\n", + "\n", + "\n", + " self.is_memory_observer = True\n", + " for memory_name in self.action_memories:\n", + " memory = self.action_memories[memory_name]\n", + " memory.add_memory_observer(self)\n", + " self._common_memories[\"reset\"].add_memory_observer(self)\n", + "\n", + " self._last_reset = self._common_memories[\"reset\"].get_timestamp()\n", + "\n", + " @property\n", + " def reward_memory(self) -> MemoryObject:\n", + " return self._common_memories[\"reward\"]\n", + " \n", + " @property\n", + " def reset_memory(self) -> MemoryObject:\n", + " return self._common_memories[\"reset\"]\n", + " \n", + " @property\n", + " def terminated_memory(self) -> MemoryObject:\n", + " return self._common_memories[\"terminated\"]\n", + " \n", + " @property\n", + " def truncated_memory(self) -> MemoryObject:\n", + " return self._common_memories[\"truncated\"]\n", + " \n", + " @property\n", + " def info_memory(self) -> MemoryObject:\n", + " return self._common_memories[\"info\"]\n", + " \n", + " @property\n", + " def seed_memory(self) -> MemoryObject:\n", + " return self._common_memories[\"seed\"]\n", + "\n", + " def access_memory_objects(self):\n", + " pass\n", + "\n", + " def calculate_activation(self):\n", + " pass\n", + "\n", + " def proc(self):\n", + " if self._last_reset < self.reset_memory.get_timestamp():\n", + " self._last_reset = self.reset_memory.get_timestamp()\n", + "\n", + " observation, info = self.env.reset(seed=self.seed_memory.get_info())\n", + " reward = 0\n", + " terminated = False\n", + " truncated = False\n", + "\n", + " else:\n", + " action = self.memories_to_space(self.action_memories, self.env.action_space)\n", + " observation, reward, terminated, truncated, info = 
self.env.step(action)\n", + "\n", + " print(\"Observation\", observation)\n", + " \n", + " self.reward_memory.set_info(reward)\n", + " self.terminated_memory.set_info(terminated)\n", + " self.truncated_memory.set_info(truncated)\n", + " self.info_memory.set_info(info)\n", + "\n", + " self.sample_to_memories(observation, self.observation_memories)\n", + "\n", + " @classmethod\n", + " def space_to_memories(cls, mind:cst.Mind, \n", + " space:gym.Space,\n", + " action:bool=False) -> dict[str, cst.MemoryObject]:\n", + " memories = {}\n", + "\n", + " if isinstance(space, gym.spaces.Dict):\n", + " for space_name in space:\n", + " subspace = space[space_name]\n", + "\n", + " name = space_name\n", + " if space_name in cls._last_indexes:\n", + " cls._last_indexes[space_name] += 1\n", + " name += str(cls._last_indexes[space_name])\n", + " else:\n", + " cls._last_indexes[space_name] = 0\n", + "\n", + " info = subspace.sample()\n", + " memory = mind.create_memory_object(name, info)\n", + " memories[space_name] = memory\n", + " \n", + " else:\n", + " if action:\n", + " space_name = \"action\"\n", + " else:\n", + " space_name = \"observation\"\n", + "\n", + " name = space_name\n", + " if space_name in cls._last_indexes:\n", + " cls._last_indexes[space_name] += 1\n", + " name += str(cls._last_indexes[space_name])\n", + " else:\n", + " cls._last_indexes[space_name] = 0\n", + "\n", + " info = space.sample()\n", + " memory = mind.create_memory_object(name, info)\n", + " memories[space_name] = memory\n", + " \n", + "\n", + " return memories\n", + " \n", + " @classmethod\n", + " def sample_to_memories(cls, sample:dict[str, Any]|Any, memories:dict[str, Memory]) -> None:\n", + " if isinstance(sample, dict):\n", + " for name in sample:\n", + " element = sample[name]\n", + " memory = memories[name]\n", + " \n", + " memory.set_info(element)\n", + " else:\n", + " memory = memories[next(iter(memories))]\n", + " memory.set_info(sample)\n", + " \n", + "\n", + " @classmethod\n", + " def 
memories_to_space(cls, memories:dict[str, Memory], space:gym.spaces.Dict) -> dict[str, Any]|Any:\n", + " if isinstance(space, gym.spaces.Dict):\n", + " sample = {}\n", + " for memory_name in memories:\n", + " sample[memory_name] = memories[memory_name].get_info()\n", + " else:\n", + " sample = memories[next(iter(memories))].get_info()\n", + "\n", + " if not space.contains(sample):\n", + " raise ValueError(\"Memories do not correspond to an element of the Space.\")\n", + " \n", + " return sample" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "env = gym.make(\"Blackjack-v1\")\n", + "\n", + "env = TransformObservation(env, \n", + " lambda obs:{\"player_sum\":obs[0], \"dealer_card\":obs[1], \"usable_ace\":obs[2]}, \n", + " gym.spaces.Dict({\"player_sum\":env.observation_space[0], \"dealer_card\":env.observation_space[1], \"usable_ace\":env.observation_space[2]}))\n", + "\n", + "env = TransformAction(env, \n", + " lambda action:action[\"hit\"], \n", + " gym.spaces.Dict({\"hit\":env.action_space}))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GymCodelet execution\n" + ] + }, + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mind = cst.Mind()\n", + "gym_codelet = GymCodelet(mind, env)\n", + "mind.insert_codelet(gym_codelet)\n", + "\n", + "mind.start()\n", + "gym_codelet.seed_memory.set_info(42)\n", + "gym_codelet.reset_memory.set_info(not gym_codelet.reset_memory.get_info())" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732659816658, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, 
timestamp=1732659816658, evaluation=0.0, I=15, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732659816658, evaluation=0.0, I=0, name=usable_ace]},\n", + " MemoryObject [idmemoryobject=6, timestamp=1732659816658, evaluation=0.0, I=False, name=terminated],\n", + " MemoryObject [idmemoryobject=4, timestamp=1732659816658, evaluation=0.0, I=0, name=reward])" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Observation {'player_sum': 25, 'dealer_card': 2, 'usable_ace': 0}\n", + "GymCodelet execution\n" + ] + }, + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories[\"hit\"].set_info(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732659816687, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732659816687, evaluation=0.0, I=25, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732659816687, evaluation=0.0, I=0, name=usable_ace]},\n", + " MemoryObject [idmemoryobject=6, timestamp=1732659816687, evaluation=0.0, I=True, name=terminated],\n", + " MemoryObject [idmemoryobject=4, timestamp=1732659816687, evaluation=0.0, I=-1.0, name=reward])" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GymCodelet execution\n" + ] + }, + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.reset_memory.set_info(True)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732659816736, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732659816736, evaluation=0.0, I=15, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732659816736, evaluation=0.0, I=0, name=usable_ace]},\n", + " MemoryObject [idmemoryobject=6, timestamp=1732659816736, evaluation=0.0, I=False, name=terminated],\n", + " MemoryObject [idmemoryobject=4, timestamp=1732659816736, evaluation=0.0, I=0, name=reward])" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Observation {'player_sum': 15, 'dealer_card': 2, 'usable_ace': 0}\n", + "GymCodelet execution\n" + ] + }, + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories[\"hit\"].set_info(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732659816814, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject 
[idmemoryobject=1, timestamp=1732659816814, evaluation=0.0, I=15, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732659816814, evaluation=0.0, I=0, name=usable_ace]},\n", + " MemoryObject [idmemoryobject=6, timestamp=1732659816814, evaluation=0.0, I=True, name=terminated],\n", + " MemoryObject [idmemoryobject=4, timestamp=1732659816814, evaluation=0.0, I=1.0, name=reward])" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GymCodelet execution\n" + ] + }, + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "env = gym.make(\"Blackjack-v1\")\n", + "mind = cst.Mind()\n", + "\n", + "gym_codelet = GymCodelet(mind, env)\n", + "mind.insert_codelet(gym_codelet)\n", + "\n", + "mind.start()\n", + "gym_codelet.seed_memory.set_info(42)\n", + "gym_codelet.reset_memory.set_info(not gym_codelet.reset_memory.get_info())" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'observation': MemoryObject [idmemoryobject=0, timestamp=1732659816913, evaluation=0.0, I=(15, 2, 0), name=observation]},\n", + " MemoryObject [idmemoryobject=4, timestamp=1732659816913, evaluation=0.0, I=False, name=terminated1],\n", + " MemoryObject [idmemoryobject=2, timestamp=1732659816913, evaluation=0.0, I=0, name=reward1])" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Observation (25, 2, 0)\n", + "GymCodelet execution\n" + ] + }, + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories[\"action\"].set_info(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'observation': MemoryObject [idmemoryobject=0, timestamp=1732659816947, evaluation=0.0, I=(25, 2, 0), name=observation]},\n", + " MemoryObject [idmemoryobject=4, timestamp=1732659816947, evaluation=0.0, I=True, name=terminated1],\n", + " MemoryObject [idmemoryobject=2, timestamp=1732659816947, evaluation=0.0, I=-1.0, name=reward1])" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 6826ea4bd5e561d587fa3cea565e151a3800adbc Mon Sep 17 00:00:00 2001 From: Elton Cardoso do Nascimento <43186596+EltonCN@users.noreply.github.com> Date: Wed, 27 Nov 2024 13:59:41 -0300 Subject: [PATCH 2/6] GymCodelet integrated --- dev/Gym codelet.ipynb | 300 +++++------------------ src/cst_python/python/gym/__init__.py | 1 + src/cst_python/python/gym/gym_codelet.py | 255 +++++++++++++++++++ 3 files changed, 312 
insertions(+), 244 deletions(-) create mode 100644 src/cst_python/python/gym/__init__.py create mode 100644 src/cst_python/python/gym/gym_codelet.py diff --git a/dev/Gym codelet.ipynb b/dev/Gym codelet.ipynb index 8ec8e5e..aab533e 100644 --- a/dev/Gym codelet.ipynb +++ b/dev/Gym codelet.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 56, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -12,176 +12,12 @@ "from gymnasium.wrappers import TransformAction, TransformObservation\n", "\n", "import cst_python as cst\n", - "from cst_python.core.entities import Memory, MemoryObject" + "from cst_python.python.gym import GymCodelet" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class GymCodelet(cst.Codelet):\n", - " _last_indexes = {\"reward\":-1, \"reset\":-1, \"terminated\":-1, \"truncated\":-1, \"info\":-1, \"seed\":-1}\n", - "\n", - " def __init__(self, mind:cst.Mind, env:gym.Env):\n", - " super().__init__()\n", - " \n", - " self.env = env\n", - " \n", - " self.observation_memories = self.space_to_memories(mind, env.observation_space)\n", - " self.action_memories = self.space_to_memories(mind, env.action_space, action=True)\n", - "\n", - " self._common_memories : dict[str, MemoryObject] = {}\n", - " for name in [\"reward\", \"reset\", \"terminated\", \"truncated\", \"info\", \"seed\"]:\n", - " self._last_indexes[name] += 1\n", - "\n", - " memory_name = name\n", - " if self._last_indexes[name] != 0:\n", - " memory_name += str(self._last_indexes[name])\n", - " \n", - " self._common_memories[name] = mind.create_memory_object(memory_name)\n", - "\n", - " self._common_memories[\"reward\"].set_info(0.0)\n", - " self._common_memories[\"reset\"].set_info(False)\n", - " self._common_memories[\"terminated\"].set_info(False)\n", - " self._common_memories[\"truncated\"].set_info(False)\n", - " self._common_memories[\"info\"].set_info({})\n", - " 
self._common_memories[\"seed\"].set_info(None)\n", - "\n", - "\n", - " self.is_memory_observer = True\n", - " for memory_name in self.action_memories:\n", - " memory = self.action_memories[memory_name]\n", - " memory.add_memory_observer(self)\n", - " self._common_memories[\"reset\"].add_memory_observer(self)\n", - "\n", - " self._last_reset = self._common_memories[\"reset\"].get_timestamp()\n", - "\n", - " @property\n", - " def reward_memory(self) -> MemoryObject:\n", - " return self._common_memories[\"reward\"]\n", - " \n", - " @property\n", - " def reset_memory(self) -> MemoryObject:\n", - " return self._common_memories[\"reset\"]\n", - " \n", - " @property\n", - " def terminated_memory(self) -> MemoryObject:\n", - " return self._common_memories[\"terminated\"]\n", - " \n", - " @property\n", - " def truncated_memory(self) -> MemoryObject:\n", - " return self._common_memories[\"truncated\"]\n", - " \n", - " @property\n", - " def info_memory(self) -> MemoryObject:\n", - " return self._common_memories[\"info\"]\n", - " \n", - " @property\n", - " def seed_memory(self) -> MemoryObject:\n", - " return self._common_memories[\"seed\"]\n", - "\n", - " def access_memory_objects(self):\n", - " pass\n", - "\n", - " def calculate_activation(self):\n", - " pass\n", - "\n", - " def proc(self):\n", - " if self._last_reset < self.reset_memory.get_timestamp():\n", - " self._last_reset = self.reset_memory.get_timestamp()\n", - "\n", - " observation, info = self.env.reset(seed=self.seed_memory.get_info())\n", - " reward = 0\n", - " terminated = False\n", - " truncated = False\n", - "\n", - " else:\n", - " action = self.memories_to_space(self.action_memories, self.env.action_space)\n", - " observation, reward, terminated, truncated, info = self.env.step(action)\n", - "\n", - " print(\"Observation\", observation)\n", - " \n", - " self.reward_memory.set_info(reward)\n", - " self.terminated_memory.set_info(terminated)\n", - " self.truncated_memory.set_info(truncated)\n", - " 
self.info_memory.set_info(info)\n", - "\n", - " self.sample_to_memories(observation, self.observation_memories)\n", - "\n", - " @classmethod\n", - " def space_to_memories(cls, mind:cst.Mind, \n", - " space:gym.Space,\n", - " action:bool=False) -> dict[str, cst.MemoryObject]:\n", - " memories = {}\n", - "\n", - " if isinstance(space, gym.spaces.Dict):\n", - " for space_name in space:\n", - " subspace = space[space_name]\n", - "\n", - " name = space_name\n", - " if space_name in cls._last_indexes:\n", - " cls._last_indexes[space_name] += 1\n", - " name += str(cls._last_indexes[space_name])\n", - " else:\n", - " cls._last_indexes[space_name] = 0\n", - "\n", - " info = subspace.sample()\n", - " memory = mind.create_memory_object(name, info)\n", - " memories[space_name] = memory\n", - " \n", - " else:\n", - " if action:\n", - " space_name = \"action\"\n", - " else:\n", - " space_name = \"observation\"\n", - "\n", - " name = space_name\n", - " if space_name in cls._last_indexes:\n", - " cls._last_indexes[space_name] += 1\n", - " name += str(cls._last_indexes[space_name])\n", - " else:\n", - " cls._last_indexes[space_name] = 0\n", - "\n", - " info = space.sample()\n", - " memory = mind.create_memory_object(name, info)\n", - " memories[space_name] = memory\n", - " \n", - "\n", - " return memories\n", - " \n", - " @classmethod\n", - " def sample_to_memories(cls, sample:dict[str, Any]|Any, memories:dict[str, Memory]) -> None:\n", - " if isinstance(sample, dict):\n", - " for name in sample:\n", - " element = sample[name]\n", - " memory = memories[name]\n", - " \n", - " memory.set_info(element)\n", - " else:\n", - " memory = memories[next(iter(memories))]\n", - " memory.set_info(sample)\n", - " \n", - "\n", - " @classmethod\n", - " def memories_to_space(cls, memories:dict[str, Memory], space:gym.spaces.Dict) -> dict[str, Any]|Any:\n", - " if isinstance(space, gym.spaces.Dict):\n", - " sample = {}\n", - " for memory_name in memories:\n", - " sample[memory_name] = 
memories[memory_name].get_info()\n", - " else:\n", - " sample = memories[next(iter(memories))].get_info()\n", - "\n", - " if not space.contains(sample):\n", - " raise ValueError(\"Memories do not correspond to an element of the Space.\")\n", - " \n", - " return sample" - ] - }, - { - "cell_type": "code", - "execution_count": 58, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -199,23 +35,16 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 4, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GymCodelet execution\n" - ] - }, { "data": { "text/plain": [ "-1" ] }, - "execution_count": 59, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -232,20 +61,20 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732659816658, evaluation=0.0, I=2, name=dealer_card],\n", - " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732659816658, evaluation=0.0, I=15, name=player_sum],\n", - " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732659816658, evaluation=0.0, I=0, name=usable_ace]},\n", - " MemoryObject [idmemoryobject=6, timestamp=1732659816658, evaluation=0.0, I=False, name=terminated],\n", - " MemoryObject [idmemoryobject=4, timestamp=1732659816658, evaluation=0.0, I=0, name=reward])" + "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732724413462, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732724413462, evaluation=0.0, I=15, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732724413462, evaluation=0.0, I=0, name=usable_ace]},\n", + " MemoryObject [idmemoryobject=6, timestamp=1732724413462, evaluation=0.0, I=False, name=terminated],\n", + " MemoryObject [idmemoryobject=4, 
timestamp=1732724413462, evaluation=0.0, I=0, name=reward])" ] }, - "execution_count": 60, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -256,15 +85,14 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Observation {'player_sum': 25, 'dealer_card': 2, 'usable_ace': 0}\n", - "GymCodelet execution\n" + "Observation {'player_sum': 25, 'dealer_card': 2, 'usable_ace': 0}\n" ] }, { @@ -273,7 +101,7 @@ "-1" ] }, - "execution_count": 61, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -284,20 +112,20 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732659816687, evaluation=0.0, I=2, name=dealer_card],\n", - " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732659816687, evaluation=0.0, I=25, name=player_sum],\n", - " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732659816687, evaluation=0.0, I=0, name=usable_ace]},\n", - " MemoryObject [idmemoryobject=6, timestamp=1732659816687, evaluation=0.0, I=True, name=terminated],\n", - " MemoryObject [idmemoryobject=4, timestamp=1732659816687, evaluation=0.0, I=-1.0, name=reward])" + "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732724413492, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732724413492, evaluation=0.0, I=25, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732724413492, evaluation=0.0, I=0, name=usable_ace]},\n", + " MemoryObject [idmemoryobject=6, timestamp=1732724413492, evaluation=0.0, I=True, name=terminated],\n", + " MemoryObject [idmemoryobject=4, timestamp=1732724413492, evaluation=0.0, I=-1.0, name=reward])" ] }, - "execution_count": 62, + "execution_count": 7, 
"metadata": {}, "output_type": "execute_result" } @@ -308,23 +136,16 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 8, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GymCodelet execution\n" - ] - }, { "data": { "text/plain": [ "-1" ] }, - "execution_count": 63, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -335,20 +156,20 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732659816736, evaluation=0.0, I=2, name=dealer_card],\n", - " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732659816736, evaluation=0.0, I=15, name=player_sum],\n", - " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732659816736, evaluation=0.0, I=0, name=usable_ace]},\n", - " MemoryObject [idmemoryobject=6, timestamp=1732659816736, evaluation=0.0, I=False, name=terminated],\n", - " MemoryObject [idmemoryobject=4, timestamp=1732659816736, evaluation=0.0, I=0, name=reward])" + "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732724413554, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732724413554, evaluation=0.0, I=15, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732724413554, evaluation=0.0, I=0, name=usable_ace]},\n", + " MemoryObject [idmemoryobject=6, timestamp=1732724413554, evaluation=0.0, I=False, name=terminated],\n", + " MemoryObject [idmemoryobject=4, timestamp=1732724413554, evaluation=0.0, I=0, name=reward])" ] }, - "execution_count": 64, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -359,15 +180,14 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Observation 
{'player_sum': 15, 'dealer_card': 2, 'usable_ace': 0}\n", - "GymCodelet execution\n" + "Observation {'player_sum': 15, 'dealer_card': 2, 'usable_ace': 0}\n" ] }, { @@ -376,7 +196,7 @@ "-1" ] }, - "execution_count": 65, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -387,20 +207,20 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732659816814, evaluation=0.0, I=2, name=dealer_card],\n", - " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732659816814, evaluation=0.0, I=15, name=player_sum],\n", - " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732659816814, evaluation=0.0, I=0, name=usable_ace]},\n", - " MemoryObject [idmemoryobject=6, timestamp=1732659816814, evaluation=0.0, I=True, name=terminated],\n", - " MemoryObject [idmemoryobject=4, timestamp=1732659816814, evaluation=0.0, I=1.0, name=reward])" + "({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732724413580, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732724413580, evaluation=0.0, I=15, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732724413580, evaluation=0.0, I=0, name=usable_ace]},\n", + " MemoryObject [idmemoryobject=6, timestamp=1732724413580, evaluation=0.0, I=True, name=terminated],\n", + " MemoryObject [idmemoryobject=4, timestamp=1732724413580, evaluation=0.0, I=1.0, name=reward])" ] }, - "execution_count": 66, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -411,23 +231,16 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 12, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GymCodelet execution\n" - ] - }, { "data": { "text/plain": [ "-1" ] }, - "execution_count": 67, + "execution_count": 12, 
"metadata": {}, "output_type": "execute_result" } @@ -446,18 +259,18 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "({'observation': MemoryObject [idmemoryobject=0, timestamp=1732659816913, evaluation=0.0, I=(15, 2, 0), name=observation]},\n", - " MemoryObject [idmemoryobject=4, timestamp=1732659816913, evaluation=0.0, I=False, name=terminated1],\n", - " MemoryObject [idmemoryobject=2, timestamp=1732659816913, evaluation=0.0, I=0, name=reward1])" + "({'observation': MemoryObject [idmemoryobject=0, timestamp=1732724413609, evaluation=0.0, I=(15, 2, 0), name=observation]},\n", + " MemoryObject [idmemoryobject=4, timestamp=1732724413609, evaluation=0.0, I=False, name=terminated1],\n", + " MemoryObject [idmemoryobject=2, timestamp=1732724413609, evaluation=0.0, I=0, name=reward1])" ] }, - "execution_count": 68, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -468,15 +281,14 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Observation (25, 2, 0)\n", - "GymCodelet execution\n" + "Observation (25, 2, 0)\n" ] }, { @@ -485,7 +297,7 @@ "-1" ] }, - "execution_count": 69, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -496,18 +308,18 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "({'observation': MemoryObject [idmemoryobject=0, timestamp=1732659816947, evaluation=0.0, I=(25, 2, 0), name=observation]},\n", - " MemoryObject [idmemoryobject=4, timestamp=1732659816947, evaluation=0.0, I=True, name=terminated1],\n", - " MemoryObject [idmemoryobject=2, timestamp=1732659816947, evaluation=0.0, I=-1.0, name=reward1])" + "({'observation': MemoryObject [idmemoryobject=0, timestamp=1732724413632, evaluation=0.0, I=(25, 2, 0), 
name=observation]},\n", + " MemoryObject [idmemoryobject=4, timestamp=1732724413632, evaluation=0.0, I=True, name=terminated1],\n", + " MemoryObject [idmemoryobject=2, timestamp=1732724413632, evaluation=0.0, I=-1.0, name=reward1])" ] }, - "execution_count": 70, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } diff --git a/src/cst_python/python/gym/__init__.py b/src/cst_python/python/gym/__init__.py new file mode 100644 index 0000000..2f2b3d4 --- /dev/null +++ b/src/cst_python/python/gym/__init__.py @@ -0,0 +1 @@ +from .gym_codelet import GymCodelet \ No newline at end of file diff --git a/src/cst_python/python/gym/gym_codelet.py b/src/cst_python/python/gym/gym_codelet.py new file mode 100644 index 0000000..e702319 --- /dev/null +++ b/src/cst_python/python/gym/gym_codelet.py @@ -0,0 +1,255 @@ +from typing import Optional, Any, cast, Mapping + +try: + import gymnasium as gym +except ModuleNotFoundError: + import gym # type: ignore + +from cst_python.core.entities import Codelet, Mind, Memory, MemoryObject + +class GymCodelet(Codelet): + ''' + Codelet to interface with gymnasium/gym environments. Creates memories for the observation, + action, reward, reset, terminated, truncated, info and seed; and updates them stepping the + environment with the action. + ''' + + _last_indexes : dict[str, int] = {"reward":-1, "reset":-1, "terminated":-1, "truncated":-1, "info":-1, "seed":-1} + + def __init__(self, mind:Mind, env:gym.Env): + ''' + GymCodelet constructor. + + Always runs automatically in publish-subscribe mode. + + Args: + mind (Mind): agent's mind. + env (gym.Env): environment to interface. 
+ ''' + super().__init__() + + assert mind._raw_memory is not None # RawMemory cannot be None for creating memories + + self.env = env + + self.observation_memories = self.space_to_memories(mind, env.observation_space) + self.action_memories = self.space_to_memories(mind, env.action_space, action=True) + + self._common_memories : dict[str, MemoryObject] = {} + for name in ["reward", "reset", "terminated", "truncated", "info", "seed"]: + self._last_indexes[name] += 1 + + memory_name = name + if self._last_indexes[name] != 0: + memory_name += str(self._last_indexes[name]) + + self._common_memories[name] = cast(MemoryObject, mind.create_memory_object(memory_name)) + + self._common_memories["reward"].set_info(0.0) + self._common_memories["reset"].set_info(False) + self._common_memories["terminated"].set_info(False) + self._common_memories["truncated"].set_info(False) + self._common_memories["info"].set_info({}) + self._common_memories["seed"].set_info(None) + + + self.is_memory_observer = True + for memory_name in self.action_memories: + memory = self.action_memories[memory_name] + memory.add_memory_observer(self) + self._common_memories["reset"].add_memory_observer(self) + + self._last_reset = self._common_memories["reset"].get_timestamp() + + @property + def reward_memory(self) -> MemoryObject: + ''' + Memory that contains the environment reward (float). + ''' + return self._common_memories["reward"] + + @property + def reset_memory(self) -> MemoryObject: + ''' + Memory that contains the environment reset. + + If timestamp changes, the codelet resets the environment. + ''' + return self._common_memories["reset"] + + @property + def terminated_memory(self) -> MemoryObject: + ''' + Memory that contains the environment terminated state. + ''' + return self._common_memories["terminated"] + + @property + def truncated_memory(self) -> MemoryObject: + ''' + Memory that contains the environment truncated state. 
+ ''' + return self._common_memories["truncated"] + + @property + def info_memory(self) -> MemoryObject: + ''' + Memory that contains the environment info. + ''' + return self._common_memories["info"] + + @property + def seed_memory(self) -> MemoryObject: + ''' + Memory that contains the seed to use in the environment reset. + ''' + return self._common_memories["seed"] + + + def access_memory_objects(self) -> None: #NOSONAR + pass + + def calculate_activation(self) -> None: #NOSONAR + pass + + def proc(self) -> None: + if self._last_reset < self.reset_memory.get_timestamp(): + self._last_reset = self.reset_memory.get_timestamp() + + observation, info = self.env.reset(seed=self.seed_memory.get_info()) + reward = 0.0 + terminated = False + truncated = False + + else: + action = self.memories_to_space(self.action_memories, self.env.action_space) + observation, r, terminated, truncated, info = self.env.step(action) + reward = float(r) #SupportsFloat to float + + print("Observation", observation) + + self.reward_memory.set_info(reward) + self.terminated_memory.set_info(terminated) + self.truncated_memory.set_info(truncated) + self.info_memory.set_info(info) + + self.sample_to_memories(observation, self.observation_memories) + + @classmethod + def space_to_memories(cls, mind:Mind, + space:gym.Space, + action:bool=False, + memory_prefix:Optional[str]=None) -> dict[str, MemoryObject]: + ''' + Creates memories from a gym Space definition. + + Args: + mind (Mind): mind to create the memories. + space (gym.Space): space defining the memories to create. + If gym.space.Dict, creates a memory for each element, + creates a single memory otherwise. + action (bool, optional): If True, creates a memory with 'action' + name for non Dict space, uses 'observation' name otherwise. + Defaults to False. + memory_prefix (Optional[str], optional): prefix to memories name. + Defaults to None. 
+ + Returns: + dict[str, MemoryObject]: created memories, indexed by the space + element name or 'action'/'observation'. + ''' + assert mind._raw_memory is not None # RawMemory cannot be None for creating memories + + if memory_prefix is None: + memory_prefix = "" + + memories : dict[str, MemoryObject] = {} + + if isinstance(space, gym.spaces.Dict): + for space_name in space: + subspace = space[space_name] + + name = space_name + if space_name in cls._last_indexes: + cls._last_indexes[space_name] += 1 + name += str(cls._last_indexes[space_name]) + else: + cls._last_indexes[space_name] = 0 + name = memory_prefix+name + + info = subspace.sample() + memory = cast(MemoryObject, mind.create_memory_object(name, info)) + memories[space_name] = memory + + else: + if action: + space_name = "action" + else: + space_name = "observation" + + name = space_name + if space_name in cls._last_indexes: + cls._last_indexes[space_name] += 1 + name += str(cls._last_indexes[space_name]) + else: + cls._last_indexes[space_name] = 0 + + name = memory_prefix+name + + info = space.sample() + memory = cast(MemoryObject, mind.create_memory_object(name, info)) + memories[space_name] = memory + + + return memories + + @classmethod + def sample_to_memories(cls, sample:Mapping[str, Any]|Any, + memories:Mapping[str, Memory]) -> None: + ''' + Writes a gym.Space sample to memories. + + Args: + sample (Mapping[str, Any] | Any): sample to write in the memories. + memories (Mapping[str, Memory]): memories corresponding to + the space elements. + ''' + if isinstance(sample, dict): + for name in sample: + element = sample[name] + memory = memories[name] + + memory.set_info(element) + else: + memory = memories[next(iter(memories))] + memory.set_info(sample) + + + @classmethod + def memories_to_space(cls, memories:Mapping[str, Memory], + space:gym.Space) -> dict[str, Any]|Any: + ''' + Convert the memories info to the space sample. + + Args: + memories (Mapping[str, Memory]): memories to get the sample. 
+ space (gym.Space): space the sample belongs + + Raises: + ValueError: if the generated sample from the memories + doesn't belongs to the space + + Returns: + dict[str, Any]|Any: converted sample. + ''' + if isinstance(space, gym.spaces.Dict): + sample = {} + for memory_name in memories: + sample[memory_name] = memories[memory_name].get_info() + else: + sample = memories[next(iter(memories))].get_info() + + if not space.contains(sample): + raise ValueError("Memories do not correspond to an element of the Space.") + + return sample \ No newline at end of file From 13875fb1bc73517dc04cd1693be69ca914a6d790 Mon Sep 17 00:00:00 2001 From: Elton Cardoso do Nascimento <43186596+EltonCN@users.noreply.github.com> Date: Wed, 27 Nov 2024 13:59:54 -0300 Subject: [PATCH 3/6] Added "gym" dependencies --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 0f8634c..f5c97c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ Homepage = "https://hiaac.unicamp.br" tests = ["mypy", "testbook", "ipython", "ipykernel", "numpy", "matplotlib"] doc_generation = ["sphinx", "sphinx_rtd_theme", "nbsphinx", "sphinx-mdinclude==0.5.4"] dev = ["cffconvert"] +gym = ["gymnasium"] [tool.setuptools] include-package-data = true From 8d9befd72a94e96845d0e94503ff369574f247c7 Mon Sep 17 00:00:00 2001 From: Elton Cardoso do Nascimento <43186596+EltonCN@users.noreply.github.com> Date: Wed, 27 Nov 2024 15:00:11 -0300 Subject: [PATCH 4/6] Gym Codelet example --- .github/workflows/test.yml | 2 +- dev/Gym codelet.ipynb | 2 +- examples/Gymnasium Integration.ipynb | 714 +++++++++++++++++++++++ src/cst_python/python/gym/gym_codelet.py | 21 +- tests/examples/test_gym_integration.py | 45 ++ 5 files changed, 778 insertions(+), 6 deletions(-) create mode 100644 examples/Gymnasium Integration.ipynb create mode 100644 tests/examples/test_gym_integration.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a01d4e4..d6497dc 
100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,7 +28,7 @@ jobs: python3 -m pip install --upgrade pip python3 -m pip install pytest python3 -m pip install pytest-cov - python3 -m pip install -e .[tests] + python3 -m pip install -e .[tests, gym] - name: Tests run: | diff --git a/dev/Gym codelet.ipynb b/dev/Gym codelet.ipynb index aab533e..ea44177 100644 --- a/dev/Gym codelet.ipynb +++ b/dev/Gym codelet.ipynb @@ -281,7 +281,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [ { diff --git a/examples/Gymnasium Integration.ipynb b/examples/Gymnasium Integration.ipynb new file mode 100644 index 0000000..ad81ade --- /dev/null +++ b/examples/Gymnasium Integration.ipynb @@ -0,0 +1,714 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Gymnasium Integration\n", + "\n", + "[![Open in Colab](https://img.shields.io/badge/Open%20in%20Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/H-IAAC/CST-Python/blob/main/examples/Gymnasium%20Integration.ipynb) [![Open in Github](https://img.shields.io/badge/Open%20in%20Github-100000?style=for-the-badge&logo=github&logoColor=white)](https://github.com/H-IAAC/CST-Python/blob/main/examples/Gymnasium%20Integration.ipynb)\n", + "\n", + "[Gymnasium](https://gymnasium.farama.org/) is the library that defines the most widely used interface for creating environments for reinforcement learning problems. CST-Python provides an interface for interacting with environments using a cognitive agent." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's start by importing CST-Python and the other required modules:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + "    import cst_python as cst\n", + "    import gymnasium as gym\n", + "except:\n", + "    !python3 -m pip install cst_python[gym]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "from gymnasium.wrappers import TransformAction, TransformObservation\n", + "\n", + "from cst_python.python.gym import GymCodelet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The GymCodelet\n", + "\n", + "The GymCodelet is the main interface with environments. Before we use it, we need to create the environment and the agent's mind.\n", + "\n", + "The environment we are going to use is the Blackjack card game. See the [environment documentation](https://gymnasium.farama.org/environments/toy_text/blackjack/) for more details about the game and the environment." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "env = gym.make(\"Blackjack-v1\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "mind = cst.Mind()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With the mind and environment, we can create the codelet, insert it inside the mind and start it:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "gym_codelet = GymCodelet(mind, env)\n", + "mind.insert_codelet(gym_codelet)\n", + "\n", + "mind.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "One important detail is that the GymCodelet always runs in the [Publisher-Subscriber](https://h-iaac.github.io/CST-Python/_build/html/_examples/Publisher-Subscriber.html) mode.\n", + "\n", + "It creates two important memories for starting the environment: the seed memory and the reset memory.\n", + "\n", + "We are going to set the environment seed to 42 to exemplify how it works, and restart the environment: " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.seed_memory.set_info(42)\n", + "gym_codelet.reset_memory.set_info(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we look at the observation memories, we are going to see a single memory with the observation provided by the environment, a tuple with the player's current sum, the dealer's showing card value and the usable ace:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "tags": [ + "observation0" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'observation': MemoryObject [idmemoryobject=0, timestamp=1732730372039, evaluation=0.0, 
I=(15, 2, 0), name=observation]}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "tags": [ + "observation1" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(15, 2, 0)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories[\"observation\"].get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The step count memory shows the number of steps since the episode started:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "tags": [ + "step_count" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.step_count_memory.get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The action memories also contain a single \"action\" memory:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "tags": [ + "action0" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'action': MemoryObject [idmemoryobject=1, timestamp=1732730372025, evaluation=0.0, I=1, name=action]}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are going to set it to `1` for a hit." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories[\"action\"].set_info(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When the action memory changes, the codelet executes a step in the environment. We can see that the step count and the observation change:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "tags": [ + "step_count+observation0" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1, (25, 2, 0))" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.step_count_memory.get_info(), gym_codelet.observation_memories[\"observation\"].get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we busted, the environment terminated:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "tags": [ + "terminated0" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.terminated_memory.get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And the step reward is -1 as we lost:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "tags": [ + "reward0" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "-1.0" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.reward_memory.get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are going to start a new episode. 
Note that the codelet resets the environment each time the reset memory timestamp changes, even if the content is the same. The first observation is the same as before, since we set the environment seed:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "tags": [ + "observation2" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(15, 2, 0)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.reset_memory.set_info(True)\n", + "gym_codelet.observation_memories[\"observation\"].get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This time, we are going to choose to stick:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "tags": [ + "observation3" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(15, 2, 0)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories[\"action\"].set_info(0)\n", + "gym_codelet.observation_memories[\"observation\"].get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And we won this game:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "tags": [ + "terminated+reward0" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(True, 1.0)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.terminated_memory.get_info(), gym_codelet.reward_memory.get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dict Spaces\n", + "\n", + "So far, we have used the codelet to map all observations in the environment to a single memory with a generic name. 
However, if the environment has observation and action spaces of type Dict, the Codelet will map each observation and each action to a specific memory.\n", + "\n", + "Let's see this." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "env = gym.make(\"Blackjack-v1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Unlike before, we will use TransformObservation and TransformAction to transform the original observations and actions into Dict Spaces:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "env = TransformObservation(env, \n", + " lambda obs:{\"player_sum\":obs[0], \"dealer_card\":obs[1], \"usable_ace\":obs[2]}, \n", + " gym.spaces.Dict({\"player_sum\":env.observation_space[0], \"dealer_card\":env.observation_space[1], \"usable_ace\":env.observation_space[2]}))\n", + "\n", + "env = TransformAction(env, \n", + " lambda action:action[\"hit\"], \n", + " gym.spaces.Dict({\"hit\":env.action_space}))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's create and start the agent and environment just like before:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mind = cst.Mind()\n", + "gym_codelet = GymCodelet(mind, env)\n", + "mind.insert_codelet(gym_codelet)\n", + "\n", + "mind.start()\n", + "\n", + "gym_codelet.seed_memory.set_info(42)\n", + "gym_codelet.reset_memory.set_info(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This time, we can see that the observation memories changed, with a single memory for each observation:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "tags": [ + "observation4" + ] + }, + 
"outputs": [ + { + "data": { + "text/plain": [ + "{'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732730372367, evaluation=0.0, I=2, name=dealer_card],\n", + " 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732730372367, evaluation=0.0, I=15, name=player_sum],\n", + " 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732730372367, evaluation=0.0, I=0, name=usable_ace]}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.observation_memories" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "tags": [ + "observation5" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'dealer_card': 2, 'player_sum': 15, 'usable_ace': 0}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "{memory_name:gym_codelet.observation_memories[memory_name].get_info() for memory_name in gym_codelet.observation_memories}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The action memory also changed its name:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "tags": [ + "action1" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'hit': MemoryObject [idmemoryobject=3, timestamp=1732730372365, evaluation=0.0, I=0, name=hit]}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Just like before, we choose to stick:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.action_memories[\"hit\"].set_info(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "And won:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "tags": [ + "terminated+reward1" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(True, 1.0)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym_codelet.terminated_memory.get_info(), gym_codelet.reward_memory.get_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Next steps\n", + "\n", + "The idea is not to use the Codelet to manually interface with the environment like this example, but to create a cognitive architecture to perform the environment's task.\n", + "\n", + "Another possibility is to combine GymCodelet with MemoryStorage to use gym environments with a remote cognitive agent or in CST-Java." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/cst_python/python/gym/gym_codelet.py b/src/cst_python/python/gym/gym_codelet.py index e702319..2498f2b 100644 --- a/src/cst_python/python/gym/gym_codelet.py +++ b/src/cst_python/python/gym/gym_codelet.py @@ -14,7 +14,10 @@ class GymCodelet(Codelet): environment with the action. 
''' - _last_indexes : dict[str, int] = {"reward":-1, "reset":-1, "terminated":-1, "truncated":-1, "info":-1, "seed":-1} + _last_indexes : dict[str, int] = {"reward":-1, "reset":-1, + "terminated":-1, "truncated":-1, + "info":-1, "seed":-1, + "step_count":-1} def __init__(self, mind:Mind, env:gym.Env): ''' @@ -36,7 +39,7 @@ def __init__(self, mind:Mind, env:gym.Env): self.action_memories = self.space_to_memories(mind, env.action_space, action=True) self._common_memories : dict[str, MemoryObject] = {} - for name in ["reward", "reset", "terminated", "truncated", "info", "seed"]: + for name in ["reward", "reset", "terminated", "truncated", "info", "seed", "step_count"]: self._last_indexes[name] += 1 memory_name = name @@ -51,6 +54,7 @@ def __init__(self, mind:Mind, env:gym.Env): self._common_memories["truncated"].set_info(False) self._common_memories["info"].set_info({}) self._common_memories["seed"].set_info(None) + self._common_memories["step_count"].set_info(0) self.is_memory_observer = True @@ -59,7 +63,7 @@ def __init__(self, mind:Mind, env:gym.Env): memory.add_memory_observer(self) self._common_memories["reset"].add_memory_observer(self) - self._last_reset = self._common_memories["reset"].get_timestamp() + self._last_reset = 0 @property def reward_memory(self) -> MemoryObject: @@ -105,6 +109,13 @@ def seed_memory(self) -> MemoryObject: ''' return self._common_memories["seed"] + @property + def step_count_memory(self) -> MemoryObject: + ''' + Memory that contains the step count for the current environment + episode. 
+ ''' + return self._common_memories["step_count"] def access_memory_objects(self) -> None: #NOSONAR pass @@ -120,18 +131,20 @@ def proc(self) -> None: reward = 0.0 terminated = False truncated = False + step_count = 0 else: action = self.memories_to_space(self.action_memories, self.env.action_space) observation, r, terminated, truncated, info = self.env.step(action) reward = float(r) #SupportsFloat to float - print("Observation", observation) + step_count = self.step_count_memory.get_info()+1 self.reward_memory.set_info(reward) self.terminated_memory.set_info(terminated) self.truncated_memory.set_info(truncated) self.info_memory.set_info(info) + self.step_count_memory.set_info(step_count) self.sample_to_memories(observation, self.observation_memories) diff --git a/tests/examples/test_gym_integration.py b/tests/examples/test_gym_integration.py new file mode 100644 index 0000000..f1f7e55 --- /dev/null +++ b/tests/examples/test_gym_integration.py @@ -0,0 +1,45 @@ +import os +import re + +from testbook import testbook +from testbook.client import TestbookNotebookClient + +from ..utils import get_examples_path + +examples_path = get_examples_path() + +@testbook(os.path.join(examples_path, "Gymnasium Integration.ipynb"), execute=True) +def test_gym_integration(tb :TestbookNotebookClient): + + expected_result = {"observation0":"{'observation': MemoryObject [idmemoryobject=0, timestamp=, evaluation=0.0, I=(15, 2, 0), name=observation]}", + "observation1":"(15, 2, 0)", + "step_count":"0", + "action0":"{'action': MemoryObject [idmemoryobject=1, timestamp=, evaluation=0.0, I=, name=action]}", + "step_count+observation0":"(1, (25, 2, 0))", + "terminated0":"True", + "reward0":"-1.0", + "observation2":"(15, 2, 0)", + "observation3":"(15, 2, 0)", + "terminated+reward0":"(True, 1.0)", + + "observation4":'''{'dealer_card': MemoryObject [idmemoryobject=0, timestamp=, evaluation=0.0, I=2, name=dealer_card], + 'player_sum': MemoryObject [idmemoryobject=1, timestamp=, evaluation=0.0, 
I=15, name=player_sum], + 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=, evaluation=0.0, I=0, name=usable_ace]}''', + + "observation5":"{'dealer_card': 2, 'player_sum': 15, 'usable_ace': 0}", + "action1":"{'hit': MemoryObject [idmemoryobject=3, timestamp=, evaluation=0.0, I=, name=hit]}", + "terminated+reward1":"(True, 1.0)" + } + + clear_info = ["action0", "action1"] + + for tag in expected_result: + result = tb.cell_output_text(tag) + result = re.sub(r"timestamp=[0-9]+", "timestamp=", result) + + if tag in clear_info: + result = re.sub(r"I=[0-9]+", "I=", result) + + assert result == expected_result[tag] + + From 34ae5acdb1b829c661dc9445597434685209b9b2 Mon Sep 17 00:00:00 2001 From: Elton Cardoso do Nascimento <43186596+EltonCN@users.noreply.github.com> Date: Wed, 27 Nov 2024 15:03:48 -0300 Subject: [PATCH 5/6] Gym example in documentation --- docs/index.rst | 1 + examples/README.md | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 06ffaa8..a8afed6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,6 +19,7 @@ _examples/Implementing a Architecture _examples/Publisher-Subscriber _examples/Activation and Monitoring + _examples/Gymnasium Integration .. toctree:: :maxdepth: 4 diff --git a/examples/README.md b/examples/README.md index 2116e3e..621060e 100644 --- a/examples/README.md +++ b/examples/README.md @@ -5,4 +5,5 @@ Here we have some examples of how to use the CST-Python: - [Introduction to CST-Python](https://h-iaac.github.io/CST-Python/_build/html/_examples/Introduction%20to%20CST-Python.html): what is CST-Python, and basics about how to use it. - [Implementing a Architecture](https://h-iaac.github.io/CST-Python/_build/html/_examples/Implementing%20a%20Architecture.html): how to implement a cognitive architecture using CST-Python. 
- [Publisher-Subscriber](https://h-iaac.github.io/CST-Python/_build/html/_examples/Publisher-Subscriber.html): using the publisher-subscriber mechanism for synchronous codelets. -- [Activation and Monitoring](https://h-iaac.github.io/CST-Python/_build/html/_examples/Activation%20and%20Monitoring.html): using codelet's activation value and monitoring the agent. \ No newline at end of file +- [Activation and Monitoring](https://h-iaac.github.io/CST-Python/_build/html/_examples/Activation%20and%20Monitoring.html): using codelet's activation value and monitoring the agent. +- [Gymnasium Integration](https://h-iaac.github.io/CST-Python/_build/html/_examples/Gymnasium%20Integration.html): using gymnasium environments with CST. \ No newline at end of file From d5a219884aec048d11ac07ae87f78b474dbfa158 Mon Sep 17 00:00:00 2001 From: Elton Cardoso do Nascimento <43186596+EltonCN@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:32:38 -0300 Subject: [PATCH 6/6] GymCodelet tests --- src/cst_python/python/gym/gym_codelet.py | 12 +- tests/cst_python/python/__init__.py | 0 tests/cst_python/python/gym/__init__.py | 0 .../cst_python/python/gym/test_gym_codelet.py | 124 ++++++++++++++++++ 4 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 tests/cst_python/python/__init__.py create mode 100644 tests/cst_python/python/gym/__init__.py create mode 100644 tests/cst_python/python/gym/test_gym_codelet.py diff --git a/src/cst_python/python/gym/gym_codelet.py b/src/cst_python/python/gym/gym_codelet.py index 2498f2b..3c99e50 100644 --- a/src/cst_python/python/gym/gym_codelet.py +++ b/src/cst_python/python/gym/gym_codelet.py @@ -64,7 +64,7 @@ def __init__(self, mind:Mind, env:gym.Env): self._common_memories["reset"].add_memory_observer(self) self._last_reset = 0 - + @property def reward_memory(self) -> MemoryObject: ''' @@ -148,6 +148,16 @@ def proc(self) -> None: self.sample_to_memories(observation, self.observation_memories) + @classmethod + def reset_indexes(cls) -> 
None: + ''' + Reset the indexes for setting the suffix of new memories. + ''' + cls._last_indexes : dict[str, int] = {"reward":-1, "reset":-1, + "terminated":-1, "truncated":-1, + "info":-1, "seed":-1, + "step_count":-1} + @classmethod def space_to_memories(cls, mind:Mind, space:gym.Space, diff --git a/tests/cst_python/python/__init__.py b/tests/cst_python/python/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cst_python/python/gym/__init__.py b/tests/cst_python/python/gym/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cst_python/python/gym/test_gym_codelet.py b/tests/cst_python/python/gym/test_gym_codelet.py new file mode 100644 index 0000000..5651016 --- /dev/null +++ b/tests/cst_python/python/gym/test_gym_codelet.py @@ -0,0 +1,124 @@ +from contextlib import redirect_stdout +import math +import unittest +import time +import threading +import io + +import gymnasium as gym +from gymnasium.spaces import Box, Dict +import numpy as np +from numpy.testing import assert_array_almost_equal + +from cst_python import MemoryObject, Mind +from cst_python.python.gym import GymCodelet + +class TestGymCodelet(unittest.TestCase): + def setUp(self) -> None: + ... 
+ + def test_space_to_memories(self) -> None: + space = Box(-1, 1, (2,)) + mind = Mind() + + GymCodelet.reset_indexes() + + memories = GymCodelet.space_to_memories(mind, space) + keys = list(memories.keys()) + assert len(keys) == 1 + assert keys[0] == "observation" + memory = memories[keys[0]] + assert memory.get_name() == "observation" + assert space.contains(memory.get_info()) + + memories = GymCodelet.space_to_memories(mind, space) + memory = memories[next(iter(memories))] + assert memory.get_name() == "observation1" + + space = Dict({"x":Box(-1, 1, (2,)), "y":Box(-2, 1, (1,))}) + memories = GymCodelet.space_to_memories(mind, space) + keys = list(memories.keys()) + assert len(keys) == 2 + assert "x" in keys + assert "y" in keys + assert memories["x"].get_name() == "x" + assert memories["y"].get_name() == "y" + + memories = GymCodelet.space_to_memories(mind, space) + keys = list(memories.keys()) + assert len(keys) == 2 + assert "x" in keys + assert "y" in keys + assert memories["x"].get_name() == "x1" + assert memories["y"].get_name() == "y1" + + def test_sample_to_memories(self) -> None: + space = Box(-1, 1, (2,)) + sample = space.sample() + memories = {"observation":MemoryObject()} + + GymCodelet.sample_to_memories(sample, memories) + + assert_array_almost_equal(memories["observation"].get_info(), sample) + + + space = Dict({"x":Box(-1, 1, (2,)), "y":Box(-2, 1, (1,))}) + sample = space.sample() + memories = {"x":MemoryObject(), "y":MemoryObject()} + + GymCodelet.sample_to_memories(sample, memories) + + assert_array_almost_equal(memories["x"].get_info(), sample["x"]) + assert_array_almost_equal(memories["y"].get_info(), sample["y"]) + + def test_memories_to_space(self) -> None: + space = Box(-1, 1, (2,)) + sample = space.sample() + memories = {"observation":MemoryObject()} + memories["observation"].set_info(sample) + + reconstruced_sample = GymCodelet.memories_to_space(memories, space) + assert space.contains(reconstruced_sample) + 
assert_array_almost_equal(reconstruced_sample, sample) + + space = Dict({"x":Box(-1, 1, (2,)), "y":Box(-2, 1, (1,))}) + sample = space.sample() + memories = {"x":MemoryObject(), "y":MemoryObject()} + memories["x"].set_info(sample["x"]) + memories["y"].set_info(sample["y"]) + + reconstruced_sample = GymCodelet.memories_to_space(memories, space) + assert space.contains(reconstruced_sample) + assert_array_almost_equal(reconstruced_sample["x"], sample["x"]) + assert_array_almost_equal(reconstruced_sample["y"], sample["y"]) + + def test_episode(self) -> None: + env = gym.make("MountainCar-v0") + mind = Mind() + gym_codelet = GymCodelet(mind, env) + + mind.start() + + assert gym_codelet.step_count_memory.get_info() == 0 + gym_codelet.reset_memory.set_info(True) + assert gym_codelet.step_count_memory.get_info() == 0 + gym_codelet.action_memories["action"].set_info(1) + assert gym_codelet.step_count_memory.get_info() == 1 + gym_codelet.action_memories["action"].set_info(1) + assert gym_codelet.step_count_memory.get_info() == 2 + time.sleep(1e-3) #Minimum time for memory timestamp comparison is 1 ms + gym_codelet.reset_memory.set_info(True) + assert gym_codelet.step_count_memory.get_info() == 0 + + def test_env_memories(self) -> None: + env = gym.make("Blackjack-v1") + mind = Mind() + gym_codelet = GymCodelet(mind, env) + + assert len(gym_codelet.observation_memories) == 1 + assert "observation" in gym_codelet.observation_memories + assert env.observation_space.contains(gym_codelet.observation_memories["observation"].get_info()) + + assert len(gym_codelet.action_memories) == 1 + assert "action" in gym_codelet.action_memories + assert env.action_space.contains(gym_codelet.action_memories["action"].get_info()) \ No newline at end of file