From 20173dbdb0a098c8eb1578ab846c948f61639f34 Mon Sep 17 00:00:00 2001 From: Max Spahn Date: Wed, 14 Jun 2023 11:19:37 +0200 Subject: [PATCH 01/11] ft[gymnasium]: Started the migration to gymnasium by changing all the imports. --- examples/albert.py | 2 +- examples/boxer.py | 2 +- examples/dual_arm.py | 2 +- examples/generic_holonomic.py | 2 +- examples/heijn.py | 2 +- examples/iris.py | 2 +- examples/jackal.py | 2 +- examples/keyboard_input.py | 2 +- examples/mobile_reacher.py | 2 +- examples/multi_robot.py | 2 +- examples/n_link_urdf_reacher.py | 2 +- examples/panda_reacher.py | 2 +- examples/point_robot.py | 2 +- examples/point_robot_3dlidar_sensor.py | 2 +- examples/point_robot_free_space_decomp.py | 2 +- examples/point_robot_full_sensor.py | 2 +- examples/point_robot_lidar_sensor.py | 2 +- examples/point_robot_obstacle_sensor.py | 2 +- examples/point_robot_sdf_sensor.py | 2 +- examples/prius.py | 2 +- examples/tiago.py | 2 +- poetry.lock | 85 ++++++++++++------- pyproject.toml | 2 +- urdfenvs/sensors/free_space_decomposition.py | 2 +- urdfenvs/sensors/full_sensor.py | 2 +- urdfenvs/sensors/lidar.py | 2 +- urdfenvs/sensors/lidar_3d.py | 2 +- urdfenvs/sensors/obstacle_sensor.py | 2 +- urdfenvs/sensors/sdf_sensor.py | 2 +- urdfenvs/urdf_common/__init__.py | 2 +- urdfenvs/urdf_common/bicycle_model.py | 2 +- .../urdf_common/differential_drive_robot.py | 2 +- urdfenvs/urdf_common/generic_robot.py | 2 +- urdfenvs/urdf_common/holonomic_robot.py | 2 +- urdfenvs/urdf_common/quadrotor.py | 2 +- urdfenvs/urdf_common/urdf_env.py | 2 +- 36 files changed, 89 insertions(+), 66 deletions(-) diff --git a/examples/albert.py b/examples/albert.py index 11e65bcf..4e34240f 100644 --- a/examples/albert.py +++ b/examples/albert.py @@ -1,5 +1,5 @@ import warnings -import gym +import gymnasium as gym import numpy as np from urdfenvs.robots.generic_urdf.generic_diff_drive_robot import GenericDiffDriveRobot from urdfenvs.urdf_common.urdf_env import UrdfEnv diff --git a/examples/boxer.py b/examples/boxer.py index 67c67916..a6bcf32f 100644 --- a/examples/boxer.py +++ b/examples/boxer.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from urdfenvs.scene_examples.obstacles import wall_obstacles import numpy as np diff --git a/examples/dual_arm.py b/examples/dual_arm.py index 043b71c1..ef175944 100644 --- a/examples/dual_arm.py +++ b/examples/dual_arm.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np from urdfenvs.robots.generic_urdf import GenericUrdfReacher diff --git a/examples/generic_holonomic.py b/examples/generic_holonomic.py index 3ed141ff..97da8c3f 100644 --- a/examples/generic_holonomic.py +++ b/examples/generic_holonomic.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from urdfenvs.robots.generic_urdf import GenericUrdfReacher import numpy as np import os diff --git a/examples/heijn.py b/examples/heijn.py index 9f447327..0570d5d7 100644 --- a/examples/heijn.py +++ b/examples/heijn.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np from urdfenvs.scene_examples.obstacles import * from urdfenvs.scene_examples.goal import * diff --git a/examples/iris.py b/examples/iris.py index 6ec2d4db..1e6e0d9f 100644 --- a/examples/iris.py +++ b/examples/iris.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np from urdfenvs.robots.iris import IrisDrone diff --git a/examples/jackal.py b/examples/jackal.py index accddb8c..18737fa7 100644 --- a/examples/jackal.py +++ b/examples/jackal.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym 
import numpy as np from urdfenvs.scene_examples.obstacles import wall_obstacles diff --git a/examples/keyboard_input.py b/examples/keyboard_input.py index d903efda..0ec1f460 100644 --- a/examples/keyboard_input.py +++ b/examples/keyboard_input.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from urdfenvs.robots.tiago import TiagoRobot from multiprocessing import Process, Pipe diff --git a/examples/mobile_reacher.py b/examples/mobile_reacher.py index daf8a45b..96d7dbe9 100644 --- a/examples/mobile_reacher.py +++ b/examples/mobile_reacher.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from urdfenvs.robots.generic_urdf import GenericUrdfReacher import numpy as np diff --git a/examples/multi_robot.py b/examples/multi_robot.py index f3e7e75f..37c7a37a 100644 --- a/examples/multi_robot.py +++ b/examples/multi_robot.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np from urdfenvs.robots.generic_urdf import GenericUrdfReacher from urdfenvs.robots.generic_urdf import GenericDiffDriveRobot diff --git a/examples/n_link_urdf_reacher.py b/examples/n_link_urdf_reacher.py index 93c45956..c877a56f 100644 --- a/examples/n_link_urdf_reacher.py +++ b/examples/n_link_urdf_reacher.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from urdfenvs.robots.generic_urdf import GenericUrdfReacher import numpy as np diff --git a/examples/panda_reacher.py b/examples/panda_reacher.py index c91813cd..2cc5b4d7 100644 --- a/examples/panda_reacher.py +++ b/examples/panda_reacher.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np from urdfenvs.robots.generic_urdf import GenericUrdfReacher from urdfenvs.scene_examples.goal import dynamicGoal diff --git a/examples/point_robot.py b/examples/point_robot.py index e537f018..72c1ebf8 100644 --- a/examples/point_robot.py +++ b/examples/point_robot.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from urdfenvs.robots.generic_urdf import GenericUrdfReacher import numpy as np diff --git a/examples/point_robot_3dlidar_sensor.py b/examples/point_robot_3dlidar_sensor.py index 3a332590..20d2c4d2 100644 --- a/examples/point_robot_3dlidar_sensor.py +++ b/examples/point_robot_3dlidar_sensor.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np from urdfenvs.scene_examples.obstacles import ( sphereObst1, diff --git a/examples/point_robot_free_space_decomp.py b/examples/point_robot_free_space_decomp.py index e5fabf96..d5a461e4 100644 --- a/examples/point_robot_free_space_decomp.py +++ b/examples/point_robot_free_space_decomp.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import pprint import numpy as np from urdfenvs.scene_examples.obstacles import ( diff --git a/examples/point_robot_full_sensor.py b/examples/point_robot_full_sensor.py index 26144bf2..6567f161 100644 --- a/examples/point_robot_full_sensor.py +++ b/examples/point_robot_full_sensor.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from urdfenvs.urdf_common.urdf_env import UrdfEnv from urdfenvs.robots.generic_urdf import GenericUrdfReacher from urdfenvs.sensors.full_sensor import FullSensor diff --git a/examples/point_robot_lidar_sensor.py b/examples/point_robot_lidar_sensor.py index d6d507b0..434120e2 100644 --- a/examples/point_robot_lidar_sensor.py +++ b/examples/point_robot_lidar_sensor.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np from urdfenvs.scene_examples.obstacles import ( sphereObst1, diff --git a/examples/point_robot_obstacle_sensor.py b/examples/point_robot_obstacle_sensor.py 
index e91b972d..f5007a5a 100644 --- a/examples/point_robot_obstacle_sensor.py +++ b/examples/point_robot_obstacle_sensor.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from urdfenvs.robots.generic_urdf import GenericUrdfReacher from urdfenvs.sensors.obstacle_sensor import ObstacleSensor from urdfenvs.scene_examples.obstacles import ( diff --git a/examples/point_robot_sdf_sensor.py b/examples/point_robot_sdf_sensor.py index 4de78ee9..ed5302b3 100644 --- a/examples/point_robot_sdf_sensor.py +++ b/examples/point_robot_sdf_sensor.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np from urdfenvs.urdf_common.urdf_env import UrdfEnv diff --git a/examples/prius.py b/examples/prius.py index 043c02ff..5abba6bc 100644 --- a/examples/prius.py +++ b/examples/prius.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from urdfenvs.robots.prius import Prius import numpy as np diff --git a/examples/tiago.py b/examples/tiago.py index 50306110..13c3fa74 100644 --- a/examples/tiago.py +++ b/examples/tiago.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from urdfenvs.robots.generic_urdf.generic_diff_drive_robot import GenericDiffDriveRobot import numpy as np diff --git a/poetry.lock b/poetry.lock index 8d0679c0..0dcb6f4f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -308,6 +308,18 @@ files = [ {file = "evdev-1.6.1.tar.gz", hash = "sha256:299db8628cc73b237fc1cc57d3c2948faa0756e2a58b6194b5bf81dc2081f1e3"}, ] +[[package]] +name = "farama-notifications" +version = "0.0.4" +description = "Notifications for all Farama Foundation maintained libraries." +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "Farama-Notifications-0.0.4.tar.gz", hash = "sha256:13fceff2d14314cf80703c8266462ebf3733c7d165336eee998fc58e545efd18"}, + {file = "Farama_Notifications-0.0.4-py3-none-any.whl", hash = "sha256:14de931035a41961f7c056361dc7f980762a143d05791ef5794a751a2caf05ae"}, +] + [[package]] name = "geomdl" version = "5.3.1" @@ -321,45 +333,37 @@ files = [ ] [[package]] -name = "gym" -version = "0.26.2" -description = "Gym: A universal API for reinforcement learning environments" +name = "gymnasium" +version = "0.28.1" +description = "A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)." 
category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "gym-0.26.2.tar.gz", hash = "sha256:e0d882f4b54f0c65f203104c24ab8a38b039f1289986803c7d02cdbe214fbcc4"}, + {file = "gymnasium-0.28.1-py3-none-any.whl", hash = "sha256:7bc9a5bce1022f997d1dbc152fc91d1ac977bad9cc7794cdc25437010867cabf"}, + {file = "gymnasium-0.28.1.tar.gz", hash = "sha256:4c2c745808792c8f45c6e88ad0a5504774394e0c126f6e3db555e720d3da6f24"}, ] [package.dependencies] cloudpickle = ">=1.2.0" -gym_notices = ">=0.0.4" -importlib_metadata = {version = ">=4.8.0", markers = "python_version < \"3.10\""} -numpy = ">=1.18.0" +farama-notifications = ">=0.0.1" +importlib-metadata = {version = ">=4.8.0", markers = "python_version < \"3.10\""} +jax-jumpy = ">=1.0.0" +numpy = ">=1.21.0" +typing-extensions = ">=4.3.0" [package.extras] accept-rom-license = ["autorom[accept-rom-license] (>=0.4.2,<0.5.0)"] -all = ["ale-py (>=0.8.0,<0.9.0)", "box2d-py (==2.3.5)", "imageio (>=2.14.1)", "lz4 (>=3.1.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "mujoco (==2.2)", "mujoco_py (>=2.1,<2.2)", "opencv-python (>=3.0)", "pygame (==2.1.0)", "pytest (==7.0.1)", "swig (>=4.0.0,<5.0.0)"] -atari = ["ale-py (>=0.8.0,<0.9.0)"] -box2d = ["box2d-py (==2.3.5)", "pygame (==2.1.0)", "swig (>=4.0.0,<5.0.0)"] -classic-control = ["pygame (==2.1.0)"] -mujoco = ["imageio (>=2.14.1)", "mujoco (==2.2)"] -mujoco-py = ["mujoco_py (>=2.1,<2.2)"] -other = ["lz4 (>=3.1.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "opencv-python (>=3.0)"] -testing = ["box2d-py (==2.3.5)", "imageio (>=2.14.1)", "lz4 (>=3.1.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "mujoco (==2.2)", "mujoco_py (>=2.1,<2.2)", "opencv-python (>=3.0)", "pygame (==2.1.0)", "pytest (==7.0.1)", "swig (>=4.0.0,<5.0.0)"] -toy-text = ["pygame (==2.1.0)"] - -[[package]] -name = "gym-notices" -version = "0.0.8" -description = "Notices for gym" -category = "main" -optional = false -python-versions = "*" -files = [ - {file = "gym-notices-0.0.8.tar.gz", hash = "sha256:ad25e200487cafa369728625fe064e88ada1346618526102659b4640f2b4b911"}, - {file = "gym_notices-0.0.8-py3-none-any.whl", hash = "sha256:e5f82e00823a166747b4c2a07de63b6560b1acb880638547e0cabf825a01e463"}, -] +all = ["box2d-py (==2.3.5)", "imageio (>=2.14.1)", "jax (==0.3.24)", "jaxlib (==0.3.24)", "lz4 (>=3.1.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "mujoco (>=2.3.2)", "mujoco-py (>=2.1,<2.2)", "opencv-python (>=3.0)", "pygame (==2.1.3)", "shimmy[atari] (>=0.1.0,<1.0)", "swig (>=4.0.0,<5.0.0)", "torch (>=1.0.0)"] +atari = ["shimmy[atari] (>=0.1.0,<1.0)"] +box2d = ["box2d-py (==2.3.5)", "pygame (==2.1.3)", "swig (>=4.0.0,<5.0.0)"] +classic-control = ["pygame (==2.1.3)", "pygame (==2.1.3)"] +jax = ["jax (==0.3.24)", "jaxlib (==0.3.24)"] +mujoco = ["imageio (>=2.14.1)", "mujoco (>=2.3.2)"] +mujoco-py = ["mujoco-py (>=2.1,<2.2)", "mujoco-py (>=2.1,<2.2)"] +other = ["lz4 (>=3.1.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "opencv-python (>=3.0)", "torch (>=1.0.0)"] +testing = ["pytest (==7.1.3)", "scipy (==1.7.3)"] +toy-text = ["pygame (==2.1.3)", "pygame (==2.1.3)"] [[package]] name = "idna" @@ -454,6 +458,25 @@ pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib" plugins = ["setuptools"] requirements-deprecated-finder = ["pip-api", "pipreqs"] +[[package]] +name = "jax-jumpy" +version = "1.0.0" +description = "Common backend for Jax or Numpy." 
+category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jax-jumpy-1.0.0.tar.gz", hash = "sha256:195fb955cc4c2b7f0b1453e3cb1fb1c414a51a407ffac7a51e69a73cb30d59ad"}, + {file = "jax_jumpy-1.0.0-py3-none-any.whl", hash = "sha256:ab7e01454bba462de3c4d098e3e585c302a8f06bc36d9182ab4e7e4aa7067c5e"}, +] + +[package.dependencies] +numpy = ">=1.18.0" + +[package.extras] +jax = ["jax (>=0.3.24)", "jaxlib (>=0.3.24)"] +testing = ["pytest (==7.1.3)"] + [[package]] name = "jinja2" version = "3.1.2" @@ -1931,7 +1954,7 @@ test = ["autopep8", "coveralls", "ezdxf", "pyinstrument", "pytest", "pytest-cov" name = "typing-extensions" version = "4.5.0" description = "Backported and Experimental Type Hints for Python 3.7+" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2192,4 +2215,4 @@ keyboard = [] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "912f6976dd79f29806e90767f5f43956fad88475f8ed6f46547570db920c4e9b" +content-hash = "93113dd2e9e3869f0ca729c1175ad721aa6578dc8d871368c88286cc58a0a495" diff --git a/pyproject.toml b/pyproject.toml index e53157d5..e9a25d17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,9 +19,9 @@ python = "^3.8" pybullet = "^3.2.1" numpy = "^1.19,<1.24" yourdfpy = "^0.0.52" -gym = "^0.26.0" deprecation = "^2.1.0" mpscenes = "^0.4.2" +gymnasium = "^0.28.1" [tool.poetry.extras] keyboard = ["pynput", "multiprocess"] diff --git a/urdfenvs/sensors/free_space_decomposition.py b/urdfenvs/sensors/free_space_decomposition.py index 8b4ac23c..bfac2de8 100644 --- a/urdfenvs/sensors/free_space_decomposition.py +++ b/urdfenvs/sensors/free_space_decomposition.py @@ -1,7 +1,7 @@ """Module for free space decomposition.""" import numpy as np import pybullet as p -import gym +import gymnasium as gym from typing import Callable, List from urdfenvs.sensors.lidar import Lidar diff --git a/urdfenvs/sensors/full_sensor.py b/urdfenvs/sensors/full_sensor.py index 8cee8a43..adaadf30 100644 --- a/urdfenvs/sensors/full_sensor.py +++ b/urdfenvs/sensors/full_sensor.py @@ -1,7 +1,7 @@ from urdfenvs.sensors.sensor import Sensor import numpy as np import pybullet as p -from gym import spaces +from gymnasium import spaces class FullSensor(Sensor): diff --git a/urdfenvs/sensors/lidar.py b/urdfenvs/sensors/lidar.py index cfff5ef1..8bb7f303 100644 --- a/urdfenvs/sensors/lidar.py +++ b/urdfenvs/sensors/lidar.py @@ -1,7 +1,7 @@ """Module for lidar sensor simulation.""" import numpy as np import pybullet as p -import gym +import gymnasium as gym from urdfenvs.sensors.sensor import Sensor diff --git a/urdfenvs/sensors/lidar_3d.py b/urdfenvs/sensors/lidar_3d.py index f29548af..aef0a803 100644 --- a/urdfenvs/sensors/lidar_3d.py +++ b/urdfenvs/sensors/lidar_3d.py @@ -1,7 +1,7 @@ """Module for lidar sensor simulation.""" import numpy as np import pybullet as p -import gym +import gymnasium as gym from urdfenvs.sensors.sensor import Sensor diff --git a/urdfenvs/sensors/obstacle_sensor.py b/urdfenvs/sensors/obstacle_sensor.py index 82ad1e22..97d740f5 100644 --- a/urdfenvs/sensors/obstacle_sensor.py +++ b/urdfenvs/sensors/obstacle_sensor.py @@ -1,6 +1,6 @@ import numpy as np import pybullet as p -import gym +import gymnasium as gym from urdfenvs.sensors.sensor import Sensor from urdfenvs import __version__ diff --git a/urdfenvs/sensors/sdf_sensor.py b/urdfenvs/sensors/sdf_sensor.py index 8ac2ee45..afa947cc 100644 --- a/urdfenvs/sensors/sdf_sensor.py +++ b/urdfenvs/sensors/sdf_sensor.py @@ -2,7 +2,7 @@ from time import 
perf_counter import numpy as np import pybullet as p -import gym +import gymnasium as gym from urdfenvs.sensors.sensor import Sensor diff --git a/urdfenvs/urdf_common/__init__.py b/urdfenvs/urdf_common/__init__.py index e5087658..ebb94387 100644 --- a/urdfenvs/urdf_common/__init__.py +++ b/urdfenvs/urdf_common/__init__.py @@ -1,4 +1,4 @@ -from gym.envs.registration import register +from gymnasium import register from urdfenvs.urdf_common.urdf_env import UrdfEnv register( id='urdf-env-v0', diff --git a/urdfenvs/urdf_common/bicycle_model.py b/urdfenvs/urdf_common/bicycle_model.py index 9855cb4d..f22aa303 100644 --- a/urdfenvs/urdf_common/bicycle_model.py +++ b/urdfenvs/urdf_common/bicycle_model.py @@ -1,5 +1,5 @@ import pybullet as p -import gym +import gymnasium as gym import numpy as np import logging diff --git a/urdfenvs/urdf_common/differential_drive_robot.py b/urdfenvs/urdf_common/differential_drive_robot.py index 0fa9d861..8a96ee54 100644 --- a/urdfenvs/urdf_common/differential_drive_robot.py +++ b/urdfenvs/urdf_common/differential_drive_robot.py @@ -1,7 +1,7 @@ import logging from typing import List import pybullet as p -import gym +import gymnasium as gym import numpy as np from urdfenvs.urdf_common.generic_robot import ControlMode, GenericRobot diff --git a/urdfenvs/urdf_common/generic_robot.py b/urdfenvs/urdf_common/generic_robot.py index d44b9b43..d5ecd2e5 100644 --- a/urdfenvs/urdf_common/generic_robot.py +++ b/urdfenvs/urdf_common/generic_robot.py @@ -3,7 +3,7 @@ from abc import ABC, abstractmethod from typing import List import pybullet as p -import gym +import gymnasium as gym import numpy as np import yourdfpy import urdfenvs diff --git a/urdfenvs/urdf_common/holonomic_robot.py b/urdfenvs/urdf_common/holonomic_robot.py index d8f196ec..b75ce8b7 100644 --- a/urdfenvs/urdf_common/holonomic_robot.py +++ b/urdfenvs/urdf_common/holonomic_robot.py @@ -1,5 +1,5 @@ import pybullet as p -import gym +import gymnasium as gym import numpy as np from urdfenvs.urdf_common.generic_robot import GenericRobot diff --git a/urdfenvs/urdf_common/quadrotor.py b/urdfenvs/urdf_common/quadrotor.py index f1713006..0b0a29e0 100644 --- a/urdfenvs/urdf_common/quadrotor.py +++ b/urdfenvs/urdf_common/quadrotor.py @@ -1,5 +1,5 @@ import pybullet as p -import gym +import gymnasium as gym import numpy as np import logging diff --git a/urdfenvs/urdf_common/urdf_env.py b/urdfenvs/urdf_common/urdf_env.py index dd9c37af..aaf5bb22 100644 --- a/urdfenvs/urdf_common/urdf_env.py +++ b/urdfenvs/urdf_common/urdf_env.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import time import numpy as np import pybullet as p From b3e2c594c0e55ae49cf95600d881e790fe82e25c Mon Sep 17 00:00:00 2001 From: Max Spahn Date: Wed, 14 Jun 2023 11:34:38 +0200 Subject: [PATCH 02/11] ft[gymnasium]: Updates step and reset API. Gymnasium changes the return values of the reset and step functions. The reset function now returns a tuple of the initial observation and an info dictionary (in our case still an empty one). The step function now returns two booleans instead of the single done boolean: one for terminated and one for truncated. So far, our implementation has no truncation criterion, so truncated is always false.
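For reference, a minimal sketch of the new calling convention from the user's side (robot model, action, and step count are placeholders, mirroring the tests below):

    import gymnasium as gym
    import numpy as np
    from urdfenvs.robots.generic_urdf import GenericUrdfReacher

    robots = [GenericUrdfReacher(urdf="pointRobot.urdf", mode="vel")]
    env = gym.make("urdf-env-v0", robots=robots, render=False, dt=0.01)

    # reset() now returns an (observation, info) tuple.
    ob, info = env.reset()
    action = np.zeros(env.n())
    for _ in range(100):
        # step() now returns five values: done is split into terminated and truncated.
        ob, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            ob, info = env.reset()
    env.close()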
--- examples/albert.py | 2 +- examples/boxer.py | 2 +- examples/dual_arm.py | 2 +- examples/generic_holonomic.py | 2 +- examples/heijn.py | 2 +- examples/iris.py | 2 +- examples/jackal.py | 2 +- examples/mobile_reacher.py | 2 +- examples/multi_robot.py | 2 +- examples/n_link_urdf_reacher.py | 2 +- examples/panda_reacher.py | 4 ++-- examples/point_robot.py | 2 +- examples/point_robot_3dlidar_sensor.py | 2 +- examples/point_robot_free_space_decomp.py | 2 +- examples/point_robot_lidar_sensor.py | 2 +- examples/point_robot_obstacle_sensor.py | 2 +- examples/prius.py | 2 +- examples/tiago.py | 6 +++--- pyproject.toml | 2 +- tests/test_acc_envs.py | 14 +++++++------- tests/test_full_sensor.py | 4 ++-- tests/test_lidary_sensor.py | 4 ++-- tests/test_obstacle_sensor.py | 10 +++++----- tests/test_vel_envs.py | 14 +++++++------- urdfenvs/urdf_common/urdf_env.py | 9 ++++++--- 25 files changed, 51 insertions(+), 48 deletions(-) diff --git a/examples/albert.py b/examples/albert.py index 4e34240f..c60566b5 100644 --- a/examples/albert.py +++ b/examples/albert.py @@ -32,7 +32,7 @@ def run_albert(n_steps=1000, render=False, goal=True, obstacles=True): print(f"Initial observation : {ob}") history = [] for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history diff --git a/examples/boxer.py b/examples/boxer.py index a6bcf32f..c1cdba67 100644 --- a/examples/boxer.py +++ b/examples/boxer.py @@ -30,7 +30,7 @@ def run_boxer(n_steps=1000, render=False, goal=True, obstacles=True): print("Starting episode") history = [] for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) print(ob['robot_0']['joint_state']['velocity'][0:2]) history.append(ob) env.close() diff --git a/examples/dual_arm.py b/examples/dual_arm.py index ef175944..7e3f661b 100644 --- a/examples/dual_arm.py +++ b/examples/dual_arm.py @@ -16,7 +16,7 @@ def run_dual_arm(n_steps=5000, render=False, goal=True, obstacles=True): print(f"Initial observation : {ob}") history = [] for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history diff --git a/examples/generic_holonomic.py b/examples/generic_holonomic.py index 97da8c3f..05c66b77 100644 --- a/examples/generic_holonomic.py +++ b/examples/generic_holonomic.py @@ -35,7 +35,7 @@ def run_generic_holonomic(n_steps=1000, render=False, goal=True, obstacles=True) print("Starting episode") history = [] for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history diff --git a/examples/heijn.py b/examples/heijn.py index 0570d5d7..06cf77a3 100644 --- a/examples/heijn.py +++ b/examples/heijn.py @@ -28,7 +28,7 @@ def run_point_robot(n_steps=1000, render=False, goal=True, obstacles=True): env.add_goal(splineGoal) history = [] for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history diff --git a/examples/iris.py b/examples/iris.py index 1e6e0d9f..c7491ea5 100644 --- a/examples/iris.py +++ b/examples/iris.py @@ -22,7 +22,7 @@ def run_iris(n_steps=3000, render=False, goal=True, obstacles=True): action += np.array([0, 1, 0, 1]) * 1 if i > 248 and i < 298: action += np.array([0, 1, 0, 1]) * -1 - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history diff --git a/examples/jackal.py b/examples/jackal.py index 18737fa7..171ca925 100644 --- a/examples/jackal.py 
+++ b/examples/jackal.py @@ -33,7 +33,7 @@ def run_jackal(n_steps=1000, render=False, goal=True, obstacles=True): print("Starting episode") history = [] for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history diff --git a/examples/mobile_reacher.py b/examples/mobile_reacher.py index 96d7dbe9..1dcc2af3 100644 --- a/examples/mobile_reacher.py +++ b/examples/mobile_reacher.py @@ -25,7 +25,7 @@ def run_mobile_reacher(n_steps=1000, render=False, goal=True, obstacles=True): else: action[-1] = 0.01 action[-2] = 0.01 - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history diff --git a/examples/multi_robot.py b/examples/multi_robot.py index 37c7a37a..a9244057 100644 --- a/examples/multi_robot.py +++ b/examples/multi_robot.py @@ -78,7 +78,7 @@ def run_multi_robot(n_steps=1000, render=False, obstacles=False, goal=False): print("Starting episode") history = [] for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history diff --git a/examples/n_link_urdf_reacher.py b/examples/n_link_urdf_reacher.py index c877a56f..ce824dd6 100644 --- a/examples/n_link_urdf_reacher.py +++ b/examples/n_link_urdf_reacher.py @@ -17,7 +17,7 @@ def run_n_link_reacher(n_steps=1000, render=False, goal=True, obstacles=True): print(f"Initial observation : {ob}") history = [] for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history diff --git a/examples/panda_reacher.py b/examples/panda_reacher.py index 2cc5b4d7..1f9115c2 100644 --- a/examples/panda_reacher.py +++ b/examples/panda_reacher.py @@ -45,11 +45,11 @@ def run_panda(n_steps=1000, render=False, goal=True, obstacles=True): else: action[7] = 0.02 action[8] = 0.02 - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history if __name__ == "__main__": - run_panda(render=True) + run_panda(render=False) diff --git a/examples/point_robot.py b/examples/point_robot.py index 72c1ebf8..815c788c 100644 --- a/examples/point_robot.py +++ b/examples/point_robot.py @@ -32,7 +32,7 @@ def run_point_robot(n_steps=1000, render=False, goal=True, obstacles=True): history = [] env.reconfigure_camera(2.0, 0.0, -90.01, (0, 0, 0)) for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history diff --git a/examples/point_robot_3dlidar_sensor.py b/examples/point_robot_3dlidar_sensor.py index 20d2c4d2..7a4fb739 100644 --- a/examples/point_robot_3dlidar_sensor.py +++ b/examples/point_robot_3dlidar_sensor.py @@ -46,7 +46,7 @@ def run_point_robot_with_lidar( print(f"Initial observation : {ob}") history = [] for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) # Access the lidar observation #_ = ob["robot_0"]["LidarSensor"] diff --git a/examples/point_robot_free_space_decomp.py b/examples/point_robot_free_space_decomp.py index d5a461e4..eed38862 100644 --- a/examples/point_robot_free_space_decomp.py +++ b/examples/point_robot_free_space_decomp.py @@ -41,7 +41,7 @@ def run_point_robot_with_freespacedecomp( print(f"Initial observation : {ob}") history = [] for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) #print(ob['robot_0']['FreeSpaceDecompSensor']) # Access the lidar observation #_ = ob["robot_0"]["LidarSensor"] diff --git 
a/examples/point_robot_lidar_sensor.py b/examples/point_robot_lidar_sensor.py index 434120e2..1219aef1 100644 --- a/examples/point_robot_lidar_sensor.py +++ b/examples/point_robot_lidar_sensor.py @@ -40,7 +40,7 @@ def run_point_robot_with_lidar( print(f"Initial observation : {ob}") history = [] for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) # Access the lidar observation #_ = ob["robot_0"]["LidarSensor"] diff --git a/examples/point_robot_obstacle_sensor.py b/examples/point_robot_obstacle_sensor.py index f5007a5a..185927ac 100644 --- a/examples/point_robot_obstacle_sensor.py +++ b/examples/point_robot_obstacle_sensor.py @@ -36,7 +36,7 @@ def run_point_robot_with_obstacle_sensor(n_steps=1000, render=False, obstacles=T history = [] for _ in range(n_steps): action = defaultAction - ob, reward, done, info = env.step(action) + ob, *_ = env.step(action) # In observations, information about obstacles is stored in ob['obstacleSensor'] history.append(ob) env.close() diff --git a/examples/prius.py b/examples/prius.py index 5abba6bc..a1323ba7 100644 --- a/examples/prius.py +++ b/examples/prius.py @@ -17,7 +17,7 @@ def run_prius(n_steps=1000, render=False, goal=True, obstacles=True): print(f"Initial observation : {ob}") history = [] for i in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) if ob['robot_0']['joint_state']['steering'] > 0.2: action[1] = 0 history.append(ob) diff --git a/examples/tiago.py b/examples/tiago.py index 13c3fa74..21de7401 100644 --- a/examples/tiago.py +++ b/examples/tiago.py @@ -48,7 +48,7 @@ def run_tiago(n_steps=1000, render=False, goal=True, obstacles=True): action[14] = 0.0 # right arm shoulder action[22] = 0.01 # finger joint vel0 = np.zeros(env.n()) - ob = env.reset() + ob, *_ = env.reset() print("base: ", ob['robot_0']["joint_state"]["position"][0:3]) print("torso: ", ob['robot_0']["joint_state"]["position"][3]) print("head: ", ob['robot_0']["joint_state"]["position"][4:6]) @@ -57,11 +57,11 @@ def run_tiago(n_steps=1000, render=False, goal=True, obstacles=True): print("Starting episode") history = [] for _ in range(n_steps): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history if __name__ == "__main__": - run_tiago(render=True) + run_tiago(render=False) diff --git a/pyproject.toml b/pyproject.toml index e9a25d17..d5a38d93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "urdfenvs" -version = "0.7.7" +version = "0.8.0" description = "Simple simulation environment for robots, based on the urdf files." 
authors = ["Max Spahn "] maintainers = [ diff --git a/tests/test_acc_envs.py b/tests/test_acc_envs.py index 7090dbea..89a00159 100644 --- a/tests/test_acc_envs.py +++ b/tests/test_acc_envs.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np import pytest from urdfenvs.robots.generic_urdf import GenericUrdfReacher @@ -131,10 +131,10 @@ def allBicycleModelEnvs(priusEnv): def test_all(allEnvs): for setup in allEnvs: env = gym.make("urdf-env-v0", robots=[setup[0]], render=False, dt=0.01) - ob = env.reset(pos=setup[1], vel=setup[2]) + ob, _ = env.reset(pos=setup[1], vel=setup[2]) action = np.random.random(env.n()) np.testing.assert_array_almost_equal(ob['robot_0']['joint_state']['position'], setup[1], decimal=2) - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) assert isinstance(ob, dict) assert isinstance(ob['robot_0']['joint_state']['position'], np.ndarray) assert isinstance(ob['robot_0']['joint_state']['velocity'], np.ndarray) @@ -144,10 +144,10 @@ def test_all(allEnvs): def test_allDifferentialDrive(allDifferentialDriveEnvs): for setup in allDifferentialDriveEnvs: env = gym.make("urdf-env-v0", robots=[setup[0]], render=False, dt=0.01) - ob = env.reset(pos=setup[1], vel=setup[2]) + ob, _ = env.reset(pos=setup[1], vel=setup[2]) action = np.random.random(env.n()) * 0.1 np.testing.assert_array_almost_equal(ob['robot_0']['joint_state']['position'], setup[1], decimal=2) - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) assert isinstance(ob['robot_0'], dict) assert isinstance(ob['robot_0']['joint_state']['position'], np.ndarray) assert isinstance(ob['robot_0']['joint_state']['velocity'], np.ndarray) @@ -157,10 +157,10 @@ def test_allDifferentialDrive(allDifferentialDriveEnvs): def test_allBicycleModel(allBicycleModelEnvs): for setup in allBicycleModelEnvs: env = gym.make("urdf-env-v0", robots=[setup[0]], render=False, dt=0.01) - ob = env.reset(pos=setup[1], vel=setup[2]) + ob, _ = env.reset(pos=setup[1], vel=setup[2]) action = np.random.random(env.n()) * 0.1 np.testing.assert_array_almost_equal(ob['robot_0']['joint_state']['position'], setup[1], decimal=2) - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) assert isinstance(ob['robot_0'], dict) assert isinstance(ob['robot_0']['joint_state']['position'], np.ndarray) assert isinstance(ob['robot_0']['joint_state']['velocity'], np.ndarray) diff --git a/tests/test_full_sensor.py b/tests/test_full_sensor.py index ec8dddfb..5270d927 100644 --- a/tests/test_full_sensor.py +++ b/tests/test_full_sensor.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np import pytest from urdfenvs.sensors.full_sensor import FullSensor @@ -31,7 +31,7 @@ def test_full_sensor(): env.set_spaces() action = np.random.random(env.n()) for _ in range(10): - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) full_sensor_ob = ob['robot_0']['FullSensor'] assert "obstacles" in full_sensor_ob.keys() assert "goals" in full_sensor_ob diff --git a/tests/test_lidary_sensor.py b/tests/test_lidary_sensor.py index addc26e8..aedcdaf5 100644 --- a/tests/test_lidary_sensor.py +++ b/tests/test_lidary_sensor.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np import pytest from urdfenvs.sensors.lidar import Lidar @@ -24,7 +24,7 @@ def test_full_sensor(): env.add_sensor(sensor, [0]) env.set_spaces() action = np.random.random(env.n()) - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) lidar_sensor_ob = ob['robot_0']['LidarSensor'] assert isinstance(lidar_sensor_ob, np.ndarray) 
env.close() diff --git a/tests/test_obstacle_sensor.py b/tests/test_obstacle_sensor.py index ef5e3238..1ea22cc6 100644 --- a/tests/test_obstacle_sensor.py +++ b/tests/test_obstacle_sensor.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np import pytest @@ -21,7 +21,7 @@ def test_static_obstacle(): env.add_sensor(sensor, [0]) env.set_spaces() action = np.random.random(env.n()) - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) ob = ob['robot_0']['ObstacleSensor'] assert "obstacle_0" in ob assert isinstance(ob["obstacle_0"]["pose"]["position"], np.ndarray) @@ -52,7 +52,7 @@ def test_dynamicObstacle(): env.add_sensor(sensor, [0]) env.set_spaces() action = np.random.random(env.n()) - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) ob = ob['robot_0']['ObstacleSensor'] assert "obstacle_0" in ob assert isinstance(ob["obstacle_0"]["pose"]["position"], np.ndarray) @@ -82,7 +82,7 @@ def test_shape_observation_space(): env.add_sensor(sensor, [0]) env.set_spaces() action = np.random.random(env.n()) - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) ob = ob['robot_0']['ObstacleSensor'] assert ob["obstacle_0"]["pose"]["position"].shape == (3, ) assert ob["obstacle_0"]["pose"]["orientation"].shape == (4, ) @@ -101,7 +101,7 @@ def test_urdfObstacle(env): # change order env.add_obstacle(urdfObst1) action = np.random.random(env.n()) - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) assert "obstacleSensor" in ob['robot_0'] assert "2" in ob['robot_0']["obstacleSensor"] assert isinstance(ob['robot_0']["obstacleSensor"]["2"]["pose"]["position"], np.ndarray) diff --git a/tests/test_vel_envs.py b/tests/test_vel_envs.py index e4861d27..d1b68ed4 100644 --- a/tests/test_vel_envs.py +++ b/tests/test_vel_envs.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np import pytest from urdfenvs.robots.generic_urdf import GenericUrdfReacher @@ -132,10 +132,10 @@ def allBicycleModelEnvs(priusEnv): def test_all_generic(allEnvs): for setup in allEnvs: env = gym.make("urdf-env-v0", robots=[setup[0]], render=False, dt=0.01) - ob = env.reset(pos=setup[1], vel=setup[2]) + ob, _ = env.reset(pos=setup[1], vel=setup[2]) action = np.random.random(env.n()) np.testing.assert_array_almost_equal(ob['robot_0']['joint_state']['position'], setup[1], decimal=2) - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) assert isinstance(ob['robot_0'], dict) assert isinstance(ob['robot_0']['joint_state']['position'], np.ndarray) assert isinstance(ob['robot_0']['joint_state']['velocity'], np.ndarray) @@ -146,10 +146,10 @@ def test_all_generic(allEnvs): def test_allDifferentialDrive(allDifferentialDriveEnvs): for setup in allDifferentialDriveEnvs: env = gym.make("urdf-env-v0", robots=[setup[0]], render=False, dt=0.01) - ob = env.reset(pos=setup[1], vel=setup[2]) + ob, _ = env.reset(pos=setup[1], vel=setup[2]) action = np.random.random(env.n()) * 0.02 np.testing.assert_array_almost_equal(ob['robot_0']['joint_state']['position'], setup[1], decimal=2) - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) assert isinstance(ob['robot_0'], dict) assert isinstance(ob['robot_0']['joint_state']['position'], np.ndarray) assert isinstance(ob['robot_0']['joint_state']['velocity'], np.ndarray) @@ -163,10 +163,10 @@ def test_allDifferentialDrive(allDifferentialDriveEnvs): def test_allBicycleModel(allBicycleModelEnvs): for setup in allBicycleModelEnvs: env = gym.make("urdf-env-v0", robots=[setup[0]], render=False, dt=0.01) - ob = env.reset(pos=setup[1], 
vel=setup[2]) + ob, _ = env.reset(pos=setup[1], vel=setup[2]) action = np.random.random(env.n()) * 0.1 np.testing.assert_array_almost_equal(ob['robot_0']['joint_state']['position'], setup[1], decimal=2) - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) assert isinstance(ob['robot_0'], dict) assert isinstance(ob['robot_0']['joint_state']['position'], np.ndarray) assert isinstance(ob['robot_0']['joint_state']['velocity'], np.ndarray) diff --git a/urdfenvs/urdf_common/urdf_env.py b/urdfenvs/urdf_common/urdf_env.py index aaf5bb22..8799d7f3 100644 --- a/urdfenvs/urdf_common/urdf_env.py +++ b/urdfenvs/urdf_common/urdf_env.py @@ -247,7 +247,9 @@ def step(self, action): if self._render: self.render() - return ob, reward, self._done, self._info + terminated = self._done + truncated = False + return ob, reward, terminated, truncated, self._info def _get_ob(self) -> dict: """Compose the observation.""" @@ -538,7 +540,7 @@ def reset( vel: np.ndarray = None, mount_positions: np.ndarray = None, mount_orientations: np.ndarray = None, - ) -> dict: + ) -> tuple: """Resets the simulation and the robot. Parameters @@ -581,7 +583,8 @@ def reset( ) self.reset_obstacles() self.reset_goals() - return self._get_ob() + info = {} + return self._get_ob(), info def render(self) -> None: """Rendering the simulation environment. From 2c483742c3ce8fa5f89c4cb4ee0235e9c22c63de Mon Sep 17 00:00:00 2001 From: Max Spahn Date: Wed, 14 Jun 2023 11:37:59 +0200 Subject: [PATCH 03/11] Makes tiago and panda example visible through toggling the render flag. --- examples/panda_reacher.py | 2 +- examples/tiago.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/panda_reacher.py b/examples/panda_reacher.py index 1f9115c2..636184de 100644 --- a/examples/panda_reacher.py +++ b/examples/panda_reacher.py @@ -52,4 +52,4 @@ def run_panda(n_steps=1000, render=False, goal=True, obstacles=True): if __name__ == "__main__": - run_panda(render=False) + run_panda(render=True) diff --git a/examples/tiago.py b/examples/tiago.py index 21de7401..d0a43e66 100644 --- a/examples/tiago.py +++ b/examples/tiago.py @@ -64,4 +64,4 @@ def run_tiago(n_steps=1000, render=False, goal=True, obstacles=True): if __name__ == "__main__": - run_tiago(render=False) + run_tiago(render=True) From ecf81f134ad330a800afebe68ee228ef0b24e8bf Mon Sep 17 00:00:00 2001 From: BehradX Date: Wed, 14 Jun 2023 18:38:02 +0330 Subject: [PATCH 04/11] =?UTF-8?q?=F0=9F=9B=A0=EF=B8=8F=20Added=20`seed`=20?= =?UTF-8?q?and=20`options`=20parameter=20to=20`reset`=20method.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added `env_checker` file. --- .../reinforcement learning/env_checker.py | 71 +++++++++++++++++++ urdfenvs/urdf_common/urdf_env.py | 6 +- 2 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 examples/reinforcement learning/env_checker.py diff --git a/examples/reinforcement learning/env_checker.py b/examples/reinforcement learning/env_checker.py new file mode 100644 index 00000000..8ca0d6db --- /dev/null +++ b/examples/reinforcement learning/env_checker.py @@ -0,0 +1,71 @@ +# Basic imports +import gymnasium +import numpy as np +import os +from gymnasium.wrappers import FlattenObservation + +# Stable baselines 3 +''' +Stable baselines 3 has a build-in function called `check_env` that checks if the environment is compatible with the library. 
+It checks the following: + - Observation space + - Action space + - Reward range + - Whether the environment is vectorized or not + - Whether the environment uses a `Dict` or `Tuple` observation space + - Whether the environment uses a `Dict` or `Tuple` action space + - Whether the environment uses a `VecEnv` or not + - Whether the environment uses a `VecNormalize` wrapper or not + - Whether the environment uses a `FlattenObservation` wrapper or not + - Whether the environment uses a `FrameStack` wrapper or not + - Whether the environment uses a `TimeLimit` wrapper or not + - Whether the environment uses a `Monitor` wrapper or not + - Whether the environment uses a `VecFrameStack` wrapper or not + - Whether the environment uses a `VecTransposeImage` wrapper or not +''' +from stable_baselines3.common.env_checker import check_env + + +# URDF Envs +from urdfenvs.urdf_common.urdf_env import UrdfEnv +from urdfenvs.robots.generic_urdf import GenericUrdfReacher +from urdfenvs.sensors.full_sensor import FullSensor +from urdfenvs.scene_examples.goal import goal1 +from urdfenvs.urdf_common.reward import Reward + +class InverseDistanceDenseReward(Reward): + def calculateReward(self, observation: dict) -> float: + goal = observation['robot_0']['FullSensor']['goals'][1]['position'] + position = observation['robot_0']['joint_state']['position'] + reward = 1.0/np.linalg.norm(goal-position) + print(f'🏆 Reward is: {reward}') + return reward + + +robots = [ + GenericUrdfReacher(urdf="pointRobot.urdf", mode="vel"), + ] +env= UrdfEnv( + dt=0.01, + robots=robots, + render=False, +) + + +env.add_goal(goal1) +sensor = FullSensor(['position'], ['position', 'size'], variance=0.0) +env.add_sensor(sensor, [0]) +env.set_spaces() +env.set_reward_calculator(InverseDistanceDenseReward()) +defaultAction = np.array([0.5, -0.0, 0.0]) +pos0 = np.array([0.0, 0.1, 0.0]) +vel0 = np.array([1.0, 0.0, 0.0]) + +ob = env.reset(pos=pos0, vel=vel0) +env.shuffle_goals() + +env = FlattenObservation(env) + +print("🏁 Check Env Started.") +check_env(env, warn=True) +print("🏁 Check Env Finished.") \ No newline at end of file diff --git a/urdfenvs/urdf_common/urdf_env.py b/urdfenvs/urdf_common/urdf_env.py index 8799d7f3..fc1b68e5 100644 --- a/urdfenvs/urdf_common/urdf_env.py +++ b/urdfenvs/urdf_common/urdf_env.py @@ -536,6 +536,8 @@ def add_sensor(self, sensor: Sensor, robot_ids: List) -> None: def reset( self, + seed = None, + options = None, pos: np.ndarray = None, vel: np.ndarray = None, mount_positions: np.ndarray = None, @@ -557,6 +559,7 @@ def reset( Mounting position for the robots This is ignored for mobile robots """ + super().reset(seed=seed, options=options) self._t = 0.0 if mount_positions is None: mount_positions = np.tile(np.zeros(3), (len(self._robots), 1)) @@ -583,8 +586,7 @@ def reset( ) self.reset_obstacles() self.reset_goals() - info = {} - return self._get_ob(), info + return self._get_ob(), self._info def render(self) -> None: """Rendering the simulation environment. 
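With `seed` and `options` forwarded to `gymnasium.Env.reset`, callers can now seed the environment in the standard gymnasium way; a small sketch (the seed value and start pose are placeholders):

    ob, info = env.reset(seed=42, pos=np.array([0.0, 0.1, 0.0]))
    # super().reset(seed=seed) seeds self.np_random, the generator that
    # gymnasium expects any in-environment randomness to draw from.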
From 26247cb3d3c8decb51a253ec147616b18cb30cb9 Mon Sep 17 00:00:00 2001 From: BehradX Date: Wed, 14 Jun 2023 18:47:52 +0330 Subject: [PATCH 05/11] =?UTF-8?q?=F0=9F=94=A8=20Removed=20Extra=20Stuff=20?= =?UTF-8?q?from=20Reward=20File?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- urdfenvs/urdf_common/reward.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/urdfenvs/urdf_common/reward.py b/urdfenvs/urdf_common/reward.py index 6e5af2bb..18388113 100644 --- a/urdfenvs/urdf_common/reward.py +++ b/urdfenvs/urdf_common/reward.py @@ -1,13 +1,5 @@ from abc import ABC, abstractmethod -from typing import List -from urdfenvs.sensors.sensor import Sensor - class Reward(ABC): @abstractmethod - # TODO: Get a clear list of things that could be needed for calculating reward. def calculateReward(self, observation) -> float: pass - -# class Reward1(Reward): -# def calculateReward(self, sensors: list[Sensor]) -> float: -# return 1 From a7dd1601d4ce21439cb762ed1ce6378fc837ab56 Mon Sep 17 00:00:00 2001 From: BehradX Date: Wed, 14 Jun 2023 18:56:10 +0330 Subject: [PATCH 06/11] =?UTF-8?q?=E2=9E=95=20Added=20a=20Wrapper=20for=20A?= =?UTF-8?q?ction=20Space?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unfortunately, Stable Baselines3 has a problem with `float64` action spaces in DDPG, TD3 and SAC algorithms. This wrapper solves that problem by converting the `dtype` of the action space to `float32`. --- .../wrappers/sb3_float32_action_wrapper.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 urdfenvs/wrappers/sb3_float32_action_wrapper.py diff --git a/urdfenvs/wrappers/sb3_float32_action_wrapper.py b/urdfenvs/wrappers/sb3_float32_action_wrapper.py new file mode 100644 index 00000000..e9d29f63 --- /dev/null +++ b/urdfenvs/wrappers/sb3_float32_action_wrapper.py @@ -0,0 +1,25 @@ +import gymnasium as gym +import numpy as np + +class SB3Float32ActionWrapper(gym.Wrapper): + """ + A wrapper to convert the action space to float32. + + Unfortunately, Stable Baselines3 has a problem with `float64` + action spaces in DDPG, TD3 and SAC algorithms. This wrapper + solves that problem by converting the `dtype` of the action space + to `float32`. Use it like `env = SB3Float32ActionWrapper(env)`. + + """ + def __init__(self, env): + super().__init__(env) + self.action_space = gym.spaces.Box( + low=self.env.action_space.low.astype(np.float32), + high=self.env.action_space.high.astype(np.float32), + dtype=np.float32, + ) + + def step(self, action): + action = action.astype(np.float32) + obs, reward, done, truncated, info = self.env.step(action) + return obs, reward, done, truncated, info \ No newline at end of file From 15f1c983fab0008d49126a0d5b93c3faffd3d96a Mon Sep 17 00:00:00 2001 From: BehradX Date: Wed, 14 Jun 2023 19:04:02 +0330 Subject: [PATCH 07/11] =?UTF-8?q?=F0=9F=94=A8=20Minor=20Changes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Made `truncated` a method instead of a fixed variable.
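A concrete truncation criterion could later slot into this hook; a hypothetical sketch (`self._max_episode_time` does not exist in the codebase, and we assume `self._t` tracks simulation time, being reset to 0.0 in reset()):

    def _getTruncated(self) -> bool:
        # Hypothetical: truncate once a configured simulation-time budget
        # is exceeded; self._max_episode_time is an assumed attribute.
        return (
            self._max_episode_time is not None
            and self._t >= self._max_episode_time
        )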
--- urdfenvs/urdf_common/urdf_env.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/urdfenvs/urdf_common/urdf_env.py b/urdfenvs/urdf_common/urdf_env.py index fc1b68e5..e1054e07 100644 --- a/urdfenvs/urdf_common/urdf_env.py +++ b/urdfenvs/urdf_common/urdf_env.py @@ -247,10 +247,15 @@ def step(self, action): if self._render: self.render() + terminated = self._done - truncated = False + truncated = self._getTruncated() return ob, reward, terminated, truncated, self._info - + + def _getTruncated(self) -> bool: + # TODO: Implement Truncated. + return False + def _get_ob(self) -> dict: """Compose the observation.""" observation = {} From 00e3f16c50312a3ef754b81d404cb5cec1440bec Mon Sep 17 00:00:00 2001 From: BehradX Date: Thu, 15 Jun 2023 14:28:36 +0330 Subject: [PATCH 08/11] =?UTF-8?q?=F0=9F=94=A8=20Removed=20`env=5Fchecker`?= =?UTF-8?q?=20file.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../reinforcement learning/env_checker.py | 71 ------------------- 1 file changed, 71 deletions(-) delete mode 100644 examples/reinforcement learning/env_checker.py diff --git a/examples/reinforcement learning/env_checker.py b/examples/reinforcement learning/env_checker.py deleted file mode 100644 index 8ca0d6db..00000000 --- a/examples/reinforcement learning/env_checker.py +++ /dev/null @@ -1,71 +0,0 @@ -# Basic imports -import gymnasium -import numpy as np -import os -from gymnasium.wrappers import FlattenObservation - -# Stable baselines 3 -''' -Stable baselines 3 has a build-in function called `check_env` that checks if the environment is compatible with the library. -It checks the following: - - Observation space - - Action space - - Reward range - - Whether the environment is vectorized or not - - Whether the environment uses a `Dict` or `Tuple` observation space - - Whether the environment uses a `Dict` or `Tuple` action space - - Whether the environment uses a `VecEnv` or not - - Whether the environment uses a `VecNormalize` wrapper or not - - Whether the environment uses a `FlattenObservation` wrapper or not - - Whether the environment uses a `FrameStack` wrapper or not - - Whether the environment uses a `TimeLimit` wrapper or not - - Whether the environment uses a `Monitor` wrapper or not - - Whether the environment uses a `VecFrameStack` wrapper or not - - Whether the environment uses a `VecTransposeImage` wrapper or not -''' -from stable_baselines3.common.env_checker import check_env - - -# URDF Envs -from urdfenvs.urdf_common.urdf_env import UrdfEnv -from urdfenvs.robots.generic_urdf import GenericUrdfReacher -from urdfenvs.sensors.full_sensor import FullSensor -from urdfenvs.scene_examples.goal import goal1 -from urdfenvs.urdf_common.reward import Reward - -class InverseDistanceDenseReward(Reward): - def calculateReward(self, observation: dict) -> float: - goal = observation['robot_0']['FullSensor']['goals'][1]['position'] - position = observation['robot_0']['joint_state']['position'] - reward = 1.0/np.linalg.norm(goal-position) - print(f'🏆 Reward is: {reward}') - return reward - - -robots = [ - GenericUrdfReacher(urdf="pointRobot.urdf", mode="vel"), - ] -env= UrdfEnv( - dt=0.01, - robots=robots, - render=False, -) - - -env.add_goal(goal1) -sensor = FullSensor(['position'], ['position', 'size'], variance=0.0) -env.add_sensor(sensor, [0]) -env.set_spaces() -env.set_reward_calculator(InverseDistanceDenseReward()) -defaultAction = np.array([0.5, -0.0, 0.0]) -pos0 = np.array([0.0, 0.1, 0.0]) -vel0 = np.array([1.0, 0.0, 
0.0]) - -ob = env.reset(pos=pos0, vel=vel0) -env.shuffle_goals() - -env = FlattenObservation(env) - -print("🏁 Check Env Started.") -check_env(env, warn=True) -print("🏁 Check Env Finished.") \ No newline at end of file From 60c218ac835bc7c6bc8d6734e84cd932222737e0 Mon Sep 17 00:00:00 2001 From: BehradX Date: Thu, 15 Jun 2023 14:30:40 +0330 Subject: [PATCH 09/11] =?UTF-8?q?=F0=9F=94=A8=20Converted=20variable=20nam?= =?UTF-8?q?es=20from=20camel=20case=20to=20snake=20case?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/point_robot_reward.py | 2 +- urdfenvs/urdf_common/reward.py | 2 +- urdfenvs/urdf_common/urdf_env.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/point_robot_reward.py b/examples/point_robot_reward.py index fda4bb89..e015a225 100644 --- a/examples/point_robot_reward.py +++ b/examples/point_robot_reward.py @@ -10,7 +10,7 @@ class InverseDistanceDenseReward(Reward): - def calculateReward(self, observation: dict) -> float: + def calculate_reward(self, observation: dict) -> float: goal = observation['robot_0']['FullSensor']['goals'][1]['position'] position = observation['robot_0']['joint_state']['position'] return 1.0/np.linalg.norm(goal-position) diff --git a/urdfenvs/urdf_common/reward.py b/urdfenvs/urdf_common/reward.py index 18388113..c406e15a 100644 --- a/urdfenvs/urdf_common/reward.py +++ b/urdfenvs/urdf_common/reward.py @@ -2,4 +2,4 @@ class Reward(ABC): @abstractmethod - def calculateReward(self, observation) -> float: pass + def calculate_reward(self, observation) -> float: pass diff --git a/urdfenvs/urdf_common/urdf_env.py b/urdfenvs/urdf_common/urdf_env.py index e1054e07..e6be0f4d 100644 --- a/urdfenvs/urdf_common/urdf_env.py +++ b/urdfenvs/urdf_common/urdf_env.py @@ -241,7 +241,7 @@ def step(self, action): # Calculate the reward. # If there is no reward object, then the reward is 1.0. if self._reward_calculator is not None: - reward = self._reward_calculator.calculateReward(ob) + reward = self._reward_calculator.calculate_reward(ob) else: reward = 1.0 @@ -249,10 +249,10 @@ def step(self, action): self.render() terminated = self._done - truncated = self._getTruncated() + truncated = self._get_truncated() return ob, reward, terminated, truncated, self._info - def _getTruncated(self) -> bool: + def _get_truncated(self) -> bool: # TODO: Implement Truncated. return False From 19d349a3c07610572184ef2ebc55ddf69e99fa8b Mon Sep 17 00:00:00 2001 From: BehradX Date: Thu, 15 Jun 2023 14:31:54 +0330 Subject: [PATCH 10/11] =?UTF-8?q?=F0=9F=94=A8=20Added=20Type=20Annotations?= =?UTF-8?q?=20to=20`reset`=20Method=20Parameters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- urdfenvs/urdf_common/urdf_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/urdfenvs/urdf_common/urdf_env.py b/urdfenvs/urdf_common/urdf_env.py index e6be0f4d..e4312ab0 100644 --- a/urdfenvs/urdf_common/urdf_env.py +++ b/urdfenvs/urdf_common/urdf_env.py @@ -541,8 +541,8 @@ def add_sensor(self, sensor: Sensor, robot_ids: List) -> None: def reset( self, - seed = None, - options = None, + seed: int = None, + options: dict = None, pos: np.ndarray = None, vel: np.ndarray = None, mount_positions: np.ndarray = None, From b3ad4aeb636711d4b3dfc57164e90ce6f824642f Mon Sep 17 00:00:00 2001 From: Max Spahn Date: Sun, 18 Jun 2023 23:44:03 +0200 Subject: [PATCH 11/11] Fixes panda reacher. Changes number of return values for step function. 
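The `ob, *_ = env.step(action)` idiom used throughout the examples absorbs however many trailing values step returns, so the same line works across the old 4-tuple and the new 5-tuple API; spelled out:

    ob, *rest = env.step(action)
    # With the new API: rest == [reward, terminated, truncated, info]
    reward, terminated, truncated, info = rest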
--- examples/panda_reacher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/panda_reacher.py b/examples/panda_reacher.py index b6a3cd0d..fa701531 100644 --- a/examples/panda_reacher.py +++ b/examples/panda_reacher.py @@ -46,7 +46,7 @@ def run_panda(n_steps=1000, render=False, goal=True, obstacles=True): action[7] = 0.02 action[8] = 0.02 collision_links_position: dict = env.collision_links_poses(position_only=True) - ob, _, _, _ = env.step(action) + ob, *_ = env.step(action) history.append(ob) env.close() return history
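Taken together with patches 05 and 09, a custom reward is now supplied by subclassing `Reward` and overriding the snake_case hook, as in examples/point_robot_reward.py:

    import numpy as np
    from urdfenvs.urdf_common.reward import Reward

    class InverseDistanceDenseReward(Reward):
        def calculate_reward(self, observation: dict) -> float:
            goal = observation['robot_0']['FullSensor']['goals'][1]['position']
            position = observation['robot_0']['joint_state']['position']
            return 1.0 / np.linalg.norm(goal - position)

    env.set_reward_calculator(InverseDistanceDenseReward())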