From 2daeebec2b554130e4a74f646d8b377fb7c55ede Mon Sep 17 00:00:00 2001
From: Benjamin Piwowarski <benjamin@piwowarski.fr>
Date: Tue, 27 Aug 2024 15:29:30 +0200
Subject: [PATCH] Bug fix and tests

---
 README.md                            | 133 +++++++++++++++++++++------
 src/pystk2_gymnasium/envs.py         |   9 +-
 src/pystk2_gymnasium/stk_wrappers.py |   3 +-
 src/pystk2_gymnasium/wrappers.py     |  11 ++-
 tests/test_consistency.py            |  18 +++-
 5 files changed, 141 insertions(+), 33 deletions(-)

diff --git a/README.md b/README.md
index c6d3c20..69075a0 100644
--- a/README.md
+++ b/README.md
@@ -1,72 +1,150 @@
 # PySuperTuxKart gymnasium wrapper
 
-[![PyPI version](https://badge.fury.io/py/pystk2-gymnasium.svg)](https://badge.fury.io/py/pystk2-gymnasium)
+[![PyPI
+version](https://badge.fury.io/py/pystk2-gymnasium.svg)](https://badge.fury.io/py/pystk2-gymnasium)
 
 Read the [Changelog](./CHANGELOG.md)
 
 ## Install
 
-The PySuperKart2 gymnasium wrapper is a Python package, so installing is fairly easy
+The PySuperTuxKart2 gymnasium wrapper is a Python package, so installing is
+fairly easy:
 
 `pip install pystk2-gymnasium`
 
-Note that during the first run, SuperTuxKart assets are downloaded in the cache directory.
+Note that during the first run, SuperTuxKart assets are downloaded in the cache
+directory.
 
 ## AgentSpec
 
 Each controlled kart is parametrized by `pystk2_gymnasium.AgentSpec`:
 
 - `name` defines name of the player (displayed on top of the kart)
-- `rank_start` defines the starting position (None for random, which is the default)
-- `use_ai` flag (False by default) to ignore actions (when calling `step`, and use a SuperTuxKart bot)
-- `camera_mode` can be set to `AUTO` (camera on for non STK bots), `ON` (camera on) or `OFF` (no camera).
+- `rank_start` defines the starting position (None for random, which is the
+  default)
+- `use_ai` flag (False by default) to ignore actions when calling `step` and
+  use a SuperTuxKart bot instead
+- `camera_mode` can be set to `AUTO` (camera on for non STK bots), `ON` (camera
+  on) or `OFF` (no camera).
+
 
 
 ## Environments
 
+Limitations:
 
-*Warning* only one SuperTuxKart environment can be created for now. Moreover, no graphics information
-is available for now.
+-  only one SuperTuxKart environment can be created for now
+-  no graphics information is available (i.e. pixmap)
 
 After importing `pystk2_gymnasium`, the following environments are available:
 
-- `supertuxkart/full-v0` is the main environment containing complete observations. The observation and action spaces are both dictionaries with continuous or discrete variables (see below). The exact structure can be found using `env.observation_space` and `env.action_space`. The following options can be used to modify the environment:
+- `supertuxkart/full-v0` is the main environment containing complete
+  observations. The observation and action spaces are both dictionaries with
+  continuous or discrete variables (see below). The exact structure can be found
+  using `env.observation_space` and `env.action_space`. The following options
+  can be used to modify the environment:
     - `agent` is an `AgentSpec (see above)`
     - `render_mode` can be None or `human`
-    - `track` defines the SuperTuxKart track to use (None for random). The full list can be found in `STKRaceEnv.TRACKS` after initialization with `initialize.initialize(with_graphics: bool)` has been called.
+    - `track` defines the SuperTuxKart track to use (None for random). The full
+      list can be found in `STKRaceEnv.TRACKS` after initialization with
+      `initialize.initialize(with_graphics: bool)` has been called.
     - `num_kart` defines the number of karts on the track (3 by default)
-    - `max_paths` the maximum number of the (nearest) paths (a track is made of paths) to consider in the observation state
+    - `max_paths` the maximum number of the (nearest) paths (a track is made of
+      paths) to consider in the observation state
     - `laps` is the number of laps (1 by default)
-    - `difficulty` is the difficulty of the AI bots (lowest 0 to highest 2, default to 2)
-- `supertuxkart/simple-v0` is a simplified environment with a fixed number of observations for paths (controlled by `state_paths`, default 5), items (`state_items`, default 5), karts (`state_karts`, default 5)
-- `supertuxkart/flattened-v0` has observation and action spaces simplified at the maximum (only `discrete` and `continuous` keys)
-- `supertuxkart/flattened_continuous_actions-v0` removes discrete actions (default to 0) so this is steer/acceleration only in the continuous domain
-- `supertuxkart/flattened_multidiscrete-v0` is like the previous one, but with fully multi-discrete actions. `acceleration_steps` and `steer_steps` (default to 5) control the number of discrete values for acceleration and steering respectively.
-- `supertuxkart/flattened_discrete-v0` is like the previous one, but with fully discretized actions
+    - `difficulty` is the difficulty of the AI bots (lowest 0 to highest 2,
+      default to 2)
+
+Some environments are created using wrappers:
+- `supertuxkart/simple-v0` is a simplified environment with a fixed number of
+  observations for paths (controlled by `state_paths`, default 5), items
+  (`state_items`, default 5), karts (`state_karts`, default 5)
+- `supertuxkart/flattened-v0` has observation and action spaces simplified as
+  much as possible (only `discrete` and `continuous` keys)
+- `supertuxkart/flattened_continuous_actions-v0` removes discrete actions
+  (default to 0) so this is steer/acceleration only in the continuous domain
+- `supertuxkart/flattened_multidiscrete-v0` is like the previous one, but with
+  fully multi-discrete actions. `acceleration_steps` (default 5) and
+  `steer_steps` (default 10) control the number of discrete values for
+  acceleration and steering respectively.
+- `supertuxkart/flattened_discrete-v0` is like the previous one, but with fully
+  discretized actions
 
 The reward $r_t$ at time $t$ is given by
 
-$$ r_{t} =  \frac{1}{10}(d_{t} - d_{t-1}) + (1 - \frac{\mathrm{pos}_t}{K}) \times (3 + 7 f_t) - 0.1 + 10 * f_t $$
+$$ r_{t} =  \frac{1}{10}(d_{t} - d_{t-1}) + (1 - \frac{\mathrm{pos}_t}{K})
+\times (3 + 7 f_t) - 0.1 + 10 * f_t $$
+
+where $d_t$ is the overall track distance at time $t$, $\mathrm{pos}_t$ the
+position among the $K$ karts at time $t$, and $f_t$ is $1$ when the kart
+finishes the race.
+
+## Wrappers
+
+Wrappers can be used to modify the environment.
+
+### ConstantSizedObservations
+
+Ensures that the number of observed items is constant (e.g. for other karts,
+paths).
+
+### PolarObservations
+
+Changes Cartesian coordinates to Polar ones.
+
+### FlattenerWrapper
+
+Flattens actions and observations.
+
+### FlattenMultiDiscreteActions
 
-where $d_t$ is the
-overall track distance at time $t$, $\mathrm{pos}_t$ the position among the $K$ karts at time $t$, and $f_t$ is $1$ when the kart finishes the race.
 
 ## Multi-agent environment
 
 `supertuxkart/multi-full-v0` can be used to control multiple karts. It takes an
-`agents` parameter that is a list of `AgentSpec`. Observations and actions are a dictionary of single-kart ones where **string** keys that range from `0` to `n-1` with `n` the number of karts.
+`agents` parameter that is a list of `AgentSpec`. Observations and actions are
+dictionaries of single-kart ones, with **string** keys ranging from `0` to
+`n-1`, where `n` is the number of karts.
 
 To use different gymnasium wrappers, one can use a `MonoAgentWrapperAdapter`.
+Example:
+
+```py
+agents = [
+    AgentSpec(use_ai=True, name="Yin Team", camera_mode=CameraMode.ON),
+    AgentSpec(use_ai=True, name="Yang Team", camera_mode=CameraMode.ON),
+    AgentSpec(use_ai=True, name="Zen Team", camera_mode=CameraMode.ON)
+]
+
+wrappers = [
+    partial(MonoAgentWrapperAdapter, wrapper_factories={
+        "0": lambda env: ConstantSizedObservations(env),
+        "1": lambda env: PolarObservations(ConstantSizedObservations(env)),
+        "2": lambda env: PolarObservations(ConstantSizedObservations(env))
+    }),
+]
+
+make_stkenv = partial(
+    make_env,
+    "supertuxkart/multi-full-v0",
+    render_mode="human",
+    num_kart=5,
+    agents=agents,
+    wrappers=wrappers
+)
+```
 
 ## Action and observation space
 
-All the 3D vectors are within the kart referential (`z` front, `x` left, `y` up):
+All the 3D vectors are within the kart referential (`z` front, `x` left, `y`
+up):
 
 - `distance_down_track`: The distance from the start
 - `energy`: remaining collected energy
 - `front`: front of the kart (3D vector)
 - `items_position`: position of the items (3D vectors)
-- `attachment`: the item attached to the kart (bonus box, banana, nitro/big, nitro/small, bubble gum, easter egg)
+- `attachment`: the item attached to the kart (bonus box, banana, nitro/big,
+  nitro/small, bubble gum, easter egg)
 - `attachment_time_left`: how much time the attachment will be kept
 - `items_type`: type of the item
 - `jumping`: is the kart jumping
@@ -74,9 +152,10 @@ All the 3D vectors are within the kart referential (`z` front, `x` left, `y` up)
 - `max_steer_angle` the max angle of the steering (given the current speed)
 - `distance_center_path`: distance to the center of the path
 - `paths_distance`: the distance of the paths
-- `paths_start`, `paths_end`, `paths_width`: 3D vector to the paths start and end, with their widths (sccalar)
-- `paths_start`: 3D vectors to the the path s
-- `powerup`
+- `paths_start`, `paths_end`, `paths_width`: 3D vectors to the start and end of
+  each path, and their widths (one scalar per path). The paths are sorted so
+  that the first element of the array is the current one.
+- `powerup`: collected power-up
 - `shield_time`
 - `skeed_factor`
 - `velocity`: velocity vector
@@ -91,7 +170,7 @@ from pystk2_gymnasium import AgentSpec
 # In both case, this corresponds to a dictionary with two keys:
 # - `continuous` is a vector corresponding to the continuous observations
 # - `discrete` is a vector (of integers) corresponding to discrete observations
-env = gym.make("supertuxkart/flattened-v0", render_mode="human", agent=AgentSpec(use_ai=False))
+env = gym.make("supertuxkart/flattened-v0", render_mode="human", agent=AgentSpec(use_ai=False))
 
 ix = 0
 done = False
diff --git a/src/pystk2_gymnasium/envs.py b/src/pystk2_gymnasium/envs.py
index 7ef9628..a9993bc 100644
--- a/src/pystk2_gymnasium/envs.py
+++ b/src/pystk2_gymnasium/envs.py
@@ -293,6 +293,7 @@ def list_permute(list, sort_ix):
             list[:] = (list[ix] for ix in sort_ix)
 
         def sort_closest(positions, *lists):
+            # z axis is front
             distances = [np.linalg.norm(p) * np.sign(p[2]) for p in positions]
 
             # Change distances: if d < 0, d <- -d+max_d+1
@@ -328,18 +329,20 @@ def sort_closest(positions, *lists):
         )
 
         # Add action if using AI bot
+        # (this corresponds to the action before the observation)
         obs = {}
         if use_ai:
+            # Adds actions
             action = self.race.get_kart_action(kart_ix)
             obs = {
                 "action": {
+                    "acceleration": np.array([action.acceleration], dtype=np.float32),
                     "brake": action.brake,
-                    "nitro": action.nitro,
                     "drift": action.drift,
-                    "rescue": action.rescue,
                     "fire": action.fire,
+                    "nitro": action.nitro,
+                    "rescue": action.rescue,
                     "steer": np.array([action.steer], dtype=np.float32),
-                    "acceleration": np.array([action.acceleration], dtype=np.float32),
                 }
             }
 
diff --git a/src/pystk2_gymnasium/stk_wrappers.py b/src/pystk2_gymnasium/stk_wrappers.py
index f02b5ce..c444ea9 100644
--- a/src/pystk2_gymnasium/stk_wrappers.py
+++ b/src/pystk2_gymnasium/stk_wrappers.py
@@ -124,7 +124,7 @@ class STKDiscreteAction(STKAction):
 
 class DiscreteActionsWrapper(ActionObservationWrapper):
     # Wraps the actions
-    def __init__(self, env: gym.Env, *, acceleration_steps=5, steer_steps=5, **kwargs):
+    def __init__(self, env: gym.Env, *, acceleration_steps=5, steer_steps=10, **kwargs):
         super().__init__(env, **kwargs)
 
         self._action_space = copy.deepcopy(env.action_space)
@@ -138,6 +138,7 @@ def __init__(self, env: gym.Env, *, acceleration_steps=5, steer_steps=5, **kwarg
         self._action_space["steer"] = self.d_steer.space
 
         if "action" in self.observation_space:
+            # When using AI, "action" is part of the observation space
             self._observation_space = copy.deepcopy(self.observation_space)
             self._observation_space["action"]["steer"] = self.d_steer.space
             self._observation_space["action"][
diff --git a/src/pystk2_gymnasium/wrappers.py b/src/pystk2_gymnasium/wrappers.py
index efac909..65568c4 100644
--- a/src/pystk2_gymnasium/wrappers.py
+++ b/src/pystk2_gymnasium/wrappers.py
@@ -19,6 +19,13 @@
 
 
 class SpaceFlattener:
+    """Flattens an observation or action space
+
+    If the space has discrete and continuous values, returns
+    a dictionary with "continuous" and "discrete" keys – each associated with
+    a flattened observation or action. Otherwise, returns the flattened space itself.
+    """
+
     def __init__(self, space: gym.Space):
         # Flatten the observation space
         self.continuous_keys = []
@@ -31,7 +38,7 @@ def __init__(self, space: gym.Space):
         highs = []
         counts = []
 
-        # Combine keys (sort them before hand)
+        # Combine keys (sort them beforehand so we always have the same order)
         for key, value in sorted(space.items(), key=lambda x: x[0]):
             # Ignore the AI action
             if key == "action":
@@ -75,6 +82,8 @@ def __init__(self, space: gym.Space):
 
 
 class FlattenerWrapper(ActionObservationWrapper):
+    """Flattens actions and observations."""
+
     def __init__(self, env: gym.Env):
         super().__init__(env)
 
diff --git a/tests/test_consistency.py b/tests/test_consistency.py
index 08f4895..cdd911d 100644
--- a/tests/test_consistency.py
+++ b/tests/test_consistency.py
@@ -1,6 +1,8 @@
+import gymnasium
 import numpy as np
+import numpy.testing
 import pystk2
-from pystk2_gymnasium.utils import rotate
+from pystk2_gymnasium.utils import Discretizer, rotate
 from pystk2_gymnasium.envs import STKRaceEnv
 
 
@@ -22,3 +24,17 @@ def test_rotation():
         if race is not None:
             race.stop()
             del race
+
+
+def test_discretizer():  # round-trip checks for utils.Discretizer
+    k = 5  # number of discrete indices exercised below
+
+    discretizer = Discretizer(gymnasium.spaces.Box(-1, 1, shape=(1,)), k)
+    step = 2.0 / (k - 1)  # spacing of k evenly spread values over [-1, 1]
+
+    for j in range(k):  # index -> value -> index must give back the index
+        assert discretizer.discretize(discretizer.continuous(j)) == j, f"For index {j}"
+
+    for x in np.arange(-1, 1, step):  # value -> index -> value: error below one step
+        xhat = discretizer.continuous(discretizer.discretize(x))
+        assert np.abs(xhat - x) < step, f"For value {x} vs {xhat}"