diff --git a/.deepsource.toml b/.deepsource.toml index 0ac63d33..ae408557 100644 --- a/.deepsource.toml +++ b/.deepsource.toml @@ -1,5 +1,12 @@ version = 1 +exclude_patterns = [ + "examples/**", + "gym/f110_gym/**", + "gym_env/share/**", + "docs/**" +] + [[analyzers]] name = "python" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4d5256ea..bda13323 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,28 +2,41 @@ name: CI on: push: - branches: [ main ] pull_request: branches: [ main ] jobs: - build: - + build-gym: runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ["3.8", "3.9"] - steps: - uses: actions/checkout@v2 - - - name: Set up Python ${{ matrix.python-version }} + - name: Set up Python 3.8 uses: actions/setup-python@v2 with: - python-version: ${{ matrix.python-version }} - + python-version: '3.8' - name: Install run: | python -m pip install --upgrade pip pip install -e . + + build-ros: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: '3.8' + - name: Install + run: | + python -m pip install --upgrade pip + pip install -e ./model-gym-ros-env/car_node/car_node + + build-image: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Build Docker image + run: | + docker build . diff --git a/.gitignore b/.gitignore index 1147da41..4f965255 100644 --- a/.gitignore +++ b/.gitignore @@ -202,6 +202,9 @@ runs/ PPO_*/ +train_test/PPO_*/ + + ### VirtualEnv ### # Virtualenv # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ diff --git a/Dockerfile-gym-ros b/Dockerfile-gym-ros new file mode 100644 index 00000000..67fcf1a4 --- /dev/null +++ b/Dockerfile-gym-ros @@ -0,0 +1,102 @@ +# MIT License + +# Copyright (c) 2020 Hongrui Zheng + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +FROM ros:foxy + +SHELL ["/bin/bash", "-c"] + +# dependencies +RUN apt-get update --fix-missing && \ + apt-get install -y git \ + nano \ + python3-pip \ + libeigen3-dev \ + tmux \ + ros-foxy-rviz2 +RUN apt-get -y dist-upgrade +RUN pip3 install transforms3d + +# f1tenth gym +RUN git clone https://github.com/f1tenth/f1tenth_gym +RUN cd f1tenth_gym && \ + pip3 install -e . 
+ +# ros2 gym bridge +# RUN mkdir -p sim_ws/src/f1tenth_gym_ros +# COPY ./f1tenth_gym_ros /sim_ws/src/f1tenth_gym_ros +RUN git clone https://github.com/f1tenth/f1tenth_gym_ros.git /sim_ws/src/f1tenth_gym_ros && \ + cd /sim_ws/src/f1tenth_gym_ros && \ + git checkout 910789ad9029839abda0d7b6d66f46945fe5cef0 +RUN source /opt/ros/foxy/setup.bash && \ + cd sim_ws/ && \ + apt-get update --fix-missing && \ + rosdep install -i --from-path src --rosdistro foxy -y && \ + colcon build + +# our dependencies +RUN echo "source /opt/ros/foxy/setup.bash" >> ~/.bashrc && \ + echo "source /sim_ws/install/setup.bash" >> ~/.bashrc && \ + pip install --upgrade pip && \ + # ROS2 packages + source /opt/ros/foxy/setup.bash && \ + ros2 pkg create --destination-directory /sim_ws/src car_node --build-type ament_python --dependencies rclpy && \ + # f1tenth gym env branch v1.0.0 + rm -rf /sim_ws/src/car_node/car_node && \ + git clone --depth=1 https://github.com/f1tenth/f1tenth_gym.git /sim_ws/src/car_node/car_node && \ + cd /sim_ws/src/car_node/car_node && \ + git checkout cd56335eda43ff4e401331c461877227474a3ed4 && \ + touch /sim_ws/src/car_node/car_node/__init__.py && \ + # Put all maps maps in f1tenth gym ros + cd / && \ + git clone --single-branch --branch v1.0.0 https://github.com/f1tenth/f1tenth_racetracks.git && \ + cp f1tenth_racetracks/*/*.{png,yaml} /sim_ws/src/f1tenth_gym_ros/maps/ && \ + # Put all maps in car_node + mkdir -p /sim_ws/src/car_node/car_node/map/ && \ + mv f1tenth_racetracks /sim_ws/src/car_node/car_node/map/ + +COPY model-gym-ros-env/car_node/setup.py /sim_ws/src/car_node/setup.py +COPY model-gym-ros-env/car_node/car_node/main.py model-gym-ros-env/car_node/car_node/wrapper.py model-gym-ros-env/car_node/car_node/map_utility.py model-gym-ros-env/car_node/car_node/setup.py /sim_ws/src/car_node/car_node/ +COPY train_test /sim_ws/src/car_node/car_node/train_test + +RUN pip install -e /sim_ws/src/car_node/car_node && \ + # pip install stable_baselines3 && \ + # pip install 'shimmy>=0.2.1' && \ + source /opt/ros/foxy/setup.bash && \ + cd sim_ws/ && \ + colcon build + +# RUN pip install setuptools==65.7.0 && \ +# pip install -e /sim_ws/src/car_node/car_node && \ +# pip install stable_baselines3 && \ +# pip install 'shimmy>=0.2.1' && \ +# cd sim_ws/ && \ +# colcon build + +# pip install gym[atari,accept-rom-license]==0.19.0 && \ +# pip install 'stable-baselines3[extra]' && \ +# pip install -e /sim_ws/src/car_node/car_node && \ +# source /opt/ros/foxy/setup.bash && \ +# cd sim_ws/ && \ +# colcon build + +WORKDIR '/sim_ws' +ENTRYPOINT ["/bin/bash"] diff --git a/docker-compose-ros-env.yml b/docker-compose-ros-env.yml new file mode 100644 index 00000000..5d0fe869 --- /dev/null +++ b/docker-compose-ros-env.yml @@ -0,0 +1,46 @@ +# MIT License + +# Copyright (c) 2020 Hongrui Zheng + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +version: '3' +services: + sim: + image: manuandru/f1tenth-gym-ros-model-env:latest + # volumes: + # - .:/sim_ws/src/f1tenth_gym_ros + # - ../src:/sim_ws/src/ + environment: + - DISPLAY=novnc:0.0 + networks: + - x11 + stdin_open: true + tty: true + novnc: + image: theasp/novnc:latest + environment: + - DISPLAY_WIDTH=1728 + - DISPLAY_HEIGHT=972 + ports: + - "8080:8080" + networks: + - x11 +networks: + x11: diff --git a/f1tenth_gym-setup.md b/f1tenth_gym-setup.md index 7fd4552d..c2ca89ac 100644 --- a/f1tenth_gym-setup.md +++ b/f1tenth_gym-setup.md @@ -8,7 +8,6 @@ cd f1tenth_gym conda create -n f1tenth_gym python=3.8 conda activate f1tenth_gym pip install -e . -pip install -r requirements.txt ``` If there are any error during installation([source](https://github.com/freqtrade/freqtrade/issues/8376#issuecomment-1519257211)): diff --git a/model-gym-ros-env/car_node/car_node/main.py b/model-gym-ros-env/car_node/car_node/main.py new file mode 100644 index 00000000..bea425f0 --- /dev/null +++ b/model-gym-ros-env/car_node/car_node/main.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python +# from __future__ import print_function +import numpy as np +import rclpy +from rclpy.node import Node + +#ROS Imports +from sensor_msgs.msg import LaserScan +from ackermann_msgs.msg import AckermannDriveStamped + + +import gym +from stable_baselines3 import PPO +from car_node.wrapper import F110_Wrapped +from f110_gym.envs.base_classes import Integrator +from car_node.map_utility import get_map, get_formatted_map, map_ext +from car_node.wrapper import convert_range + +timestep = 0.01 +tensorboard_path = './train_test/' +map = "BrandsHatch" +path = get_map(map) +map_path = get_formatted_map(path) +device = 'cpu' + +STEER_SCALE = 0.5 +SPEED_SCALE = 0.7 + +class PPOModelEvaluator(Node): + + def __init__(self): + super().__init__('wall_follow_node') + + #Topics & Subs, Pubs + lidarscan_topic = '/scan' + drive_topic = '/drive' + + # Model & Env + self.eval_env = gym.make('f110_gym:f110-v0', map=map_path, map_ext=map_ext, num_agents=1, timestep=timestep, integrator=Integrator.RK4) + self.eval_env = F110_Wrapped(self.eval_env, random_map=False) + self.eval_env.set_map_path(path) + self.model = PPO.load("/sim_ws/src/car_node/car_node/train_test/best_global_model", self.eval_env, device=device) + self.vec_env = self.model.get_env() + + # ROS + self.lidar_sub = self.create_subscription(LaserScan, lidarscan_topic, self.lidar_callback, 10) + self.drive_pub = self.create_publisher(AckermannDriveStamped, drive_topic, 10) + + def lidar_callback(self, data): + d = np.array(data.ranges, dtype=np.float64) + d = convert_range(d, [data.angle_min, data.angle_max], [-1, 1]) + + action, _states = self.model.predict(d, deterministic=True) + action = self.eval_env.un_normalise_actions(action) + + self.get_logger().info(f'{action[0], action[1]}') + + drive_msg = AckermannDriveStamped() + drive_msg.header.stamp = self.get_clock().now().to_msg() + drive_msg.header.frame_id = "laser" + drive_msg.drive.steering_angle = action[0] * STEER_SCALE + drive_msg.drive.speed = 
action[1] * SPEED_SCALE + self.drive_pub.publish(drive_msg) + +def main(args=None): + rclpy.init(args=args) + node = PPOModelEvaluator() + rclpy.spin(node) + node.destroy_node() + rclpy.shutdown() + +if __name__=='__main__': + main() diff --git a/model-gym-ros-env/car_node/car_node/map_utility.py b/model-gym-ros-env/car_node/car_node/map_utility.py new file mode 100644 index 00000000..000b325b --- /dev/null +++ b/model-gym-ros-env/car_node/car_node/map_utility.py @@ -0,0 +1,101 @@ +#get all the path of the map in ./map/f1tenth_racetracks +import os +import random +import numpy as np + + +wpt_delim= ';' +wpt_rowskip= 3 +wpt_xind= 1 +wpt_yind= 2 +wpt_thind= 3 +wpt_vind= 5 +map_ext= '.png' + +def get_map_path(): + path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) + map_path = "/sim_ws/src/car_node/car_node/map/f1tenth_racetracks" + return map_path + +def get_map_list(): + map_path = get_map_path() + map_list = os.listdir(map_path) + filtered_map_list = [] + + for map_file in map_list: + if os.path.isdir(os.path.join(map_path, map_file)): + raceline_file = os.path.join(map_path, map_file, f"{map_file}_raceline.csv") + if os.path.exists(raceline_file): + filtered_map_list.append(map_file) + + return filtered_map_list + +def get_map(map_name): + map_path = get_map_path() + map_file = map_path + "/" + map_name + return map_file + +def get_formatted_map(map_name): + map_file = map_name + "/" + get_map_name(map_name) + "_map" + return map_file + +def get_map_name(map_file): + map_name = map_file.split("/")[-1] + return map_name + +def get_map_list_name(): + map_list = get_map_list() + map_list_name = [] + for map_file in map_list: + map_list_name.append(get_map_name(map_file)) + return map_list_name + +def get_map_list_file(): + map_list = get_map_list() + map_list_file = [] + for map_file in map_list: + map_list_file.append(get_map(map_file)) + return map_list_file + +def get_map_list_name_file(): + map_list = get_map_list() + map_list_name_file = [] + for map_file in map_list: + map_list_name_file.append(get_map_name(map_file)) + map_list_name_file.append(get_map(map_file)) + return map_list_name_file + +def get_map_list_name_file_dict(): + map_list = get_map_list() + map_list_name_file_dict = {} + for map_file in map_list: + map_list_name_file_dict[get_map_name(map_file)] = get_map(map_file) + return map_list_name_file_dict + + + +def get_one_random_map(): + map_list = get_map_list() + map_file = random.choice(map_list) + return get_map(map_file) + + +def get_raceline(map_folder): + map_name = get_map_name(map_folder) + raceline_file = map_folder + "/"+ map_name + "_raceline.csv" + raceline = np.genfromtxt(raceline_file, delimiter=wpt_delim, skip_header=wpt_rowskip) + return raceline + +def get_x_y_theta_from_raceline(raceline): + x = raceline[:, wpt_xind] + y = raceline[:, wpt_yind] + theta = raceline[:, wpt_thind] + return x, y, theta + +def get_start_position(map_folder): + raceline = get_raceline(map_folder) + x, y, theta = get_x_y_theta_from_raceline(raceline) + start_pose = [x[0], y[0], theta[0]] + return start_pose + + diff --git a/model-gym-ros-env/car_node/car_node/setup.py b/model-gym-ros-env/car_node/car_node/setup.py new file mode 100644 index 00000000..22d56a61 --- /dev/null +++ b/model-gym-ros-env/car_node/car_node/setup.py @@ -0,0 +1,20 @@ +from setuptools import setup + +setup(name='f110_gym', + version='0.2.1', + author='Hongrui Zheng', + author_email='billyzheng.bz@gmail.com', + url='https://f1tenth.org', + package_dir={'': 'gym'}, + 
install_requires=['gym==0.19.0', + 'numpy==1.23.0', + 'Pillow>=9.0.1', + 'scipy>=1.7.3', + 'numba>=0.55.2', + 'pyyaml>=5.3.1', + 'pyglet==1.5.28', + 'pyopengl', + # our deps + 'stable-baselines3', + 'shimmy>=0.2.1'] + ) diff --git a/model-gym-ros-env/car_node/car_node/wrapper.py b/model-gym-ros-env/car_node/car_node/wrapper.py new file mode 100644 index 00000000..befef1c7 --- /dev/null +++ b/model-gym-ros-env/car_node/car_node/wrapper.py @@ -0,0 +1,265 @@ +import gym +import numpy as np +#from gym import spaces +from gym import spaces +from pathlib import Path +from argparse import Namespace +from pyglet.gl import GL_POINTS +import car_node.map_utility as map_utility +from typing import Any, Dict, List, Optional, SupportsFloat, Tuple, Union + +def convert_range(value, input_range, output_range): + # converts value(s) from range to another range + # ranges ---> [min, max] + (in_min, in_max), (out_min, out_max) = input_range, output_range + in_range = in_max - in_min + out_range = out_max - out_min + return (((value - in_min) * out_range) / in_range) + out_min + + +class F110_Wrapped(gym.Wrapper): + """ + This is a wrapper for the F1Tenth Gym environment intended + for only one car, but should be expanded to handle multi-agent scenarios + """ + + def __init__(self, env, random_map=False): + super().__init__(env) + + # normalised action space, steer and speed + self.action_space = spaces.Box(low=np.array([-1.0, -1.0]), high=np.array([1.0, 1.0]), dtype=np.float) + + # normalised observations, just take the lidar scans + self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(1080,), dtype=np.float) + + # store allowed steering/speed/lidar ranges for normalisation + self.s_min = self.env.params['s_min'] + self.s_max = self.env.params['s_max'] + self.v_min = self.env.params['v_min'] + self.v_max = self.env.params['v_max'] + self.lidar_min = 0 + self.lidar_max = 30 # see ScanSimulator2D max_range + + # store car dimensions and some track info + self.car_length = self.env.params['length'] + self.car_width = self.env.params['width'] + self.track_width = 3.2 # ~= track width, see random_trackgen.py + + # radius of circle where car can start on track, relative to a centerpoint + self.start_radius = (self.track_width / 2) - ((self.car_length + self.car_width) / 2) # just extra wiggle room + + self.step_count = 0 + + # set threshold for maximum angle of car, to prevent spinning + self.max_theta = 100 + self.count = 0 + + self.map_path = None + self.random_map = random_map + + self.race_line_color = [255, 0, 0] + self.race_line_x = [] + self.race_line_y = [] + self.race_line_theta = [] + + self.episode_returns = [] + + self.is_rendering = False + + self.last_position = {'x': None, 'y': None} + + + def get_total_steps(self) -> int: + return self.step_count + + def get_episode_rewards(self) -> List[float]: + """ + Returns the rewards of all the episodes + + :return: + """ + return self.episode_returns + + + + + + def set_raceliens(self): + if self.map_path is not None: + raceline = map_utility.get_raceline(self.map_path) + self.race_line_x, self.race_line_y, self.race_line_theta = map_utility.get_x_y_theta_from_raceline(raceline) + self.race_line_x = self.race_line_x.tolist() + self.race_line_y = self.race_line_y.tolist() + self.race_line_theta = self.race_line_theta.tolist() + + def set_map_path(self, map_path): + self.map_path = map_path + self.set_raceliens() + + + def start_position(self): + if self.map_path is not None: + x, y, t = map_utility.get_start_position(self.map_path) + 
self.set_raceliens() + return x, y, t + else: + raise Exception("Map path not set") + + + + def step(self, action): + + #add noise to action + #action = action + np.random.normal(0, 0.1, 2) + + action_convert = self.un_normalise_actions(action) + observation, _, done, info = self.env.step(np.array([action_convert])) + + self.step_count += 1 + + + + next_x = self.race_line_x[self.count] + next_y = self.race_line_y[self.count] + + if self.env.renderer: + if done: + self.race_line_color = [np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)] + self.env.renderer.batch.add(1, GL_POINTS, None, ('v3f/stream', [next_x * 50, next_y * 50, 0.]), + ('c3B/stream', self.race_line_color)) + + reward = 0 + + + + + if self.count < len(self.race_line_x) - 1: + X, Y = observation['poses_x'][0], observation['poses_y'][0] + + dist = np.sqrt(np.power((X - next_x), 2) + np.power((Y - next_y), 2)) + if dist < 2: + self.count = self.count + 1 + reward += 0.01 + pass + + if dist < 2.5: + complete = 1#(self.count/len(self.race_line_x)) * 0.5 + reward += complete + else: + print("---- Lap Done ---->", self.map_path) + self.count = 0 + reward += 10 + + + # Check if the car has moved a significant distance from the last position + distance_from_last_position = np.sqrt( + np.power((observation["poses_x"][0] - self.last_position['x']), 2) + + + np.power((observation["poses_y"][0] - self.last_position['y']), 2) + ) + + # if distance_from_last_position > 0.0005: + # reward += 0.8 + # else: + # reward = 0 + # # Update the last position + reward += distance_from_last_position + + self.last_position['x'] = observation['poses_x'][0] + self.last_position['y'] = observation['poses_y'][0] + + + + + if observation['collisions'][0]: + done = True + self.count = 0 + reward = 0 + + if abs(observation['poses_theta'][0]) > self.max_theta: + done = True + self.count = 0 + reward = 0 + #if the car go out of the track the episode is done + if len(self.episode_returns) > 50_000: + print(reward) + self.episode_returns = [] + done = True + self.count = 0 + reward = 0 + print("Episod Done - Too slow") + + + + + + self.episode_returns.append(reward) + + + + + return self.normalise_observations(observation['scans'][0]), reward, bool(done), info + + + def reset(self): + if self.random_map: + path = map_utility.get_one_random_map() + map_path = map_utility.get_formatted_map(path) + map_ext = map_utility.map_ext + self.update_map(map_path, map_ext, update_render=True) + self.set_map_path(path) + + # Select a random point from the race line + race_line_x = self.race_line_x + race_line_y = self.race_line_y + race_line_theta = self.race_line_theta + random_index = np.random.randint(len(race_line_x)) + x = race_line_x[random_index] + y = race_line_y[random_index] + t = race_line_theta[random_index] + + + # Update the race line to start from the selected point + race_line_x = race_line_x[random_index:] + race_line_x[:random_index] + race_line_y = race_line_y[random_index:] + race_line_y[:random_index] + race_line_theta = race_line_theta[random_index:] + race_line_theta[:random_index] + + + self.race_line_x = race_line_x + self.race_line_y = race_line_y + self.race_line_theta = race_line_theta + + # else: + # x, y, t = self.start_position() + + self.episode_returns = [] + + self.last_position = {'x': x, 'y': y} + + + observation, _, _, _ = self.env.reset(np.array([[x, y, t]])) + return self.normalise_observations(observation['scans'][0]) + + def update_map(self, map_name, map_extension, update_render=False): + 
self.env.map_name = map_name + self.env.map_ext = map_extension + self.env.update_map(f"{map_name}.yaml", map_extension) + # if update_render and self.env.renderer: + # self.env.renderer.close() + # self.env.renderer = None + + def seed(self, seed): + self.current_seed = seed + np.random.seed(self.current_seed) + print(f"Seed -> {self.current_seed}") + + def un_normalise_actions(self, actions): + # convert actions from range [-1, 1] to normal steering/speed range + steer = convert_range(actions[0], [-1, 1], [self.s_min, self.s_max]) + speed = convert_range(actions[1], [-1, 1], [self.v_min, self.v_max]) + return np.array([steer, speed], dtype=np.float) + + def normalise_observations(self, observations): + # convert observations from normal lidar distances range to range [-1, 1] + return convert_range(observations, [self.lidar_min, self.lidar_max], [-1, 1]) + diff --git a/model-gym-ros-env/car_node/setup.py b/model-gym-ros-env/car_node/setup.py new file mode 100644 index 00000000..ea8b8ee9 --- /dev/null +++ b/model-gym-ros-env/car_node/setup.py @@ -0,0 +1,26 @@ +from setuptools import setup + +package_name = 'car_node' + +setup( + name=package_name, + version='0.0.0', + packages=[package_name], + data_files=[ + ('share/ament_index/resource_index/packages', + ['resource/' + package_name]), + ('share/' + package_name, ['package.xml']), + ], + install_requires=['setuptools'], + zip_safe=True, + maintainer='root', + maintainer_email='manuel.andruccioli@studio.unibo.it', + description='TODO: Package description', + license='TODO: License declaration', + tests_require=['pytest'], + entry_points={ + 'console_scripts': [ + 'car = car_node.main:main' + ], + }, +) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 4d002cdc..00000000 --- a/requirements.txt +++ /dev/null @@ -1,58 +0,0 @@ -absl-py==2.0.0 -cachetools==5.3.2 -certifi==2023.11.17 -charset-normalizer==3.3.2 -cloudpickle==1.6.0 -contourpy==1.1.1 -cycler==0.12.1 --e git+ssh://git@github.com/zucchero-sintattico/svs-f1tenth_gym.git@a4cd61809932d839f8d412682d61252a8a243c27#egg=f110_gym -Farama-Notifications==0.0.4 -filelock==3.13.1 -fonttools==4.46.0 -fsspec==2023.12.2 -google-auth==2.25.2 -google-auth-oauthlib==1.0.0 -grpcio==1.60.0 -gym==0.19.0 -gymnasium==0.29.1 -idna==3.6 -importlib-metadata==7.0.0 -importlib-resources==6.1.1 -Jinja2==3.1.2 -kiwisolver==1.4.5 -llvmlite==0.41.1 -Markdown==3.5.1 -MarkupSafe==2.1.3 -matplotlib==3.7.4 -mpmath==1.3.0 -networkx==3.1 -numba==0.58.1 -numpy==1.22.0 -oauthlib==3.2.2 -packaging==23.2 -pandas==2.0.3 -Pillow==10.1.0 -protobuf==4.25.1 -pyasn1==0.5.1 -pyasn1-modules==0.3.0 -pyglet==1.5.28 -PyOpenGL==3.1.7 -pyparsing==3.1.1 -python-dateutil==2.8.2 -pytz==2023.3.post1 -PyYAML==6.0.1 -requests==2.31.0 -requests-oauthlib==1.3.1 -rsa==4.9 -scipy==1.10.1 -six==1.16.0 -sympy==1.12 -tensorboard==2.14.0 -tensorboard-data-server==0.7.2 -torch==2.1.1 -torch-tb-profiler==0.4.3 -typing_extensions==4.9.0 -tzdata==2023.3 -urllib3==2.1.0 -Werkzeug==3.0.1 -zipp==3.17.0 diff --git a/setup.py b/setup.py index 4b411f93..872758d7 100644 --- a/setup.py +++ b/setup.py @@ -13,5 +13,10 @@ 'numba>=0.55.2', 'pyyaml>=5.3.1', 'pyglet==1.5.28', - 'pyopengl'] + 'pyopengl', + # our deps + 'stable-baselines3==2.2.1', + 'stable-baselines3[extra]', + 'Shimmy==1.3.0', + 'tensorboard==2.14.0'] ) diff --git a/src/full_train.py b/src/full_train.py deleted file mode 100644 index f7f4cb5d..00000000 --- a/src/full_train.py +++ /dev/null @@ -1,39 +0,0 @@ -import gym -from stable_baselines3 import PPO -from 
wrapper.wrapper import F110_Wrapped -from argparse import Namespace -from f110_gym.envs.base_classes import Integrator -import utility.map_utility as map_utility -from random_train import RandomTrain -from train_on_one_map import TrainOnOneMap - -RandomTrain().run() - -TrainOnOneMap("YasMarina").run() - - -timestep = 0.01 - -path = map_utility.get_map("YasMarina") -map_path = map_utility.get_formatted_map(path) -map_ext = map_utility.map_ext - - -eval_env = gym.make('f110_gym:f110-v0', map=map_path, map_ext=map_ext, num_agents=1, timestep=timestep, integrator=Integrator.RK4) -eval_env = F110_Wrapped(eval_env, random_map=False) -eval_env.set_map_path(path) - - -model = PPO.load("./train_test/best_model", eval_env, device='cpu') - -episode = 0 -while episode < 1000: - episode += 1 - obs = eval_env.reset() - done = False - while not done: - action, _ = model.predict(obs) - obs, reward, done, _ = eval_env.step(action) - if done: - print("Lap done", episode) - eval_env.render(mode='human') diff --git a/src/random_train.py b/src/random_train.py deleted file mode 100644 index bbb31390..00000000 --- a/src/random_train.py +++ /dev/null @@ -1,41 +0,0 @@ -import gym -from stable_baselines3 import PPO -from wrapper.wrapper import F110_Wrapped -from f110_gym.envs.base_classes import Integrator -import numpy as np -from stable_baselines3.common.callbacks import EvalCallback -import utility.map_utility as map_utility -from utility.linear_schedule import linear_schedule - -class RandomTrain: - def __init__(self): - self.timestep = 0.01 - self.total_timesteps = 1_000 - self.learning_rate = linear_schedule(0.0003) - self.gamma = 0.99 - self.gae_lambda = 0.95 - self.verbose = 1 - self.device = 'cpu' - - - self.eval_env = gym.make('f110_gym:f110-v0', num_agents=1, timestep=self.timestep, integrator=Integrator.RK4) - self.eval_env = F110_Wrapped(self.eval_env, random_map=True) - self.eval_env.seed(np.random.randint(pow(2, 31) - 1)) - - def run(self): - - - try: - model = PPO.load("./train_test/best_model", self.eval_env, device='cpu') - except: - model = PPO("MlpPolicy", self.eval_env, learning_rate=self.learning_rate, gamma=self.gamma, gae_lambda=self.gae_lambda, verbose=self.verbose, device=self.device) - - eval_callback = EvalCallback(self.eval_env, best_model_save_path='./train_test/', - log_path='./train_test/', eval_freq=1000, - deterministic=True, render=False) - - model.learn(total_timesteps=self.total_timesteps, callback=eval_callback) - -if __name__ == "__main__": - random_train = RandomTrain() - random_train.run() diff --git a/src/train.py b/src/train.py new file mode 100644 index 00000000..86428216 --- /dev/null +++ b/src/train.py @@ -0,0 +1,134 @@ +import gym +from stable_baselines3 import PPO, A2C +from wrapper.wrapper import F110_Wrapped +import yaml +from argparse import Namespace +from f110_gym.envs.base_classes import Integrator +import numpy as np +import os +from stable_baselines3.common.env_util import make_vec_env +from stable_baselines3.common.vec_env import SubprocVecEnv +from stable_baselines3.common.callbacks import BaseCallback +from stable_baselines3.common.evaluation import evaluate_policy +from stable_baselines3.common.env_checker import check_env +from utility.map_utility import get_map, get_formatted_map, map_ext +from utility.linear_schedule import linear_schedule +from stable_baselines3.common.callbacks import EvalCallback +from stable_baselines3.common.monitor import Monitor +from utility.SaveOnBestTrainingRewardCallback import SaveTheBestAndRestart +timestep = 0.01 
+tensorboard_path = './train_test/' + + +map = "Zandvoort" +path = get_map(map) +map_path = get_formatted_map(path) + +eval_env = gym.make('f110_gym:f110-v0', map=map_path, map_ext=map_ext, num_agents=1, timestep=timestep, integrator=Integrator.RK4) + +eval_env = F110_Wrapped(eval_env, random_map=True) + +eval_env.seed(1773449316) + +skip_training = False + +max_timesteps = 200_000 +min_timesteps = 50_000 + +max_learning_rate = 0.0005 +min_learning_rate = 0.00005 +num_of_steps = 6 + +device = 'cpu' + +timesteps_list = np.logspace(np.log10(min_timesteps), np.log10(max_timesteps), num=num_of_steps, endpoint=True, base=10.0, dtype=int, axis=0) +learning_rate_list = np.logspace(np.log10(max_learning_rate), np.log10(min_learning_rate), num=num_of_steps, endpoint=True, base=10.0, dtype=None, axis=0) + +print("timestemp" , timesteps_list) +print("leaning rate", learning_rate_list) + +if not skip_training: + + for timesteps, learning_rate in zip(timesteps_list, learning_rate_list): + #if best_model exists, load it + # if os.path.exists("./train_test/best_model.zip"): + # print("Loading Existing Model") + # model = PPO.load("./train_test/best_model", eval_env, learning_rate=learning_rate) + if os.path.exists("./train_test/best_global_model.zip"): + print("Loading Existing Model") + model = PPO.load("./train_test/best_global_model", eval_env, learning_rate=linear_schedule(learning_rate), device=device) + else: + model = PPO("MlpPolicy", + eval_env, + gamma=0.99, + learning_rate=linear_schedule(learning_rate), + gae_lambda=0.95, + verbose=0, + tensorboard_log=tensorboard_path, + device=device + ) + + + eval_callback = EvalCallback(eval_env, best_model_save_path='./train_test/', + log_path='./train_test/', eval_freq= int(timesteps/20), + deterministic=True, render=False) + + + + model.learn(total_timesteps=timesteps, progress_bar=True, callback=eval_callback) + + del model + + model = PPO.load("./train_test/best_model", eval_env, device=device) + + mean_reward, _ = evaluate_policy(model, model.get_env(), n_eval_episodes=20) + + + #save in file the mean reward, if the file does not exist, create it + if not os.path.exists("./train_test/mean_reward.txt"): + with open("./train_test/mean_reward.txt", "w") as f: + f.write(f"{mean_reward}") + model.save("./train_test/best_global_model") + else: + #overwrite the file with the new mean reward if it is better + with open("./train_test/mean_reward.txt", "r") as f: + best_mean_reward = float(f.read()) + if mean_reward > best_mean_reward: + with open("./train_test/mean_reward.txt", "w") as f: + f.write(f"{mean_reward}") + model.save("./train_test/best_global_model") + print("Saved Best Model") + + del model + + + + + + +eval_env = gym.make('f110_gym:f110-v0', map=map_path, map_ext=map_ext, num_agents=1, timestep=timestep, integrator=Integrator.RK4) +eval_env = F110_Wrapped(eval_env, random_map=False) +eval_env.set_map_path(path) + +model = PPO.load("./train_test/best_global_model", eval_env, device=device) + +mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=1) + +print(mean_reward) + +# Enjoy trained agent +vec_env = model.get_env() +obs = vec_env.reset() + +episode = 0 +while episode < 100: + + episode += 1 + obs = vec_env.reset() + done = False + while not done: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, dones, info = vec_env.step(action) + if done: + print("-->", episode) + eval_env.render(mode='human') diff --git a/src/train_on_one_map.py b/src/train_on_one_map.py deleted file mode 100644 index 
f4a5c2f3..00000000 --- a/src/train_on_one_map.py +++ /dev/null @@ -1,46 +0,0 @@ -import gym -from stable_baselines3 import PPO -from wrapper.wrapper import F110_Wrapped -from f110_gym.envs.base_classes import Integrator -import numpy as np -from stable_baselines3.common.callbacks import EvalCallback -import utility.map_utility as map_utility -from utility.linear_schedule import linear_schedule - -class TrainOnOneMap: - def __init__(self, map_name="Monza"): - self.timestep = 0.01 - self.total_timesteps = 1_000 - self.learning_rate = linear_schedule(0.0003) - self.gamma = 0.99 - self.gae_lambda = 0.95 - self.verbose = 1 - self.device = 'cpu' - self.eval_env = None - - path = map_utility.get_map(map_name) - map_path = map_utility.get_formatted_map(path) - map_ext = map_utility.map_ext - - self.eval_env = gym.make('f110_gym:f110-v0', map=map_path, map_ext=map_ext, num_agents=1, timestep=self.timestep, integrator=Integrator.RK4) - # Wrap basic gym with RL functions - self.eval_env = F110_Wrapped(self.eval_env, random_map=False) - self.eval_env.set_map_path(path) - self.eval_env.seed(np.random.randint(pow(2, 31) - 1)) - - - - def run(self): - - try: - model = PPO.load("./train_test/best_model", self.eval_env, device='cpu') - except: - model = PPO("MlpPolicy", self.eval_env, learning_rate=self.learning_rate, gamma=self.gamma, gae_lambda=self.gae_lambda, verbose=self.verbose, device=self.device) - - eval_callback = EvalCallback(self.eval_env, best_model_save_path='./train_test/', log_path='./train_test/', eval_freq=1000, deterministic=True, render=False) - - model.learn(total_timesteps=self.total_timesteps, callback=eval_callback) - -if __name__ == "__main__": - train_on_one_map = TrainOnOneMap() - train_on_one_map.run() diff --git a/src/utility/SaveOnBestTrainingRewardCallback.py b/src/utility/SaveOnBestTrainingRewardCallback.py new file mode 100644 index 00000000..97fdf330 --- /dev/null +++ b/src/utility/SaveOnBestTrainingRewardCallback.py @@ -0,0 +1,400 @@ +import os +import warnings +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union + +import gymnasium as gym +import numpy as np + +from stable_baselines3.common.logger import Logger + +try: + from tqdm import TqdmExperimentalWarning + + # Remove experimental warning + warnings.filterwarnings("ignore", category=TqdmExperimentalWarning) + from tqdm.rich import tqdm +except ImportError: + # Rich not installed, we only throw an error + # if the progress bar is used + tqdm = None + + +from stable_baselines3.common.evaluation import evaluate_policy +from stable_baselines3.common.vec_env import DummyVecEnv, VecEnv, sync_envs_normalization + +if TYPE_CHECKING: + from stable_baselines3.common import base_class + + +class BaseCallback(ABC): + """ + Base class for callback. 
+ + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages + """ + + # The RL model + # Type hint as string to avoid circular import + model: "base_class.BaseAlgorithm" + + def __init__(self, verbose: int = 0): + super().__init__() + # Number of time the callback was called + self.n_calls = 0 # type: int + # n_envs * n times env.step() was called + self.num_timesteps = 0 # type: int + self.verbose = verbose + self.locals: Dict[str, Any] = {} + self.globals: Dict[str, Any] = {} + # Sometimes, for event callback, it is useful + # to have access to the parent object + self.parent = None # type: Optional[BaseCallback] + + @property + def training_env(self) -> VecEnv: + training_env = self.model.get_env() + assert ( + training_env is not None + ), "`model.get_env()` returned None, you must initialize the model with an environment to use callbacks" + return training_env + + @property + def logger(self) -> Logger: + return self.model.logger + + # Type hint as string to avoid circular import + def init_callback(self, model: "base_class.BaseAlgorithm") -> None: + """ + Initialize the callback by saving references to the + RL model and the training environment for convenience. + """ + self.model = model + self._init_callback() + + def _init_callback(self) -> None: + pass + + def on_training_start(self, locals_: Dict[str, Any], globals_: Dict[str, Any]) -> None: + # Those are reference and will be updated automatically + self.locals = locals_ + self.globals = globals_ + # Update num_timesteps in case training was done before + self.num_timesteps = self.model.num_timesteps + self._on_training_start() + + def _on_training_start(self) -> None: + pass + + def on_rollout_start(self) -> None: + self._on_rollout_start() + + def _on_rollout_start(self) -> None: + pass + + @abstractmethod + def _on_step(self) -> bool: + """ + :return: If the callback returns False, training is aborted early. + """ + return True + + def on_step(self) -> bool: + """ + This method will be called by the model after each call to ``env.step()``. + + For child callback (of an ``EventCallback``), this will be called + when the event is triggered. + + :return: If the callback returns False, training is aborted early. + """ + self.n_calls += 1 + self.num_timesteps = self.model.num_timesteps + + return self._on_step() + + def on_training_end(self) -> None: + self._on_training_end() + + def _on_training_end(self) -> None: + pass + + def on_rollout_end(self) -> None: + self._on_rollout_end() + + def _on_rollout_end(self) -> None: + pass + + def update_locals(self, locals_: Dict[str, Any]) -> None: + """ + Update the references to the local variables. + + :param locals_: the local variables during rollout collection + """ + self.locals.update(locals_) + self.update_child_locals(locals_) + + def update_child_locals(self, locals_: Dict[str, Any]) -> None: + """ + Update the references to the local variables on sub callbacks. + + :param locals_: the local variables during rollout collection + """ + pass + + +class EventCallback(BaseCallback): + """ + Base class for triggering callback on event. + + :param callback: Callback that will be called + when an event is triggered. 
+ :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages + """ + + def __init__(self, callback: Optional[BaseCallback] = None, verbose: int = 0): + super().__init__(verbose=verbose) + self.callback = callback + # Give access to the parent + if callback is not None: + assert self.callback is not None + self.callback.parent = self + + def init_callback(self, model: "base_class.BaseAlgorithm") -> None: + super().init_callback(model) + if self.callback is not None: + self.callback.init_callback(self.model) + + def _on_training_start(self) -> None: + if self.callback is not None: + self.callback.on_training_start(self.locals, self.globals) + + def _on_event(self) -> bool: + if self.callback is not None: + return self.callback.on_step() + return True + + def _on_step(self) -> bool: + return True + + def update_child_locals(self, locals_: Dict[str, Any]) -> None: + """ + Update the references to the local variables. + + :param locals_: the local variables during rollout collection + """ + if self.callback is not None: + self.callback.update_locals(locals_) + + + + + + + +class SaveTheBestAndRestart(EventCallback): + """ + Callback for evaluating an agent. + + .. warning:: + + When using multiple environments, each call to ``env.step()`` + will effectively correspond to ``n_envs`` steps. + To account for that, you can use ``eval_freq = max(eval_freq // n_envs, 1)`` + + :param eval_env: The environment used for initialization + :param callback_on_new_best: Callback to trigger + when there is a new best model according to the ``mean_reward`` + :param callback_after_eval: Callback to trigger after every evaluation + :param n_eval_episodes: The number of episodes to test the agent + :param eval_freq: Evaluate the agent every ``eval_freq`` call of the callback. + :param log_path: Path to a folder where the evaluations (``evaluations.npz``) + will be saved. It will be updated at each evaluation. + :param best_model_save_path: Path to a folder where the best model + according to performance on the eval env will be saved. + :param deterministic: Whether the evaluation should + use a stochastic or deterministic actions. 
+ :param render: Whether to render or not the environment during evaluation + :param verbose: Verbosity level: 0 for no output, 1 for indicating information about evaluation results + :param warn: Passed to ``evaluate_policy`` (warns if ``eval_env`` has not been + wrapped with a Monitor wrapper) + """ + + def __init__( + self, + eval_env: Union[gym.Env, VecEnv], + callback_on_new_best: Optional[BaseCallback] = None, + callback_after_eval: Optional[BaseCallback] = None, + n_eval_episodes: int = 5, + eval_freq: int = 10000, + log_path: Optional[str] = None, + best_model_save_path: Optional[str] = None, + deterministic: bool = True, + render: bool = False, + verbose: int = 1, + warn: bool = True, + ): + super().__init__(callback_after_eval, verbose=verbose) + + self.callback_on_new_best = callback_on_new_best + if self.callback_on_new_best is not None: + # Give access to the parent + self.callback_on_new_best.parent = self + + self.n_eval_episodes = n_eval_episodes + self.eval_freq = eval_freq + self.best_mean_reward = -np.inf + self.last_mean_reward = -np.inf + self.deterministic = deterministic + self.render = render + self.warn = warn + + # Convert to VecEnv for consistency + if not isinstance(eval_env, VecEnv): + eval_env = DummyVecEnv([lambda: eval_env]) # type: ignore[list-item, return-value] + + self.eval_env = eval_env + self.best_model_save_path = best_model_save_path + # Logs will be written in ``evaluations.npz`` + if log_path is not None: + log_path = os.path.join(log_path, "evaluations") + self.log_path = log_path + self.evaluations_results: List[List[float]] = [] + self.evaluations_timesteps: List[int] = [] + self.evaluations_length: List[List[int]] = [] + # For computing success rate + self._is_success_buffer: List[bool] = [] + self.evaluations_successes: List[List[bool]] = [] + + def _init_callback(self) -> None: + # Does not work in some corner cases, where the wrapper is not the same + if not isinstance(self.training_env, type(self.eval_env)): + warnings.warn("Training and eval env are not of the same type" f"{self.training_env} != {self.eval_env}") + + # Create folders if needed + if self.best_model_save_path is not None: + os.makedirs(self.best_model_save_path, exist_ok=True) + if self.log_path is not None: + os.makedirs(os.path.dirname(self.log_path), exist_ok=True) + + # Init callback called on new best model + if self.callback_on_new_best is not None: + self.callback_on_new_best.init_callback(self.model) + + def _log_success_callback(self, locals_: Dict[str, Any], globals_: Dict[str, Any]) -> None: + """ + Callback passed to the ``evaluate_policy`` function + in order to log the success rate (when applicable), + for instance when using HER. + + :param locals_: + :param globals_: + """ + info = locals_["info"] + + if locals_["done"]: + maybe_is_success = info.get("is_success") + if maybe_is_success is not None: + self._is_success_buffer.append(maybe_is_success) + + def _on_step(self) -> bool: + continue_training = True + + if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0: + # Sync training and eval env if there is VecNormalize + if self.model.get_vec_normalize_env() is not None: + try: + sync_envs_normalization(self.training_env, self.eval_env) + except AttributeError as e: + raise AssertionError( + "Training and eval env are not wrapped the same way, " + "see https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html#evalcallback " + "and warning above." 
+ ) from e + + # Reset success rate buffer + self._is_success_buffer = [] + + episode_rewards, episode_lengths = evaluate_policy( + self.model, + self.eval_env, + n_eval_episodes=self.n_eval_episodes, + render=self.render, + deterministic=self.deterministic, + return_episode_rewards=True, + warn=self.warn, + callback=self._log_success_callback, + ) + + if self.log_path is not None: + assert isinstance(episode_rewards, list) + assert isinstance(episode_lengths, list) + self.evaluations_timesteps.append(self.num_timesteps) + self.evaluations_results.append(episode_rewards) + self.evaluations_length.append(episode_lengths) + + kwargs = {} + # Save success log if present + if len(self._is_success_buffer) > 0: + self.evaluations_successes.append(self._is_success_buffer) + kwargs = dict(successes=self.evaluations_successes) + + np.savez( + self.log_path, + timesteps=self.evaluations_timesteps, + results=self.evaluations_results, + ep_lengths=self.evaluations_length, + **kwargs, + ) + + mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards) + mean_ep_length, std_ep_length = np.mean(episode_lengths), np.std(episode_lengths) + self.last_mean_reward = float(mean_reward) + + if self.verbose >= 1: + print(f"Eval num_timesteps={self.num_timesteps}, " f"episode_reward={mean_reward:.2f} +/- {std_reward:.2f}") + print(f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}") + # Add to current Logger + self.logger.record("eval/mean_reward", float(mean_reward)) + self.logger.record("eval/mean_ep_length", mean_ep_length) + + if len(self._is_success_buffer) > 0: + success_rate = np.mean(self._is_success_buffer) + if self.verbose >= 1: + print(f"Success rate: {100 * success_rate:.2f}%") + self.logger.record("eval/success_rate", success_rate) + + # Dump log so the evaluation results are printed with the correct timestep + self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard") + self.logger.dump(self.num_timesteps) + + if mean_reward > self.best_mean_reward: + if self.verbose >= 1: + print("New best mean reward!") + if self.best_model_save_path is not None: + self.model.save(os.path.join(self.best_model_save_path, "best_model")) + self.best_mean_reward = float(mean_reward) + # Trigger callback on new best model, if needed + if self.callback_on_new_best is not None: + continue_training = self.callback_on_new_best.on_step() + + # Trigger callback after every evaluation, if needed + if self.callback is not None: + continue_training = continue_training and self._on_event() + + return continue_training + + def update_child_locals(self, locals_: Dict[str, Any]) -> None: + """ + Update the references to the local variables. 
+ + :param locals_: the local variables during rollout collection + """ + if self.callback: + self.callback.update_locals(locals_) + + + + + + diff --git a/src/wrapper/wrapper.py b/src/wrapper/wrapper.py index 180766f9..ff2b08ae 100644 --- a/src/wrapper/wrapper.py +++ b/src/wrapper/wrapper.py @@ -1,20 +1,13 @@ import gym import numpy as np +#from gym import spaces from gym import spaces from pathlib import Path import yaml from argparse import Namespace from pyglet.gl import GL_POINTS import utility.map_utility as map_utility - - - -def get_distance_from_closest_point(x_list, w_list, x, y, index): - closest_x = x_list[index] - closest_y = w_list[index] - distance = np.sqrt(np.power(x - closest_x, 2) + np.power(y - closest_y, 2)) - return distance - +from typing import Any, Dict, List, Optional, SupportsFloat, Tuple, Union def convert_range(value, input_range, output_range): # converts value(s) from range to another range @@ -68,13 +61,37 @@ def __init__(self, env, random_map=False): self.race_line_color = [255, 0, 0] self.race_line_x = [] self.race_line_y = [] + self.race_line_theta = [] + + self.episode_returns = [] + + self.is_rendering = False + + self.last_position = {'x': None, 'y': None} + + + def get_total_steps(self) -> int: + return self.step_count + + def get_episode_rewards(self) -> List[float]: + """ + Returns the rewards of all the episodes + + :return: + """ + return self.episode_returns + + + + def set_raceliens(self): if self.map_path is not None: raceline = map_utility.get_raceline(self.map_path) - self.race_line_x, self.race_line_y, _ = map_utility.get_x_y_theta_from_raceline(raceline) + self.race_line_x, self.race_line_y, self.race_line_theta = map_utility.get_x_y_theta_from_raceline(raceline) self.race_line_x = self.race_line_x.tolist() self.race_line_y = self.race_line_y.tolist() + self.race_line_theta = self.race_line_theta.tolist() def set_map_path(self, map_path): self.map_path = map_path @@ -92,74 +109,169 @@ def start_position(self): def step(self, action): - action_convert = self.un_normalise_actions(action) - observation, _, done, info = self.env.step(np.array([action_convert])) - self.step_count += 1 + #add noise to action + #action = action + np.random.normal(0, 0.1, 2) + + action_convert = self.un_normalise_actions(action) + observation, _, done, info = self.env.step(np.array([action_convert])) + + self.step_count += 1 + + + + next_x = self.race_line_x[self.count] + next_y = self.race_line_y[self.count] - next_x = self.race_line_x[self.count] - next_y = self.race_line_y[self.count] + if self.env.renderer: + if done: + self.race_line_color = [np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)] + self.env.renderer.batch.add(1, GL_POINTS, None, ('v3f/stream', [next_x * 50, next_y * 50, 0.]), + ('c3B/stream', self.race_line_color)) - if self.env.renderer: - if done: - self.race_line_color = [np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)] - self.env.renderer.batch.add(1, GL_POINTS, None, ('v3f/stream', [next_x * 50, next_y * 50, 0.]), - ('c3B/stream', self.race_line_color)) - reward = 0 - reward = reward + 0.9 + reward = 0 + + - if self.count < len(self.race_line_x) - 1: - X, Y = observation['poses_x'][0], observation['poses_y'][0] + + if self.count < len(self.race_line_x) - 1: + X, Y = observation['poses_x'][0], observation['poses_y'][0] + + dist = np.sqrt(np.power((X - next_x), 2) + np.power((Y - next_y), 2)) + if dist < 1: + self.count = self.count + 1 + reward += 0.01 - dist = np.sqrt(np.power((X - 
next_x), 2) + np.power((Y - next_y), 2)) - if dist < 2: - self.count += 1 + if dist < 1.5: + complete = 1 #(self.count/len(self.race_line_x)) * 0.5 + reward += complete + else: + print("---- Lap Done ---->", self.map_path) + self.count = 0 + reward += 10 + + reward += 0.1 - if dist < 2.5: - complete = (self.count / len(self.race_line_x)) * 0.5 - reward += complete - else: - self.count = 0 - reward += 100 - print("Lap Done") + # # Check if the car has moved a significant distance from the last position + distance_from_last_position = \ + np.power((observation["poses_x"][0] - self.last_position['x']), 2) \ + + \ + np.power((observation["poses_y"][0] - self.last_position['y']), 2) \ + + distance_from_last_position = round(distance_from_last_position, 4) + + if distance_from_last_position < 0.0005 : + reward = 0 + + + + # # Assicurati che gli input siano all'interno dei range specificati + # angolo_sterzo = max(self.s_min, min(self.s_max, action_convert[0])) + # accelerazione = max(self.v_min, min(self.v_max, action_convert[1])) + + # # Normalizza i valori per ottenere un valore tra 0 e 1 + # norm_angolo_sterzo = (angolo_sterzo - self.s_min) / (self.s_max - self.s_min) + # norm_accelerazione = (accelerazione - self.v_min) / (self.v_max - self.v_min) + + # # Calcola la reward inversamente proporzionale alla differenza tra i due valori normalizzati + # reward += (1 - abs(norm_angolo_sterzo - norm_accelerazione)) + + # if distance_from_last_position < 0.005: + # reward += -0.5 + + # if abs(action_convert[1]) < 1: + # reward = 0 + + + reward = round(reward, 4) + + + + + self.last_position['x'] = observation['poses_x'][0] + self.last_position['y'] = observation['poses_y'][0] - if observation['collisions'][0]: - self.count = 0 - reward = -2 - if abs(observation['poses_theta'][0]) > self.max_theta: - done = True - if self.env.lap_counts[0] > 0: - self.count = 0 - reward += 1 - if self.env.lap_counts[0] > 1: - reward += 1 - self.env.lap_counts[0] = 0 - return self.normalise_observations(observation['scans'][0]), reward, bool(done), info + if observation['collisions'][0]: + done = True + self.count = 0 + reward = 0 + + # if abs(observation['poses_theta'][0]) > self.max_theta: + # done = True + # self.count = 0 + # reward = 0 + #if the car go out of the track the episode is done + if len(self.episode_returns) > 70_000: + print(reward) + self.episode_returns = [] + done = True + self.count = 0 + reward = -100000 + print("Episod Done - Too slow") + + + + + + self.episode_returns.append(reward) + + + + + return self.normalise_observations(observation['scans'][0]), reward, bool(done), info + + def reset(self): if self.random_map: path = map_utility.get_one_random_map() map_path = map_utility.get_formatted_map(path) - print("Rand Map :", map_path) map_ext = map_utility.map_ext self.update_map(map_path, map_ext, update_render=True) self.set_map_path(path) - x, y, t = self.start_position() + # Select a random point from the race line + race_line_x = self.race_line_x + race_line_y = self.race_line_y + race_line_theta = self.race_line_theta + random_index = np.random.randint(len(race_line_x)) + x = race_line_x[random_index] + y = race_line_y[random_index] + t = race_line_theta[random_index] + + + # Update the race line to start from the selected point + race_line_x = race_line_x[random_index:] + race_line_x[:random_index] + race_line_y = race_line_y[random_index:] + race_line_y[:random_index] + race_line_theta = race_line_theta[random_index:] + race_line_theta[:random_index] + + + self.race_line_x = race_line_x 
+ self.race_line_y = race_line_y + self.race_line_theta = race_line_theta + + # else: + # x, y, t = self.start_position() + + self.episode_returns = [] + + self.last_position = {'x': x, 'y': y} + + observation, _, _, _ = self.env.reset(np.array([[x, y, t]])) return self.normalise_observations(observation['scans'][0]) - def update_map(self, map_name, map_extension, update_render=True): + def update_map(self, map_name, map_extension, update_render=False): self.env.map_name = map_name self.env.map_ext = map_extension self.env.update_map(f"{map_name}.yaml", map_extension) - if update_render and self.env.renderer: - self.env.renderer.close() - self.env.renderer = None + # if update_render and self.env.renderer: + # self.env.renderer.close() + # self.env.renderer = None def seed(self, seed): self.current_seed = seed @@ -175,3 +287,4 @@ def un_normalise_actions(self, actions): def normalise_observations(self, observations): # convert observations from normal lidar distances range to range [-1, 1] return convert_range(observations, [self.lidar_min, self.lidar_max], [-1, 1]) + diff --git a/train_test/best_global_model.zip b/train_test/best_global_model.zip new file mode 100644 index 00000000..eeb7667b Binary files /dev/null and b/train_test/best_global_model.zip differ diff --git a/train_test/best_model.zip b/train_test/best_model.zip index 6bf2c211..cd788b79 100644 Binary files a/train_test/best_model.zip and b/train_test/best_model.zip differ diff --git a/train_test/evaluations.npz b/train_test/evaluations.npz index fad3c286..42b855d8 100644 Binary files a/train_test/evaluations.npz and b/train_test/evaluations.npz differ diff --git a/train_test/mean_reward.txt b/train_test/mean_reward.txt new file mode 100644 index 00000000..95571131 --- /dev/null +++ b/train_test/mean_reward.txt @@ -0,0 +1 @@ +206.90949999999998 \ No newline at end of file
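
Note on the learning-rate schedule: `src/train.py` (and the deleted `random_train.py` / `train_on_one_map.py`) import `utility.linear_schedule.linear_schedule`, which is not part of this diff. A minimal sketch of what such a helper typically looks like, assuming it follows the standard Stable-Baselines3 pattern of a callable that maps the remaining training progress (1.0 at the start, 0.0 at the end) to a learning rate:

```python
from typing import Callable

def linear_schedule(initial_value: float) -> Callable[[float], float]:
    """Return a schedule that linearly decays the learning rate.

    Stable-Baselines3 calls the returned function with `progress_remaining`,
    which goes from 1.0 (start of training) down to 0.0 (end of training).
    """
    def schedule(progress_remaining: float) -> float:
        return progress_remaining * initial_value
    return schedule

# Usage as in train.py:
#   PPO("MlpPolicy", env, learning_rate=linear_schedule(0.0005), ...)
```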
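Note on the wrapper's normalisation contract: `F110_Wrapped` maps policy actions from [-1, 1] back to the car's steering/speed ranges and maps the 1080-beam LiDAR scan (0–30 m) into [-1, 1] before handing it to PPO; `main.py` relies on the same `convert_range` helper on the ROS side. A self-contained sketch of that round trip, using illustrative steering/speed limits only (the wrapper reads the real ones from `env.params['s_min'/'s_max'/'v_min'/'v_max']`):

```python
import numpy as np

def convert_range(value, input_range, output_range):
    # Same linear rescaling helper as in car_node/wrapper.py.
    (in_min, in_max), (out_min, out_max) = input_range, output_range
    return (value - in_min) * (out_max - out_min) / (in_max - in_min) + out_min

# Illustrative limits only; not taken from this diff.
S_MIN, S_MAX = -0.41, 0.41   # steering angle [rad]
V_MIN, V_MAX = -5.0, 20.0    # speed [m/s]

# Policy output in [-1, 1] -> physical steering angle and speed.
action = np.array([0.2, -0.5])
steer = convert_range(action[0], [-1, 1], [S_MIN, S_MAX])
speed = convert_range(action[1], [-1, 1], [V_MIN, V_MAX])

# 1080-beam LiDAR scan in metres (0-30 m) -> normalised observation in [-1, 1].
scan = np.random.uniform(0.0, 30.0, size=1080)
obs = convert_range(scan, [0.0, 30.0], [-1.0, 1.0])

print(steer, speed, obs.min(), obs.max())
```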