diff --git a/.deepsource.toml b/.deepsource.toml index 0ac63d33..ae408557 100644 --- a/.deepsource.toml +++ b/.deepsource.toml @@ -1,5 +1,12 @@ version = 1 +exclude_patterns = [ + "examples/**", + "gym/f110_gym/**", + "gym_env/share/**", + "docs/**" +] + [[analyzers]] name = "python" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4d5256ea..bda13323 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,28 +2,41 @@ name: CI on: push: - branches: [ main ] pull_request: branches: [ main ] jobs: - build: - + build-gym: runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ["3.8", "3.9"] - steps: - uses: actions/checkout@v2 - - - name: Set up Python ${{ matrix.python-version }} + - name: Set up Python 3.8 uses: actions/setup-python@v2 with: - python-version: ${{ matrix.python-version }} - + python-version: '3.8' - name: Install run: | python -m pip install --upgrade pip pip install -e . + + build-ros: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: '3.8' + - name: Install + run: | + python -m pip install --upgrade pip + pip install -e ./model-gym-ros-env/car_node/car_node + + build-image: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Build Docker image + run: | + docker build . diff --git a/.gitignore b/.gitignore index 1147da41..4f965255 100644 --- a/.gitignore +++ b/.gitignore @@ -202,6 +202,9 @@ runs/ PPO_*/ +train_test/PPO_*/ + + ### VirtualEnv ### # Virtualenv # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ diff --git a/Dockerfile-gym-ros b/Dockerfile-gym-ros new file mode 100644 index 00000000..67fcf1a4 --- /dev/null +++ b/Dockerfile-gym-ros @@ -0,0 +1,102 @@ +# MIT License + +# Copyright (c) 2020 Hongrui Zheng + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +FROM ros:foxy + +SHELL ["/bin/bash", "-c"] + +# dependencies +RUN apt-get update --fix-missing && \ + apt-get install -y git \ + nano \ + python3-pip \ + libeigen3-dev \ + tmux \ + ros-foxy-rviz2 +RUN apt-get -y dist-upgrade +RUN pip3 install transforms3d + +# f1tenth gym +RUN git clone https://github.com/f1tenth/f1tenth_gym +RUN cd f1tenth_gym && \ + pip3 install -e . 
+ +# ros2 gym bridge +# RUN mkdir -p sim_ws/src/f1tenth_gym_ros +# COPY ./f1tenth_gym_ros /sim_ws/src/f1tenth_gym_ros +RUN git clone https://github.com/f1tenth/f1tenth_gym_ros.git /sim_ws/src/f1tenth_gym_ros && \ + cd /sim_ws/src/f1tenth_gym_ros && \ + git checkout 910789ad9029839abda0d7b6d66f46945fe5cef0 +RUN source /opt/ros/foxy/setup.bash && \ + cd sim_ws/ && \ + apt-get update --fix-missing && \ + rosdep install -i --from-path src --rosdistro foxy -y && \ + colcon build + +# our dependencies +RUN echo "source /opt/ros/foxy/setup.bash" >> ~/.bashrc && \ + echo "source /sim_ws/install/setup.bash" >> ~/.bashrc && \ + pip install --upgrade pip && \ + # ROS2 packages + source /opt/ros/foxy/setup.bash && \ + ros2 pkg create --destination-directory /sim_ws/src car_node --build-type ament_python --dependencies rclpy && \ + # f1tenth gym env branch v1.0.0 + rm -rf /sim_ws/src/car_node/car_node && \ + git clone --depth=1 https://github.com/f1tenth/f1tenth_gym.git /sim_ws/src/car_node/car_node && \ + cd /sim_ws/src/car_node/car_node && \ + git checkout cd56335eda43ff4e401331c461877227474a3ed4 && \ + touch /sim_ws/src/car_node/car_node/__init__.py && \ + # Put all maps maps in f1tenth gym ros + cd / && \ + git clone --single-branch --branch v1.0.0 https://github.com/f1tenth/f1tenth_racetracks.git && \ + cp f1tenth_racetracks/*/*.{png,yaml} /sim_ws/src/f1tenth_gym_ros/maps/ && \ + # Put all maps in car_node + mkdir -p /sim_ws/src/car_node/car_node/map/ && \ + mv f1tenth_racetracks /sim_ws/src/car_node/car_node/map/ + +COPY model-gym-ros-env/car_node/setup.py /sim_ws/src/car_node/setup.py +COPY model-gym-ros-env/car_node/car_node/main.py model-gym-ros-env/car_node/car_node/wrapper.py model-gym-ros-env/car_node/car_node/map_utility.py model-gym-ros-env/car_node/car_node/setup.py /sim_ws/src/car_node/car_node/ +COPY train_test /sim_ws/src/car_node/car_node/train_test + +RUN pip install -e /sim_ws/src/car_node/car_node && \ + # pip install stable_baselines3 && \ + # pip install 'shimmy>=0.2.1' && \ + source /opt/ros/foxy/setup.bash && \ + cd sim_ws/ && \ + colcon build + +# RUN pip install setuptools==65.7.0 && \ +# pip install -e /sim_ws/src/car_node/car_node && \ +# pip install stable_baselines3 && \ +# pip install 'shimmy>=0.2.1' && \ +# cd sim_ws/ && \ +# colcon build + +# pip install gym[atari,accept-rom-license]==0.19.0 && \ +# pip install 'stable-baselines3[extra]' && \ +# pip install -e /sim_ws/src/car_node/car_node && \ +# source /opt/ros/foxy/setup.bash && \ +# cd sim_ws/ && \ +# colcon build + +WORKDIR '/sim_ws' +ENTRYPOINT ["/bin/bash"] diff --git a/docker-compose-ros-env.yml b/docker-compose-ros-env.yml new file mode 100644 index 00000000..5d0fe869 --- /dev/null +++ b/docker-compose-ros-env.yml @@ -0,0 +1,46 @@ +# MIT License + +# Copyright (c) 2020 Hongrui Zheng + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +version: '3' +services: + sim: + image: manuandru/f1tenth-gym-ros-model-env:latest + # volumes: + # - .:/sim_ws/src/f1tenth_gym_ros + # - ../src:/sim_ws/src/ + environment: + - DISPLAY=novnc:0.0 + networks: + - x11 + stdin_open: true + tty: true + novnc: + image: theasp/novnc:latest + environment: + - DISPLAY_WIDTH=1728 + - DISPLAY_HEIGHT=972 + ports: + - "8080:8080" + networks: + - x11 +networks: + x11: diff --git a/f1tenth_gym-setup.md b/f1tenth_gym-setup.md index 7fd4552d..c2ca89ac 100644 --- a/f1tenth_gym-setup.md +++ b/f1tenth_gym-setup.md @@ -8,7 +8,6 @@ cd f1tenth_gym conda create -n f1tenth_gym python=3.8 conda activate f1tenth_gym pip install -e . -pip install -r requirements.txt ``` If there are any error during installation([source](https://github.com/freqtrade/freqtrade/issues/8376#issuecomment-1519257211)): diff --git a/model-gym-ros-env/car_node/car_node/main.py b/model-gym-ros-env/car_node/car_node/main.py new file mode 100644 index 00000000..bea425f0 --- /dev/null +++ b/model-gym-ros-env/car_node/car_node/main.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python +# from __future__ import print_function +import numpy as np +import rclpy +from rclpy.node import Node + +#ROS Imports +from sensor_msgs.msg import LaserScan +from ackermann_msgs.msg import AckermannDriveStamped + + +import gym +from stable_baselines3 import PPO +from car_node.wrapper import F110_Wrapped +from f110_gym.envs.base_classes import Integrator +from car_node.map_utility import get_map, get_formatted_map, map_ext +from car_node.wrapper import convert_range + +timestep = 0.01 +tensorboard_path = './train_test/' +map = "BrandsHatch" +path = get_map(map) +map_path = get_formatted_map(path) +device = 'cpu' + +STEER_SCALE = 0.5 +SPEED_SCALE = 0.7 + +class PPOModelEvaluator(Node): + + def __init__(self): + super().__init__('wall_follow_node') + + #Topics & Subs, Pubs + lidarscan_topic = '/scan' + drive_topic = '/drive' + + # Model & Env + self.eval_env = gym.make('f110_gym:f110-v0', map=map_path, map_ext=map_ext, num_agents=1, timestep=timestep, integrator=Integrator.RK4) + self.eval_env = F110_Wrapped(self.eval_env, random_map=False) + self.eval_env.set_map_path(path) + self.model = PPO.load("/sim_ws/src/car_node/car_node/train_test/best_global_model", self.eval_env, device=device) + self.vec_env = self.model.get_env() + + # ROS + self.lidar_sub = self.create_subscription(LaserScan, lidarscan_topic, self.lidar_callback, 10) + self.drive_pub = self.create_publisher(AckermannDriveStamped, drive_topic, 10) + + def lidar_callback(self, data): + d = np.array(data.ranges, dtype=np.float64) + d = convert_range(d, [data.angle_min, data.angle_max], [-1, 1]) + + action, _states = self.model.predict(d, deterministic=True) + action = self.eval_env.un_normalise_actions(action) + + self.get_logger().info(f'{action[0], action[1]}') + + drive_msg = AckermannDriveStamped() + drive_msg.header.stamp = self.get_clock().now().to_msg() + drive_msg.header.frame_id = "laser" + drive_msg.drive.steering_angle = action[0] * STEER_SCALE + drive_msg.drive.speed = 
action[1] * SPEED_SCALE + self.drive_pub.publish(drive_msg) + +def main(args=None): + rclpy.init(args=args) + node = PPOModelEvaluator() + rclpy.spin(node) + node.destroy_node() + rclpy.shutdown() + +if __name__=='__main__': + main() diff --git a/model-gym-ros-env/car_node/car_node/map_utility.py b/model-gym-ros-env/car_node/car_node/map_utility.py new file mode 100644 index 00000000..000b325b --- /dev/null +++ b/model-gym-ros-env/car_node/car_node/map_utility.py @@ -0,0 +1,101 @@ +#get all the path of the map in ./map/f1tenth_racetracks +import os +import random +import numpy as np + + +wpt_delim= ';' +wpt_rowskip= 3 +wpt_xind= 1 +wpt_yind= 2 +wpt_thind= 3 +wpt_vind= 5 +map_ext= '.png' + +def get_map_path(): + path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) + map_path = "/sim_ws/src/car_node/car_node/map/f1tenth_racetracks" + return map_path + +def get_map_list(): + map_path = get_map_path() + map_list = os.listdir(map_path) + filtered_map_list = [] + + for map_file in map_list: + if os.path.isdir(os.path.join(map_path, map_file)): + raceline_file = os.path.join(map_path, map_file, f"{map_file}_raceline.csv") + if os.path.exists(raceline_file): + filtered_map_list.append(map_file) + + return filtered_map_list + +def get_map(map_name): + map_path = get_map_path() + map_file = map_path + "/" + map_name + return map_file + +def get_formatted_map(map_name): + map_file = map_name + "/" + get_map_name(map_name) + "_map" + return map_file + +def get_map_name(map_file): + map_name = map_file.split("/")[-1] + return map_name + +def get_map_list_name(): + map_list = get_map_list() + map_list_name = [] + for map_file in map_list: + map_list_name.append(get_map_name(map_file)) + return map_list_name + +def get_map_list_file(): + map_list = get_map_list() + map_list_file = [] + for map_file in map_list: + map_list_file.append(get_map(map_file)) + return map_list_file + +def get_map_list_name_file(): + map_list = get_map_list() + map_list_name_file = [] + for map_file in map_list: + map_list_name_file.append(get_map_name(map_file)) + map_list_name_file.append(get_map(map_file)) + return map_list_name_file + +def get_map_list_name_file_dict(): + map_list = get_map_list() + map_list_name_file_dict = {} + for map_file in map_list: + map_list_name_file_dict[get_map_name(map_file)] = get_map(map_file) + return map_list_name_file_dict + + + +def get_one_random_map(): + map_list = get_map_list() + map_file = random.choice(map_list) + return get_map(map_file) + + +def get_raceline(map_folder): + map_name = get_map_name(map_folder) + raceline_file = map_folder + "/"+ map_name + "_raceline.csv" + raceline = np.genfromtxt(raceline_file, delimiter=wpt_delim, skip_header=wpt_rowskip) + return raceline + +def get_x_y_theta_from_raceline(raceline): + x = raceline[:, wpt_xind] + y = raceline[:, wpt_yind] + theta = raceline[:, wpt_thind] + return x, y, theta + +def get_start_position(map_folder): + raceline = get_raceline(map_folder) + x, y, theta = get_x_y_theta_from_raceline(raceline) + start_pose = [x[0], y[0], theta[0]] + return start_pose + + diff --git a/model-gym-ros-env/car_node/car_node/setup.py b/model-gym-ros-env/car_node/car_node/setup.py new file mode 100644 index 00000000..22d56a61 --- /dev/null +++ b/model-gym-ros-env/car_node/car_node/setup.py @@ -0,0 +1,20 @@ +from setuptools import setup + +setup(name='f110_gym', + version='0.2.1', + author='Hongrui Zheng', + author_email='billyzheng.bz@gmail.com', + url='https://f1tenth.org', + package_dir={'': 'gym'}, + 
install_requires=['gym==0.19.0', + 'numpy==1.23.0', + 'Pillow>=9.0.1', + 'scipy>=1.7.3', + 'numba>=0.55.2', + 'pyyaml>=5.3.1', + 'pyglet==1.5.28', + 'pyopengl', + # our deps + 'stable-baselines3', + 'shimmy>=0.2.1'] + ) diff --git a/model-gym-ros-env/car_node/car_node/wrapper.py b/model-gym-ros-env/car_node/car_node/wrapper.py new file mode 100644 index 00000000..befef1c7 --- /dev/null +++ b/model-gym-ros-env/car_node/car_node/wrapper.py @@ -0,0 +1,265 @@ +import gym +import numpy as np +#from gym import spaces +from gym import spaces +from pathlib import Path +from argparse import Namespace +from pyglet.gl import GL_POINTS +import car_node.map_utility as map_utility +from typing import Any, Dict, List, Optional, SupportsFloat, Tuple, Union + +def convert_range(value, input_range, output_range): + # converts value(s) from range to another range + # ranges ---> [min, max] + (in_min, in_max), (out_min, out_max) = input_range, output_range + in_range = in_max - in_min + out_range = out_max - out_min + return (((value - in_min) * out_range) / in_range) + out_min + + +class F110_Wrapped(gym.Wrapper): + """ + This is a wrapper for the F1Tenth Gym environment intended + for only one car, but should be expanded to handle multi-agent scenarios + """ + + def __init__(self, env, random_map=False): + super().__init__(env) + + # normalised action space, steer and speed + self.action_space = spaces.Box(low=np.array([-1.0, -1.0]), high=np.array([1.0, 1.0]), dtype=np.float) + + # normalised observations, just take the lidar scans + self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(1080,), dtype=np.float) + + # store allowed steering/speed/lidar ranges for normalisation + self.s_min = self.env.params['s_min'] + self.s_max = self.env.params['s_max'] + self.v_min = self.env.params['v_min'] + self.v_max = self.env.params['v_max'] + self.lidar_min = 0 + self.lidar_max = 30 # see ScanSimulator2D max_range + + # store car dimensions and some track info + self.car_length = self.env.params['length'] + self.car_width = self.env.params['width'] + self.track_width = 3.2 # ~= track width, see random_trackgen.py + + # radius of circle where car can start on track, relative to a centerpoint + self.start_radius = (self.track_width / 2) - ((self.car_length + self.car_width) / 2) # just extra wiggle room + + self.step_count = 0 + + # set threshold for maximum angle of car, to prevent spinning + self.max_theta = 100 + self.count = 0 + + self.map_path = None + self.random_map = random_map + + self.race_line_color = [255, 0, 0] + self.race_line_x = [] + self.race_line_y = [] + self.race_line_theta = [] + + self.episode_returns = [] + + self.is_rendering = False + + self.last_position = {'x': None, 'y': None} + + + def get_total_steps(self) -> int: + return self.step_count + + def get_episode_rewards(self) -> List[float]: + """ + Returns the rewards of all the episodes + + :return: + """ + return self.episode_returns + + + + + + def set_raceliens(self): + if self.map_path is not None: + raceline = map_utility.get_raceline(self.map_path) + self.race_line_x, self.race_line_y, self.race_line_theta = map_utility.get_x_y_theta_from_raceline(raceline) + self.race_line_x = self.race_line_x.tolist() + self.race_line_y = self.race_line_y.tolist() + self.race_line_theta = self.race_line_theta.tolist() + + def set_map_path(self, map_path): + self.map_path = map_path + self.set_raceliens() + + + def start_position(self): + if self.map_path is not None: + x, y, t = map_utility.get_start_position(self.map_path) + 
self.set_raceliens() + return x, y, t + else: + raise Exception("Map path not set") + + + + def step(self, action): + + #add noise to action + #action = action + np.random.normal(0, 0.1, 2) + + action_convert = self.un_normalise_actions(action) + observation, _, done, info = self.env.step(np.array([action_convert])) + + self.step_count += 1 + + + + next_x = self.race_line_x[self.count] + next_y = self.race_line_y[self.count] + + if self.env.renderer: + if done: + self.race_line_color = [np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)] + self.env.renderer.batch.add(1, GL_POINTS, None, ('v3f/stream', [next_x * 50, next_y * 50, 0.]), + ('c3B/stream', self.race_line_color)) + + reward = 0 + + + + + if self.count < len(self.race_line_x) - 1: + X, Y = observation['poses_x'][0], observation['poses_y'][0] + + dist = np.sqrt(np.power((X - next_x), 2) + np.power((Y - next_y), 2)) + if dist < 2: + self.count = self.count + 1 + reward += 0.01 + pass + + if dist < 2.5: + complete = 1#(self.count/len(self.race_line_x)) * 0.5 + reward += complete + else: + print("---- Lap Done ---->", self.map_path) + self.count = 0 + reward += 10 + + + # Check if the car has moved a significant distance from the last position + distance_from_last_position = np.sqrt( + np.power((observation["poses_x"][0] - self.last_position['x']), 2) + + + np.power((observation["poses_y"][0] - self.last_position['y']), 2) + ) + + # if distance_from_last_position > 0.0005: + # reward += 0.8 + # else: + # reward = 0 + # # Update the last position + reward += distance_from_last_position + + self.last_position['x'] = observation['poses_x'][0] + self.last_position['y'] = observation['poses_y'][0] + + + + + if observation['collisions'][0]: + done = True + self.count = 0 + reward = 0 + + if abs(observation['poses_theta'][0]) > self.max_theta: + done = True + self.count = 0 + reward = 0 + #if the car go out of the track the episode is done + if len(self.episode_returns) > 50_000: + print(reward) + self.episode_returns = [] + done = True + self.count = 0 + reward = 0 + print("Episod Done - Too slow") + + + + + + self.episode_returns.append(reward) + + + + + return self.normalise_observations(observation['scans'][0]), reward, bool(done), info + + + def reset(self): + if self.random_map: + path = map_utility.get_one_random_map() + map_path = map_utility.get_formatted_map(path) + map_ext = map_utility.map_ext + self.update_map(map_path, map_ext, update_render=True) + self.set_map_path(path) + + # Select a random point from the race line + race_line_x = self.race_line_x + race_line_y = self.race_line_y + race_line_theta = self.race_line_theta + random_index = np.random.randint(len(race_line_x)) + x = race_line_x[random_index] + y = race_line_y[random_index] + t = race_line_theta[random_index] + + + # Update the race line to start from the selected point + race_line_x = race_line_x[random_index:] + race_line_x[:random_index] + race_line_y = race_line_y[random_index:] + race_line_y[:random_index] + race_line_theta = race_line_theta[random_index:] + race_line_theta[:random_index] + + + self.race_line_x = race_line_x + self.race_line_y = race_line_y + self.race_line_theta = race_line_theta + + # else: + # x, y, t = self.start_position() + + self.episode_returns = [] + + self.last_position = {'x': x, 'y': y} + + + observation, _, _, _ = self.env.reset(np.array([[x, y, t]])) + return self.normalise_observations(observation['scans'][0]) + + def update_map(self, map_name, map_extension, update_render=False): + 
self.env.map_name = map_name + self.env.map_ext = map_extension + self.env.update_map(f"{map_name}.yaml", map_extension) + # if update_render and self.env.renderer: + # self.env.renderer.close() + # self.env.renderer = None + + def seed(self, seed): + self.current_seed = seed + np.random.seed(self.current_seed) + print(f"Seed -> {self.current_seed}") + + def un_normalise_actions(self, actions): + # convert actions from range [-1, 1] to normal steering/speed range + steer = convert_range(actions[0], [-1, 1], [self.s_min, self.s_max]) + speed = convert_range(actions[1], [-1, 1], [self.v_min, self.v_max]) + return np.array([steer, speed], dtype=np.float) + + def normalise_observations(self, observations): + # convert observations from normal lidar distances range to range [-1, 1] + return convert_range(observations, [self.lidar_min, self.lidar_max], [-1, 1]) + diff --git a/model-gym-ros-env/car_node/setup.py b/model-gym-ros-env/car_node/setup.py new file mode 100644 index 00000000..ea8b8ee9 --- /dev/null +++ b/model-gym-ros-env/car_node/setup.py @@ -0,0 +1,26 @@ +from setuptools import setup + +package_name = 'car_node' + +setup( + name=package_name, + version='0.0.0', + packages=[package_name], + data_files=[ + ('share/ament_index/resource_index/packages', + ['resource/' + package_name]), + ('share/' + package_name, ['package.xml']), + ], + install_requires=['setuptools'], + zip_safe=True, + maintainer='root', + maintainer_email='manuel.andruccioli@studio.unibo.it', + description='TODO: Package description', + license='TODO: License declaration', + tests_require=['pytest'], + entry_points={ + 'console_scripts': [ + 'car = car_node.main:main' + ], + }, +) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 4d002cdc..00000000 --- a/requirements.txt +++ /dev/null @@ -1,58 +0,0 @@ -absl-py==2.0.0 -cachetools==5.3.2 -certifi==2023.11.17 -charset-normalizer==3.3.2 -cloudpickle==1.6.0 -contourpy==1.1.1 -cycler==0.12.1 --e git+ssh://git@github.com/zucchero-sintattico/svs-f1tenth_gym.git@a4cd61809932d839f8d412682d61252a8a243c27#egg=f110_gym -Farama-Notifications==0.0.4 -filelock==3.13.1 -fonttools==4.46.0 -fsspec==2023.12.2 -google-auth==2.25.2 -google-auth-oauthlib==1.0.0 -grpcio==1.60.0 -gym==0.19.0 -gymnasium==0.29.1 -idna==3.6 -importlib-metadata==7.0.0 -importlib-resources==6.1.1 -Jinja2==3.1.2 -kiwisolver==1.4.5 -llvmlite==0.41.1 -Markdown==3.5.1 -MarkupSafe==2.1.3 -matplotlib==3.7.4 -mpmath==1.3.0 -networkx==3.1 -numba==0.58.1 -numpy==1.22.0 -oauthlib==3.2.2 -packaging==23.2 -pandas==2.0.3 -Pillow==10.1.0 -protobuf==4.25.1 -pyasn1==0.5.1 -pyasn1-modules==0.3.0 -pyglet==1.5.28 -PyOpenGL==3.1.7 -pyparsing==3.1.1 -python-dateutil==2.8.2 -pytz==2023.3.post1 -PyYAML==6.0.1 -requests==2.31.0 -requests-oauthlib==1.3.1 -rsa==4.9 -scipy==1.10.1 -six==1.16.0 -sympy==1.12 -tensorboard==2.14.0 -tensorboard-data-server==0.7.2 -torch==2.1.1 -torch-tb-profiler==0.4.3 -typing_extensions==4.9.0 -tzdata==2023.3 -urllib3==2.1.0 -Werkzeug==3.0.1 -zipp==3.17.0 diff --git a/setup.py b/setup.py index 4b411f93..872758d7 100644 --- a/setup.py +++ b/setup.py @@ -13,5 +13,10 @@ 'numba>=0.55.2', 'pyyaml>=5.3.1', 'pyglet==1.5.28', - 'pyopengl'] + 'pyopengl', + # our deps + 'stable-baselines3==2.2.1', + 'stable-baselines3[extra]', + 'Shimmy==1.3.0', + 'tensorboard==2.14.0'] ) diff --git a/src/full_train.py b/src/full_train.py deleted file mode 100644 index f7f4cb5d..00000000 --- a/src/full_train.py +++ /dev/null @@ -1,39 +0,0 @@ -import gym -from stable_baselines3 import PPO -from 
wrapper.wrapper import F110_Wrapped -from argparse import Namespace -from f110_gym.envs.base_classes import Integrator -import utility.map_utility as map_utility -from random_train import RandomTrain -from train_on_one_map import TrainOnOneMap - -RandomTrain().run() - -TrainOnOneMap("YasMarina").run() - - -timestep = 0.01 - -path = map_utility.get_map("YasMarina") -map_path = map_utility.get_formatted_map(path) -map_ext = map_utility.map_ext - - -eval_env = gym.make('f110_gym:f110-v0', map=map_path, map_ext=map_ext, num_agents=1, timestep=timestep, integrator=Integrator.RK4) -eval_env = F110_Wrapped(eval_env, random_map=False) -eval_env.set_map_path(path) - - -model = PPO.load("./train_test/best_model", eval_env, device='cpu') - -episode = 0 -while episode < 1000: - episode += 1 - obs = eval_env.reset() - done = False - while not done: - action, _ = model.predict(obs) - obs, reward, done, _ = eval_env.step(action) - if done: - print("Lap done", episode) - eval_env.render(mode='human') diff --git a/src/random_train.py b/src/random_train.py deleted file mode 100644 index bbb31390..00000000 --- a/src/random_train.py +++ /dev/null @@ -1,41 +0,0 @@ -import gym -from stable_baselines3 import PPO -from wrapper.wrapper import F110_Wrapped -from f110_gym.envs.base_classes import Integrator -import numpy as np -from stable_baselines3.common.callbacks import EvalCallback -import utility.map_utility as map_utility -from utility.linear_schedule import linear_schedule - -class RandomTrain: - def __init__(self): - self.timestep = 0.01 - self.total_timesteps = 1_000 - self.learning_rate = linear_schedule(0.0003) - self.gamma = 0.99 - self.gae_lambda = 0.95 - self.verbose = 1 - self.device = 'cpu' - - - self.eval_env = gym.make('f110_gym:f110-v0', num_agents=1, timestep=self.timestep, integrator=Integrator.RK4) - self.eval_env = F110_Wrapped(self.eval_env, random_map=True) - self.eval_env.seed(np.random.randint(pow(2, 31) - 1)) - - def run(self): - - - try: - model = PPO.load("./train_test/best_model", self.eval_env, device='cpu') - except: - model = PPO("MlpPolicy", self.eval_env, learning_rate=self.learning_rate, gamma=self.gamma, gae_lambda=self.gae_lambda, verbose=self.verbose, device=self.device) - - eval_callback = EvalCallback(self.eval_env, best_model_save_path='./train_test/', - log_path='./train_test/', eval_freq=1000, - deterministic=True, render=False) - - model.learn(total_timesteps=self.total_timesteps, callback=eval_callback) - -if __name__ == "__main__": - random_train = RandomTrain() - random_train.run() diff --git a/src/train.py b/src/train.py new file mode 100644 index 00000000..86428216 --- /dev/null +++ b/src/train.py @@ -0,0 +1,134 @@ +import gym +from stable_baselines3 import PPO, A2C +from wrapper.wrapper import F110_Wrapped +import yaml +from argparse import Namespace +from f110_gym.envs.base_classes import Integrator +import numpy as np +import os +from stable_baselines3.common.env_util import make_vec_env +from stable_baselines3.common.vec_env import SubprocVecEnv +from stable_baselines3.common.callbacks import BaseCallback +from stable_baselines3.common.evaluation import evaluate_policy +from stable_baselines3.common.env_checker import check_env +from utility.map_utility import get_map, get_formatted_map, map_ext +from utility.linear_schedule import linear_schedule +from stable_baselines3.common.callbacks import EvalCallback +from stable_baselines3.common.monitor import Monitor +from utility.SaveOnBestTrainingRewardCallback import SaveTheBestAndRestart +timestep = 0.01 
+tensorboard_path = './train_test/' + + +map = "Zandvoort" +path = get_map(map) +map_path = get_formatted_map(path) + +eval_env = gym.make('f110_gym:f110-v0', map=map_path, map_ext=map_ext, num_agents=1, timestep=timestep, integrator=Integrator.RK4) + +eval_env = F110_Wrapped(eval_env, random_map=True) + +eval_env.seed(1773449316) + +skip_training = False + +max_timesteps = 200_000 +min_timesteps = 50_000 + +max_learning_rate = 0.0005 +min_learning_rate = 0.00005 +num_of_steps = 6 + +device = 'cpu' + +timesteps_list = np.logspace(np.log10(min_timesteps), np.log10(max_timesteps), num=num_of_steps, endpoint=True, base=10.0, dtype=int, axis=0) +learning_rate_list = np.logspace(np.log10(max_learning_rate), np.log10(min_learning_rate), num=num_of_steps, endpoint=True, base=10.0, dtype=None, axis=0) + +print("timestemp" , timesteps_list) +print("leaning rate", learning_rate_list) + +if not skip_training: + + for timesteps, learning_rate in zip(timesteps_list, learning_rate_list): + #if best_model exists, load it + # if os.path.exists("./train_test/best_model.zip"): + # print("Loading Existing Model") + # model = PPO.load("./train_test/best_model", eval_env, learning_rate=learning_rate) + if os.path.exists("./train_test/best_global_model.zip"): + print("Loading Existing Model") + model = PPO.load("./train_test/best_global_model", eval_env, learning_rate=linear_schedule(learning_rate), device=device) + else: + model = PPO("MlpPolicy", + eval_env, + gamma=0.99, + learning_rate=linear_schedule(learning_rate), + gae_lambda=0.95, + verbose=0, + tensorboard_log=tensorboard_path, + device=device + ) + + + eval_callback = EvalCallback(eval_env, best_model_save_path='./train_test/', + log_path='./train_test/', eval_freq= int(timesteps/20), + deterministic=True, render=False) + + + + model.learn(total_timesteps=timesteps, progress_bar=True, callback=eval_callback) + + del model + + model = PPO.load("./train_test/best_model", eval_env, device=device) + + mean_reward, _ = evaluate_policy(model, model.get_env(), n_eval_episodes=20) + + + #save in file the mean reward, if the file does not exist, create it + if not os.path.exists("./train_test/mean_reward.txt"): + with open("./train_test/mean_reward.txt", "w") as f: + f.write(f"{mean_reward}") + model.save("./train_test/best_global_model") + else: + #overwrite the file with the new mean reward if it is better + with open("./train_test/mean_reward.txt", "r") as f: + best_mean_reward = float(f.read()) + if mean_reward > best_mean_reward: + with open("./train_test/mean_reward.txt", "w") as f: + f.write(f"{mean_reward}") + model.save("./train_test/best_global_model") + print("Saved Best Model") + + del model + + + + + + +eval_env = gym.make('f110_gym:f110-v0', map=map_path, map_ext=map_ext, num_agents=1, timestep=timestep, integrator=Integrator.RK4) +eval_env = F110_Wrapped(eval_env, random_map=False) +eval_env.set_map_path(path) + +model = PPO.load("./train_test/best_global_model", eval_env, device=device) + +mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=1) + +print(mean_reward) + +# Enjoy trained agent +vec_env = model.get_env() +obs = vec_env.reset() + +episode = 0 +while episode < 100: + + episode += 1 + obs = vec_env.reset() + done = False + while not done: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, dones, info = vec_env.step(action) + if done: + print("-->", episode) + eval_env.render(mode='human') diff --git a/src/train_on_one_map.py b/src/train_on_one_map.py deleted file mode 100644 index 
f4a5c2f3..00000000 --- a/src/train_on_one_map.py +++ /dev/null @@ -1,46 +0,0 @@ -import gym -from stable_baselines3 import PPO -from wrapper.wrapper import F110_Wrapped -from f110_gym.envs.base_classes import Integrator -import numpy as np -from stable_baselines3.common.callbacks import EvalCallback -import utility.map_utility as map_utility -from utility.linear_schedule import linear_schedule - -class TrainOnOneMap: - def __init__(self, map_name="Monza"): - self.timestep = 0.01 - self.total_timesteps = 1_000 - self.learning_rate = linear_schedule(0.0003) - self.gamma = 0.99 - self.gae_lambda = 0.95 - self.verbose = 1 - self.device = 'cpu' - self.eval_env = None - - path = map_utility.get_map(map_name) - map_path = map_utility.get_formatted_map(path) - map_ext = map_utility.map_ext - - self.eval_env = gym.make('f110_gym:f110-v0', map=map_path, map_ext=map_ext, num_agents=1, timestep=self.timestep, integrator=Integrator.RK4) - # Wrap basic gym with RL functions - self.eval_env = F110_Wrapped(self.eval_env, random_map=False) - self.eval_env.set_map_path(path) - self.eval_env.seed(np.random.randint(pow(2, 31) - 1)) - - - - def run(self): - - try: - model = PPO.load("./train_test/best_model", self.eval_env, device='cpu') - except: - model = PPO("MlpPolicy", self.eval_env, learning_rate=self.learning_rate, gamma=self.gamma, gae_lambda=self.gae_lambda, verbose=self.verbose, device=self.device) - - eval_callback = EvalCallback(self.eval_env, best_model_save_path='./train_test/', log_path='./train_test/', eval_freq=1000, deterministic=True, render=False) - - model.learn(total_timesteps=self.total_timesteps, callback=eval_callback) - -if __name__ == "__main__": - train_on_one_map = TrainOnOneMap() - train_on_one_map.run() diff --git a/src/utility/SaveOnBestTrainingRewardCallback.py b/src/utility/SaveOnBestTrainingRewardCallback.py new file mode 100644 index 00000000..97fdf330 --- /dev/null +++ b/src/utility/SaveOnBestTrainingRewardCallback.py @@ -0,0 +1,400 @@ +import os +import warnings +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union + +import gymnasium as gym +import numpy as np + +from stable_baselines3.common.logger import Logger + +try: + from tqdm import TqdmExperimentalWarning + + # Remove experimental warning + warnings.filterwarnings("ignore", category=TqdmExperimentalWarning) + from tqdm.rich import tqdm +except ImportError: + # Rich not installed, we only throw an error + # if the progress bar is used + tqdm = None + + +from stable_baselines3.common.evaluation import evaluate_policy +from stable_baselines3.common.vec_env import DummyVecEnv, VecEnv, sync_envs_normalization + +if TYPE_CHECKING: + from stable_baselines3.common import base_class + + +class BaseCallback(ABC): + """ + Base class for callback. 
+ + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages + """ + + # The RL model + # Type hint as string to avoid circular import + model: "base_class.BaseAlgorithm" + + def __init__(self, verbose: int = 0): + super().__init__() + # Number of time the callback was called + self.n_calls = 0 # type: int + # n_envs * n times env.step() was called + self.num_timesteps = 0 # type: int + self.verbose = verbose + self.locals: Dict[str, Any] = {} + self.globals: Dict[str, Any] = {} + # Sometimes, for event callback, it is useful + # to have access to the parent object + self.parent = None # type: Optional[BaseCallback] + + @property + def training_env(self) -> VecEnv: + training_env = self.model.get_env() + assert ( + training_env is not None + ), "`model.get_env()` returned None, you must initialize the model with an environment to use callbacks" + return training_env + + @property + def logger(self) -> Logger: + return self.model.logger + + # Type hint as string to avoid circular import + def init_callback(self, model: "base_class.BaseAlgorithm") -> None: + """ + Initialize the callback by saving references to the + RL model and the training environment for convenience. + """ + self.model = model + self._init_callback() + + def _init_callback(self) -> None: + pass + + def on_training_start(self, locals_: Dict[str, Any], globals_: Dict[str, Any]) -> None: + # Those are reference and will be updated automatically + self.locals = locals_ + self.globals = globals_ + # Update num_timesteps in case training was done before + self.num_timesteps = self.model.num_timesteps + self._on_training_start() + + def _on_training_start(self) -> None: + pass + + def on_rollout_start(self) -> None: + self._on_rollout_start() + + def _on_rollout_start(self) -> None: + pass + + @abstractmethod + def _on_step(self) -> bool: + """ + :return: If the callback returns False, training is aborted early. + """ + return True + + def on_step(self) -> bool: + """ + This method will be called by the model after each call to ``env.step()``. + + For child callback (of an ``EventCallback``), this will be called + when the event is triggered. + + :return: If the callback returns False, training is aborted early. + """ + self.n_calls += 1 + self.num_timesteps = self.model.num_timesteps + + return self._on_step() + + def on_training_end(self) -> None: + self._on_training_end() + + def _on_training_end(self) -> None: + pass + + def on_rollout_end(self) -> None: + self._on_rollout_end() + + def _on_rollout_end(self) -> None: + pass + + def update_locals(self, locals_: Dict[str, Any]) -> None: + """ + Update the references to the local variables. + + :param locals_: the local variables during rollout collection + """ + self.locals.update(locals_) + self.update_child_locals(locals_) + + def update_child_locals(self, locals_: Dict[str, Any]) -> None: + """ + Update the references to the local variables on sub callbacks. + + :param locals_: the local variables during rollout collection + """ + pass + + +class EventCallback(BaseCallback): + """ + Base class for triggering callback on event. + + :param callback: Callback that will be called + when an event is triggered. 
+ :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages + """ + + def __init__(self, callback: Optional[BaseCallback] = None, verbose: int = 0): + super().__init__(verbose=verbose) + self.callback = callback + # Give access to the parent + if callback is not None: + assert self.callback is not None + self.callback.parent = self + + def init_callback(self, model: "base_class.BaseAlgorithm") -> None: + super().init_callback(model) + if self.callback is not None: + self.callback.init_callback(self.model) + + def _on_training_start(self) -> None: + if self.callback is not None: + self.callback.on_training_start(self.locals, self.globals) + + def _on_event(self) -> bool: + if self.callback is not None: + return self.callback.on_step() + return True + + def _on_step(self) -> bool: + return True + + def update_child_locals(self, locals_: Dict[str, Any]) -> None: + """ + Update the references to the local variables. + + :param locals_: the local variables during rollout collection + """ + if self.callback is not None: + self.callback.update_locals(locals_) + + + + + + + +class SaveTheBestAndRestart(EventCallback): + """ + Callback for evaluating an agent. + + .. warning:: + + When using multiple environments, each call to ``env.step()`` + will effectively correspond to ``n_envs`` steps. + To account for that, you can use ``eval_freq = max(eval_freq // n_envs, 1)`` + + :param eval_env: The environment used for initialization + :param callback_on_new_best: Callback to trigger + when there is a new best model according to the ``mean_reward`` + :param callback_after_eval: Callback to trigger after every evaluation + :param n_eval_episodes: The number of episodes to test the agent + :param eval_freq: Evaluate the agent every ``eval_freq`` call of the callback. + :param log_path: Path to a folder where the evaluations (``evaluations.npz``) + will be saved. It will be updated at each evaluation. + :param best_model_save_path: Path to a folder where the best model + according to performance on the eval env will be saved. + :param deterministic: Whether the evaluation should + use a stochastic or deterministic actions. 
+ :param render: Whether to render or not the environment during evaluation + :param verbose: Verbosity level: 0 for no output, 1 for indicating information about evaluation results + :param warn: Passed to ``evaluate_policy`` (warns if ``eval_env`` has not been + wrapped with a Monitor wrapper) + """ + + def __init__( + self, + eval_env: Union[gym.Env, VecEnv], + callback_on_new_best: Optional[BaseCallback] = None, + callback_after_eval: Optional[BaseCallback] = None, + n_eval_episodes: int = 5, + eval_freq: int = 10000, + log_path: Optional[str] = None, + best_model_save_path: Optional[str] = None, + deterministic: bool = True, + render: bool = False, + verbose: int = 1, + warn: bool = True, + ): + super().__init__(callback_after_eval, verbose=verbose) + + self.callback_on_new_best = callback_on_new_best + if self.callback_on_new_best is not None: + # Give access to the parent + self.callback_on_new_best.parent = self + + self.n_eval_episodes = n_eval_episodes + self.eval_freq = eval_freq + self.best_mean_reward = -np.inf + self.last_mean_reward = -np.inf + self.deterministic = deterministic + self.render = render + self.warn = warn + + # Convert to VecEnv for consistency + if not isinstance(eval_env, VecEnv): + eval_env = DummyVecEnv([lambda: eval_env]) # type: ignore[list-item, return-value] + + self.eval_env = eval_env + self.best_model_save_path = best_model_save_path + # Logs will be written in ``evaluations.npz`` + if log_path is not None: + log_path = os.path.join(log_path, "evaluations") + self.log_path = log_path + self.evaluations_results: List[List[float]] = [] + self.evaluations_timesteps: List[int] = [] + self.evaluations_length: List[List[int]] = [] + # For computing success rate + self._is_success_buffer: List[bool] = [] + self.evaluations_successes: List[List[bool]] = [] + + def _init_callback(self) -> None: + # Does not work in some corner cases, where the wrapper is not the same + if not isinstance(self.training_env, type(self.eval_env)): + warnings.warn("Training and eval env are not of the same type" f"{self.training_env} != {self.eval_env}") + + # Create folders if needed + if self.best_model_save_path is not None: + os.makedirs(self.best_model_save_path, exist_ok=True) + if self.log_path is not None: + os.makedirs(os.path.dirname(self.log_path), exist_ok=True) + + # Init callback called on new best model + if self.callback_on_new_best is not None: + self.callback_on_new_best.init_callback(self.model) + + def _log_success_callback(self, locals_: Dict[str, Any], globals_: Dict[str, Any]) -> None: + """ + Callback passed to the ``evaluate_policy`` function + in order to log the success rate (when applicable), + for instance when using HER. + + :param locals_: + :param globals_: + """ + info = locals_["info"] + + if locals_["done"]: + maybe_is_success = info.get("is_success") + if maybe_is_success is not None: + self._is_success_buffer.append(maybe_is_success) + + def _on_step(self) -> bool: + continue_training = True + + if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0: + # Sync training and eval env if there is VecNormalize + if self.model.get_vec_normalize_env() is not None: + try: + sync_envs_normalization(self.training_env, self.eval_env) + except AttributeError as e: + raise AssertionError( + "Training and eval env are not wrapped the same way, " + "see https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html#evalcallback " + "and warning above." 
+ ) from e + + # Reset success rate buffer + self._is_success_buffer = [] + + episode_rewards, episode_lengths = evaluate_policy( + self.model, + self.eval_env, + n_eval_episodes=self.n_eval_episodes, + render=self.render, + deterministic=self.deterministic, + return_episode_rewards=True, + warn=self.warn, + callback=self._log_success_callback, + ) + + if self.log_path is not None: + assert isinstance(episode_rewards, list) + assert isinstance(episode_lengths, list) + self.evaluations_timesteps.append(self.num_timesteps) + self.evaluations_results.append(episode_rewards) + self.evaluations_length.append(episode_lengths) + + kwargs = {} + # Save success log if present + if len(self._is_success_buffer) > 0: + self.evaluations_successes.append(self._is_success_buffer) + kwargs = dict(successes=self.evaluations_successes) + + np.savez( + self.log_path, + timesteps=self.evaluations_timesteps, + results=self.evaluations_results, + ep_lengths=self.evaluations_length, + **kwargs, + ) + + mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards) + mean_ep_length, std_ep_length = np.mean(episode_lengths), np.std(episode_lengths) + self.last_mean_reward = float(mean_reward) + + if self.verbose >= 1: + print(f"Eval num_timesteps={self.num_timesteps}, " f"episode_reward={mean_reward:.2f} +/- {std_reward:.2f}") + print(f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}") + # Add to current Logger + self.logger.record("eval/mean_reward", float(mean_reward)) + self.logger.record("eval/mean_ep_length", mean_ep_length) + + if len(self._is_success_buffer) > 0: + success_rate = np.mean(self._is_success_buffer) + if self.verbose >= 1: + print(f"Success rate: {100 * success_rate:.2f}%") + self.logger.record("eval/success_rate", success_rate) + + # Dump log so the evaluation results are printed with the correct timestep + self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard") + self.logger.dump(self.num_timesteps) + + if mean_reward > self.best_mean_reward: + if self.verbose >= 1: + print("New best mean reward!") + if self.best_model_save_path is not None: + self.model.save(os.path.join(self.best_model_save_path, "best_model")) + self.best_mean_reward = float(mean_reward) + # Trigger callback on new best model, if needed + if self.callback_on_new_best is not None: + continue_training = self.callback_on_new_best.on_step() + + # Trigger callback after every evaluation, if needed + if self.callback is not None: + continue_training = continue_training and self._on_event() + + return continue_training + + def update_child_locals(self, locals_: Dict[str, Any]) -> None: + """ + Update the references to the local variables. 
+ + :param locals_: the local variables during rollout collection + """ + if self.callback: + self.callback.update_locals(locals_) + + + + + + diff --git a/src/wrapper/wrapper.py b/src/wrapper/wrapper.py index 180766f9..ff2b08ae 100644 --- a/src/wrapper/wrapper.py +++ b/src/wrapper/wrapper.py @@ -1,20 +1,13 @@ import gym import numpy as np +#from gym import spaces from gym import spaces from pathlib import Path import yaml from argparse import Namespace from pyglet.gl import GL_POINTS import utility.map_utility as map_utility - - - -def get_distance_from_closest_point(x_list, w_list, x, y, index): - closest_x = x_list[index] - closest_y = w_list[index] - distance = np.sqrt(np.power(x - closest_x, 2) + np.power(y - closest_y, 2)) - return distance - +from typing import Any, Dict, List, Optional, SupportsFloat, Tuple, Union def convert_range(value, input_range, output_range): # converts value(s) from range to another range @@ -68,13 +61,37 @@ def __init__(self, env, random_map=False): self.race_line_color = [255, 0, 0] self.race_line_x = [] self.race_line_y = [] + self.race_line_theta = [] + + self.episode_returns = [] + + self.is_rendering = False + + self.last_position = {'x': None, 'y': None} + + + def get_total_steps(self) -> int: + return self.step_count + + def get_episode_rewards(self) -> List[float]: + """ + Returns the rewards of all the episodes + + :return: + """ + return self.episode_returns + + + + def set_raceliens(self): if self.map_path is not None: raceline = map_utility.get_raceline(self.map_path) - self.race_line_x, self.race_line_y, _ = map_utility.get_x_y_theta_from_raceline(raceline) + self.race_line_x, self.race_line_y, self.race_line_theta = map_utility.get_x_y_theta_from_raceline(raceline) self.race_line_x = self.race_line_x.tolist() self.race_line_y = self.race_line_y.tolist() + self.race_line_theta = self.race_line_theta.tolist() def set_map_path(self, map_path): self.map_path = map_path @@ -92,74 +109,169 @@ def start_position(self): def step(self, action): - action_convert = self.un_normalise_actions(action) - observation, _, done, info = self.env.step(np.array([action_convert])) - self.step_count += 1 + #add noise to action + #action = action + np.random.normal(0, 0.1, 2) + + action_convert = self.un_normalise_actions(action) + observation, _, done, info = self.env.step(np.array([action_convert])) + + self.step_count += 1 + + + + next_x = self.race_line_x[self.count] + next_y = self.race_line_y[self.count] - next_x = self.race_line_x[self.count] - next_y = self.race_line_y[self.count] + if self.env.renderer: + if done: + self.race_line_color = [np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)] + self.env.renderer.batch.add(1, GL_POINTS, None, ('v3f/stream', [next_x * 50, next_y * 50, 0.]), + ('c3B/stream', self.race_line_color)) - if self.env.renderer: - if done: - self.race_line_color = [np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)] - self.env.renderer.batch.add(1, GL_POINTS, None, ('v3f/stream', [next_x * 50, next_y * 50, 0.]), - ('c3B/stream', self.race_line_color)) - reward = 0 - reward = reward + 0.9 + reward = 0 + + - if self.count < len(self.race_line_x) - 1: - X, Y = observation['poses_x'][0], observation['poses_y'][0] + + if self.count < len(self.race_line_x) - 1: + X, Y = observation['poses_x'][0], observation['poses_y'][0] + + dist = np.sqrt(np.power((X - next_x), 2) + np.power((Y - next_y), 2)) + if dist < 1: + self.count = self.count + 1 + reward += 0.01 - dist = np.sqrt(np.power((X - 
next_x), 2) + np.power((Y - next_y), 2)) - if dist < 2: - self.count += 1 + if dist < 1.5: + complete = 1 #(self.count/len(self.race_line_x)) * 0.5 + reward += complete + else: + print("---- Lap Done ---->", self.map_path) + self.count = 0 + reward += 10 + + reward += 0.1 - if dist < 2.5: - complete = (self.count / len(self.race_line_x)) * 0.5 - reward += complete - else: - self.count = 0 - reward += 100 - print("Lap Done") + # # Check if the car has moved a significant distance from the last position + distance_from_last_position = \ + np.power((observation["poses_x"][0] - self.last_position['x']), 2) \ + + \ + np.power((observation["poses_y"][0] - self.last_position['y']), 2) \ + + distance_from_last_position = round(distance_from_last_position, 4) + + if distance_from_last_position < 0.0005 : + reward = 0 + + + + # # Assicurati che gli input siano all'interno dei range specificati + # angolo_sterzo = max(self.s_min, min(self.s_max, action_convert[0])) + # accelerazione = max(self.v_min, min(self.v_max, action_convert[1])) + + # # Normalizza i valori per ottenere un valore tra 0 e 1 + # norm_angolo_sterzo = (angolo_sterzo - self.s_min) / (self.s_max - self.s_min) + # norm_accelerazione = (accelerazione - self.v_min) / (self.v_max - self.v_min) + + # # Calcola la reward inversamente proporzionale alla differenza tra i due valori normalizzati + # reward += (1 - abs(norm_angolo_sterzo - norm_accelerazione)) + + # if distance_from_last_position < 0.005: + # reward += -0.5 + + # if abs(action_convert[1]) < 1: + # reward = 0 + + + reward = round(reward, 4) + + + + + self.last_position['x'] = observation['poses_x'][0] + self.last_position['y'] = observation['poses_y'][0] - if observation['collisions'][0]: - self.count = 0 - reward = -2 - if abs(observation['poses_theta'][0]) > self.max_theta: - done = True - if self.env.lap_counts[0] > 0: - self.count = 0 - reward += 1 - if self.env.lap_counts[0] > 1: - reward += 1 - self.env.lap_counts[0] = 0 - return self.normalise_observations(observation['scans'][0]), reward, bool(done), info + if observation['collisions'][0]: + done = True + self.count = 0 + reward = 0 + + # if abs(observation['poses_theta'][0]) > self.max_theta: + # done = True + # self.count = 0 + # reward = 0 + #if the car go out of the track the episode is done + if len(self.episode_returns) > 70_000: + print(reward) + self.episode_returns = [] + done = True + self.count = 0 + reward = -100000 + print("Episod Done - Too slow") + + + + + + self.episode_returns.append(reward) + + + + + return self.normalise_observations(observation['scans'][0]), reward, bool(done), info + + def reset(self): if self.random_map: path = map_utility.get_one_random_map() map_path = map_utility.get_formatted_map(path) - print("Rand Map :", map_path) map_ext = map_utility.map_ext self.update_map(map_path, map_ext, update_render=True) self.set_map_path(path) - x, y, t = self.start_position() + # Select a random point from the race line + race_line_x = self.race_line_x + race_line_y = self.race_line_y + race_line_theta = self.race_line_theta + random_index = np.random.randint(len(race_line_x)) + x = race_line_x[random_index] + y = race_line_y[random_index] + t = race_line_theta[random_index] + + + # Update the race line to start from the selected point + race_line_x = race_line_x[random_index:] + race_line_x[:random_index] + race_line_y = race_line_y[random_index:] + race_line_y[:random_index] + race_line_theta = race_line_theta[random_index:] + race_line_theta[:random_index] + + + self.race_line_x = race_line_x 
+ self.race_line_y = race_line_y + self.race_line_theta = race_line_theta + + # else: + # x, y, t = self.start_position() + + self.episode_returns = [] + + self.last_position = {'x': x, 'y': y} + + observation, _, _, _ = self.env.reset(np.array([[x, y, t]])) return self.normalise_observations(observation['scans'][0]) - def update_map(self, map_name, map_extension, update_render=True): + def update_map(self, map_name, map_extension, update_render=False): self.env.map_name = map_name self.env.map_ext = map_extension self.env.update_map(f"{map_name}.yaml", map_extension) - if update_render and self.env.renderer: - self.env.renderer.close() - self.env.renderer = None + # if update_render and self.env.renderer: + # self.env.renderer.close() + # self.env.renderer = None def seed(self, seed): self.current_seed = seed @@ -175,3 +287,4 @@ def un_normalise_actions(self, actions): def normalise_observations(self, observations): # convert observations from normal lidar distances range to range [-1, 1] return convert_range(observations, [self.lidar_min, self.lidar_max], [-1, 1]) + diff --git a/train_test/best_global_model.zip b/train_test/best_global_model.zip new file mode 100644 index 00000000..eeb7667b Binary files /dev/null and b/train_test/best_global_model.zip differ diff --git a/train_test/best_model.zip b/train_test/best_model.zip index 6bf2c211..cd788b79 100644 Binary files a/train_test/best_model.zip and b/train_test/best_model.zip differ diff --git a/train_test/evaluations.npz b/train_test/evaluations.npz index fad3c286..42b855d8 100644 Binary files a/train_test/evaluations.npz and b/train_test/evaluations.npz differ diff --git a/train_test/mean_reward.txt b/train_test/mean_reward.txt new file mode 100644 index 00000000..95571131 --- /dev/null +++ b/train_test/mean_reward.txt @@ -0,0 +1 @@ +206.90949999999998 \ No newline at end of file
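
Note on the learning-rate schedule: `src/train.py` (and the deleted `random_train.py` / `train_on_one_map.py`) import `utility.linear_schedule.linear_schedule`, which is not part of this diff. A minimal sketch of what such a helper typically looks like, assuming it follows the standard Stable-Baselines3 pattern of a callable that maps the remaining training progress (1.0 at the start, 0.0 at the end) to a learning rate:

```python
from typing import Callable

def linear_schedule(initial_value: float) -> Callable[[float], float]:
    """Return a schedule that linearly decays the learning rate.

    Stable-Baselines3 calls the returned function with `progress_remaining`,
    which goes from 1.0 (start of training) down to 0.0 (end of training).
    """
    def schedule(progress_remaining: float) -> float:
        return progress_remaining * initial_value
    return schedule

# Usage as in train.py:
#   PPO("MlpPolicy", env, learning_rate=linear_schedule(0.0005), ...)
```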
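Note on the wrapper's normalisation contract: `F110_Wrapped` maps policy actions from [-1, 1] back to the car's steering/speed ranges and maps the 1080-beam LiDAR scan (0–30 m) into [-1, 1] before handing it to PPO; `main.py` relies on the same `convert_range` helper on the ROS side. A self-contained sketch of that round trip, using illustrative steering/speed limits only (the wrapper reads the real ones from `env.params['s_min'/'s_max'/'v_min'/'v_max']`):

```python
import numpy as np

def convert_range(value, input_range, output_range):
    # Same linear rescaling helper as in car_node/wrapper.py.
    (in_min, in_max), (out_min, out_max) = input_range, output_range
    return (value - in_min) * (out_max - out_min) / (in_max - in_min) + out_min

# Illustrative limits only; not taken from this diff.
S_MIN, S_MAX = -0.41, 0.41   # steering angle [rad]
V_MIN, V_MAX = -5.0, 20.0    # speed [m/s]

# Policy output in [-1, 1] -> physical steering angle and speed.
action = np.array([0.2, -0.5])
steer = convert_range(action[0], [-1, 1], [S_MIN, S_MAX])
speed = convert_range(action[1], [-1, 1], [V_MIN, V_MAX])

# 1080-beam LiDAR scan in metres (0-30 m) -> normalised observation in [-1, 1].
scan = np.random.uniform(0.0, 30.0, size=1080)
obs = convert_range(scan, [0.0, 30.0], [-1.0, 1.0])

print(steer, speed, obs.min(), obs.max())
```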