diff --git a/src/best_global_model.zip b/src/best_global_model.zip new file mode 100644 index 00000000..0ca0aa29 Binary files /dev/null and b/src/best_global_model.zip differ diff --git a/src/wrapper/wrapper.py b/src/wrapper/wrapper.py index 2ee13e5c..def825eb 100644 --- a/src/wrapper/wrapper.py +++ b/src/wrapper/wrapper.py @@ -189,9 +189,14 @@ def episode_end(reason = None, rew = 0): if not self.one_lap_done: steps_done = len(self.episode_returns) elif self.one_lap_done: - steps_done = len(self.episode_returns) / 2 + steps_done = len(self.episode_returns) / 2 - reward += (( steps_goal - steps_done ) * 0.05) + 400 + k = (steps_done - steps_goal)/steps_goal + + reward += (1-k) * 100 + + + #reward += (( steps_goal - steps_done ) * 0.05) + 400 print("----------------- Lap Done ----------------->", self.map_path, len(self.episode_returns) * 0.01, reward) diff --git a/train_test/best_global_model.zip b/train_test/best_global_model.zip index 0ca0aa29..c092d71a 100644 Binary files a/train_test/best_global_model.zip and b/train_test/best_global_model.zip differ diff --git a/train_test/best_model.zip b/train_test/best_model.zip index b3cc5c1b..95ce03d6 100644 Binary files a/train_test/best_model.zip and b/train_test/best_model.zip differ diff --git a/train_test/evaluations.npz b/train_test/evaluations.npz index 081d926c..409a66f0 100644 Binary files a/train_test/evaluations.npz and b/train_test/evaluations.npz differ diff --git a/train_test/mean_reward.txt b/train_test/mean_reward.txt index 76a3ca8c..0f18d4b7 100644 --- a/train_test/mean_reward.txt +++ b/train_test/mean_reward.txt @@ -1 +1 @@ -1738.5742371499996 \ No newline at end of file +1107.9332437999997 \ No newline at end of file