From 7a6a5d5064cd635ebfcd4249b0fd91b973980e70 Mon Sep 17 00:00:00 2001
From: gael1130
Date: Sat, 20 Jan 2024 23:07:23 +0100
Subject: [PATCH] Update unit1.ipynb

Added a vectorized monitor (VecMonitor) solution and explanation for
evaluating agents trained in vectorized environments.
---
 notebooks/unit1/unit1.ipynb | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/notebooks/unit1/unit1.ipynb b/notebooks/unit1/unit1.ipynb
index 80064086..02313a16 100644
--- a/notebooks/unit1/unit1.ipynb
+++ b/notebooks/unit1/unit1.ipynb
@@ -730,6 +730,7 @@
    "source": [
     "## Evaluate the agent 📈\n",
     "- Remember to wrap the environment in a [Monitor](https://stable-baselines3.readthedocs.io/en/master/common/monitor.html).\n",
+    "- If you have used a vectorized environment from the start, you will need to wrap it in a [VecMonitor](https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#multiprocessing-unleashing-the-power-of-vectorized-environments) instead. You can search for 'VecMonitor' in the documentation.\n",
     "- Now that our Lunar Lander agent is trained 🚀, we need to **check its performance**.\n",
     "- Stable-Baselines3 provides a method to do that: `evaluate_policy`.\n",
     "- To fill that part you need to [check the documentation](https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#basic-usage-training-saving-loading)\n",
@@ -778,7 +779,16 @@
     "#@title\n",
     "eval_env = Monitor(gym.make(\"LunarLander-v2\"))\n",
     "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)\n",
-    "print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")"
+    "print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")\n",
+    "\n",
+    "# Alternative version for vectorized environments\n",
+    "eval_env = make_vec_env('LunarLander-v2', n_envs=16)\n",
+    "eval_env = VecMonitor(venv=eval_env)\n",
+    "\n",
+    "# Load the trained model and evaluate it on the wrapped environment\n",
+    "model = model.load(\"ppo-LunarLander-v2\", env=eval_env, print_system_info=True)\n",
+    "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)\n",
+    "print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")"
    ]
   },
   {
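
For reference, the alternative evaluation added above corresponds to the
following standalone script. This is a minimal sketch rather than part of
the notebook diff itself: it assumes the standard stable-baselines3 import
paths and that a PPO model was previously trained and saved as
"ppo-LunarLander-v2", as done earlier in the notebook.

    from stable_baselines3 import PPO
    from stable_baselines3.common.env_util import make_vec_env
    from stable_baselines3.common.evaluation import evaluate_policy
    from stable_baselines3.common.vec_env import VecMonitor

    # Build a vectorized environment and wrap it in VecMonitor so that
    # episode rewards and lengths are recorded during evaluation.
    eval_env = make_vec_env("LunarLander-v2", n_envs=16)
    eval_env = VecMonitor(venv=eval_env)

    # Reload the saved model against the evaluation environment.
    # (Assumes the model was saved as "ppo-LunarLander-v2" earlier.)
    model = PPO.load("ppo-LunarLander-v2", env=eval_env, print_system_info=True)

    # evaluate_policy accepts vectorized environments as well.
    mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
    print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")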