From d6bd3fc8c25ad4d6bbb296a18190794849cec039 Mon Sep 17 00:00:00 2001
From: martinballa
Date: Thu, 3 Dec 2020 20:40:18 +0000
Subject: [PATCH] Updated various documentations

---
 README.md | 118 ++------
 examples/README.md | 31 --
 .../checkpoint_209/checkpoint-209 | Bin 24091589 -> 24091612 bytes
 .../notebooks/rllib_evaluate_checkpoint.ipynb | 272 ++++++++++++++++++
 .../notebooks/rllib_restore_checkpoint.ipynb | 2 +-
 examples/single_agent_evaluation.py | 4 +-
 examples/single_agent_restoration.py | 4 +-
 7 files changed, 303 insertions(+), 128 deletions(-)
 create mode 100644 examples/notebooks/rllib_evaluate_checkpoint.ipynb

diff --git a/README.md b/README.md
index 008ea372d..2507614ad 100644
--- a/README.md
+++ b/README.md
@@ -1,54 +1,47 @@
 # Malmö #
-
 Project Malmö is a platform for Artificial Intelligence experimentation and research built on top of Minecraft. We aim to inspire a new generation of research into challenging new problems presented by this unique environment.
 
 [![Join the chat at https://gitter.im/Microsoft/malmo](https://badges.gitter.im/Microsoft/malmo.svg)](https://gitter.im/Microsoft/malmo?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Build Status](https://travis-ci.org/Microsoft/malmo.svg?branch=master)](https://travis-ci.org/Microsoft/malmo) [![license](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)](https://github.com/Microsoft/malmo/blob/master/LICENSE.txt)
 
 ----
-
 ## Getting Started ##
 
 ### MalmoEnv ###
 
-MalmoEnv implements an Open AI "gym"-like environment in Python without any native code (communicating directly with Java Minecraft). If you only need this functionallity then please see [MalmoEnv](https://github.com/Microsoft/malmo/tree/master/MalmoEnv). This will most likely be the preferred way to develop with Malmo Minecraft going forward.
-
-If you wish to use the "native" Malmo implementation, either install the "Malmo native Python wheel" (if available for your platform) or a pre-built binary release (more on these options below). Building Malmo yourself from source is always an option!
-
-Advantages:
-
-1. No native code - you don't have to build or install platform dependent code.
-2. A single network connection is used to run missions. No dynamic ports means it's more virtualization friendly.
-3. A simpler multi-agent coordination protocol.
-One Minecraft client instance, one single port is used to start missions.
-4. Less impedance miss-match with the gym api.
-
-Disadvantages:
+MalmoEnv implements an OpenAI "gym"-like environment in Python without any native code (communicating directly with Java Minecraft). If you only need this functionality then please see [MalmoEnv](MalmoEnv/README.md). This will most likely be the preferred way to develop with Malmo Minecraft going forward.
 
-1. The existing Malmo examples are not supported (as API used is different).
-Marlo envs should work with this [port](https://github.com/AndKram/marLo/tree/malmoenv).
-2. The API is more limited (e.g. selecting video options) - can edit mission xml directly.
+## Setup process
+- 1, Clone Malmo (```git clone https://github.com/martinballa/malmo```).
+- 2, Install Java 8 and Python 3.
+- 3, ```cd malmo/``` and install MalmoEnv with pip: ```pip install -e MalmoEnv/```.
+- 4, Test that Malmo works correctly by running the examples in the ```examples/``` directory (a minimal smoke-test sketch is included below).
+- 4*, Some examples additionally require ```ray``` (with ```tune``` and ```rllib```) and ```ffmpeg-python``` to be installed.
+- +1, To run Malmo headless (e.g. on a Linux server) you should install xvfb: ```sudo apt-get install -y xvfb```.
 
-### Malmo as a native Python wheel ###
+*Note:* Minecraft uses Gradle to build the project, and the build is not compatible with newer Java versions, so make sure that you use Java 8 for the build and that ```$JAVA_HOME``` points to the correct version.
+If you have any issues running Malmo, check the [FAQ](FAQ.md), which covers the most common problems.
 
-On common Windows, MacOSX and Linux variants it is possible to use ```pip3 install malmo``` to install Malmo as a python with native code package: [Pip install for Malmo](https://github.com/Microsoft/malmo/blob/master/scripts/python-wheel/README.md). Once installed, the malmo Python module can be used to download source and examples and start up Minecraft with the Malmo game mod.
+This repository contains various improvements to the Malmo framework. The launcher now handles the Malmo instances automatically instead of requiring you to start them manually, and the ```malmoenv``` Python package has been updated to make working with Malmo easier. We also added guides and examples that show how to work with Malmo in both single- and multi-agent setups. The examples use RLlib, which provides a wide range of state-of-the-art Reinforcement Learning algorithms. In the examples we have created wrappers that make Malmo compatible with RLlib, but based on these examples it should be easy to adapt Malmo to other frameworks as well.
 
-Alternatively, a pre-built version of Malmo can be installed as follows:
+We provide examples with explanations in the form of IPython notebooks that are ready to run once the dependencies are installed.
+The notebooks go through the basics, and we recommend working through them in the following order, as they explain different ideas along the way:
+- 1, [Random Player in Malmo](notebooks/random_agent_malmo.ipynb) - Explains the setup and shows how to interact with the environment using random action sampling.
+- 2, [RLlib single agent training](notebooks/rllib_single_agent.ipynb) - Expands the random agent example by using RLlib to handle the RL experiments.
+- 3, [RLlib multi-agent training](notebooks/rllib_multi_agent.ipynb) - A multi-agent version of the previous example.
+- 4, [RLlib checkpoint restoration](notebooks/rllib_restore_checkpoint.ipynb) - Loads a checkpoint and evaluates the trained agent, capturing the agent's observations as a GIF. This method can also be used to continue training via ray's ```tune``` API.
+- 5, [RLlib checkpoint evaluation](notebooks/rllib_evaluate_checkpoint.ipynb) - Loads a checkpoint and evaluates it manually by extracting the agent's policy.
 
-1. [Download the latest *pre-built* version, for Windows, Linux or MacOSX.](https://github.com/Microsoft/malmo/releases) - NOTE: This is _not_ the same as downloading a zip of the source from Github. _Doing this **will not work** unless you are planning to build the source code yourself (which is a lengthier process). If you get errors along the lines of "`ImportError: No module named MalmoPython`" it will probably be because you have made this mistake._
+We also provide non-notebook versions of these guides, which contain less explanation but might be more reusable in your projects.
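+
+As a quick smoke test of the setup described above, the sketch below runs a random agent on the single-agent mob chase mission. It is illustrative rather than part of the examples: it assumes you run it from the repository root and that a Minecraft client is already listening on port 9000 (e.g. started with ```./launchClient.sh -port 9000 -env```).
+
+```python
+# Minimal random-agent loop (sketch) -- adjust the mission path and port to your setup.
+from pathlib import Path
+import malmoenv
+
+xml = Path('MalmoEnv/missions/mobchase_single_agent.xml').read_text()
+
+env = malmoenv.make()
+env.init(xml, 9000, reshape=True)  # connect to the Minecraft client listening on port 9000
+
+obs = env.reset()
+done = False
+while not done:
+    action = env.action_space.sample()  # random policy, as in the first notebook
+    obs, reward, done, info = env.step(action)
+env.close()
+```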
-2. Install the dependencies for your OS: [Windows](doc/install_windows.md), [Linux](doc/install_linux.md), [MacOSX](doc/install_macosx.md).
-
-3. Launch Minecraft with our Mod installed. Instructions below.
-
-4. Launch one of our sample agents, as Python, C#, C++ or Java. Instructions below.
-
-5. Follow the [Tutorial](https://github.com/Microsoft/malmo/blob/master/Malmo/samples/Python_examples/Tutorial.pdf)
+----
+## Baseline results
+**PPO Single-agent mobchase**
 
-6. Explore the [Documentation](http://microsoft.github.io/malmo/). This is also available in the readme.html in the release zip.
+We trained PPO in single- and multi-agent setups on the mob chase tasks. The TensorBoard learning curves below come from a run of 1 million agent-environment interactions. The checkpoint is available in the ```examples/checkpoints/``` directory.
+![Single Agent PPO learning curves](imgs/PPO_single_agent_mobchase.png)
 
-7. Read the [Blog](http://microsoft.github.io/malmo/blog) for more information.
+![Evaluation](imgs/PPO_single_agent_mobchase.gif)
 
-If you want to build from source then see the build instructions for your OS: [Windows](doc/build_windows.md), [Linux](doc/build_linux.md), [MacOSX](doc/build_macosx.md).
+**PPO Multi-agent mobchase**
 
 ----
@@ -84,65 +77,6 @@ a machine for network use these TCP ports should be open.
 
 ----
 
-## Launch an agent: ##
-
-#### Running a Python agent: ####
-
-```
-cd Python_Examples
-python3 run_mission.py
-```
-
-#### Running a C++ agent: ####
-
-`cd Cpp_Examples`
-
-To run the pre-built sample:
-
-`run_mission` (on Windows)
-`./run_mission` (on Linux or MacOSX)
-
-To build the sample yourself:
-
-`cmake .`
-`cmake --build .`
-`./run_mission` (on Linux or MacOSX)
-`Debug\run_mission.exe` (on Windows)
-
-#### Running a C# agent: ####
-
-To run the pre-built sample (on Windows):
-
-`cd CSharp_Examples`
-`CSharpExamples_RunMission.exe`
-
-To build the sample yourself, open CSharp_Examples/RunMission.csproj in Visual Studio.
-
-Or from the command-line:
-
-`cd CSharp_Examples`
-
-Then, on Windows:
-```
-msbuild RunMission.csproj /p:Platform=x64
-bin\x64\Debug\CSharpExamples_RunMission.exe
-```
-
-#### Running a Java agent: ####
-
-`cd Java_Examples`
-`java -cp MalmoJavaJar.jar:JavaExamples_run_mission.jar -Djava.library.path=. JavaExamples_run_mission` (on Linux or MacOSX)
-`java -cp MalmoJavaJar.jar;JavaExamples_run_mission.jar -Djava.library.path=. JavaExamples_run_mission` (on Windows)
-
-#### Running an Atari agent: (Linux only) ####
-
-```
-cd Python_Examples
-python3 ALE_HAC.py
-```
-
-----
-
 # Citations #
 
 Please cite Malmo as:
diff --git a/examples/README.md b/examples/README.md
index df398d837..9ddae3c2b 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,32 +1 @@
 # Malmo
-
-This repository contains various improvements to the Malmo framework. This mainly involves the launcher to automatically handle the Malmo instances instead of the need to run them manually. We also updated the ```malmoenv``` python package to facilitate working with malmo. We also got some guides and examples to show how to work with Malmo in both single and multi-agent setups. The examples use RLlib, which provides a wide range of state-of-the-art Reinforcement Learning algorithms. In the examples we have created wrappers to make Malmo compatible to RLlib, but based on these examples it is easy to adapt Malmo to other frameworks.
-
-We provide some examples with explanations in the form of IPython notebooks that are ready to run after getting the dependencies installed.
-The notebooks go through the basics and we recommend to check them in the following order as they explain different ideas along the way:
-- 1 [Random Player in Malmo](notebooks/random_agent_malmo.ipynb) - Explains the setup and shows how to interact with the environment using random action sampling.
-- 2 [RLlib single agent training](notebooks/rllib_single_agent.ipynb) - Expands the random agent example with using RLlib to handle RL experiments.
-- 3 [RLlib multi-agent training](notebooks/rllib_multi_agent.ipynb) - A multi-agent version of the previous example.
-- 4 [RLlib checkpoint restoration](notebooks/rllib_restore_checkpoint.ipynb) - load checkpoint and evaluate the trained agent with capturing the agent's observations as a GIF. Can use this method to continue a training using ray's tune API.
-- 5 [RLlib checkpoint evaluation](notebooks/rllib_evaluate_checkpoint.ipynb) - load a checkpoint and manually evaluate it by extracting the agent's policy.
-
-We also provided non-notebook versions of these guides, which contain less explanation, but might be more reusable in your projects.
-
-## Setup process
-- 1, clone malmo (```git clone https://github.com/martinballa/malmo```)
-- 2, install java 8 and python 3.
-- 3, ```cd malmo/``` and install malmo using pip ```pip install -e MalmoEnv/```
-- 4, Test if Malmo works correctly by running the examples in the ```examples/``` directory.
-- 4*, Some examples requires ```ray``` (with ```tune``` and ```rllib```) installed and ```ffmpeg-python```.
-- +1, to run malmo headless on a linux headless server you should install xvfb ```sudo apt-get install -y xvfb```
-
-*Note:* Minecraft uses gradle to build the project and it's not compatible with newer versions of Java, so make sure that you use java version 8 for the build and make sure that $JAVA_HOME is pointing to the correct version.
-If you have any issues with running Malmo check the [FAQ](FAQ.md) as it might cover the issues.
-
-## Baseline results
-**Single-agent PPO**
-
-We trained PPO in single and multi-agent setups on the Mob chases tasks. The tensorboard learning curves are shown below from a run of 1 million agent-env interactions. The checkpoint is available in the ```examples/checkpoints/``` package.
-![Single Agent PPO learning curves](imgs/PPO_single_agent_mobchase.png)
-
-Multi-agent PPO
\ No newline at end of file
diff --git a/examples/checkpoints/PPO_malmo_single_agent/checkpoint_209/checkpoint-209 b/examples/checkpoints/PPO_malmo_single_agent/checkpoint_209/checkpoint-209
index dcd7c45bd89c35d7bdcace620d6e364ecdfd797f..271d8c836ef82a2efd1d52930905d020a24287f2 100644
GIT binary patch
delta 882
zcmWN=2XqJq07cP!n}{vM-ZK)3y(RVt5-UL#Nr({=(W;v5v%k?j(wZGcCvA14Xl*Us
z(;ls@MbVa0_nfeQenwWv)U>QayTG@v1kXiO8D5=t||2&Xv_MACwmw4ya_XiGcV(}9k3qBC9S
zGLB5fGl7XrB8$mPA)6eglFKw6;$f!q2s3z;napA~bC}CK9^-LfJ_}gLA{MiRrI=+b
zX9X*Hf+u;3r+J2Fd5-6Kffvc+C0=F~t69TZ*0G*fc$L?9oegYc6PtO1H+hSELd)dc+4seh!_>w~$<}1GD8@}Z`zUK#i
z)oEPH>V_oaPK?`HkN>$9XPrkxN|W3Rn4qKe@(rZtxd3xy5b%<{$p$
z4tM#Fd)!|nOD-T6lncp)rq=az!~DNyIYN$$Jm^3>ea-1O|IS=rIk!u|)hQfMat
delta 859
zcmWN=2UHLQ07cRB847|687hh>f+EX-;6NE}QB;sURU9Zn%l5E(mDO*j4cdE8jWj}w
zY?rpv%+W4wm$t_}=k9Co4BPqt^@soh2_l$obSH#RdJsl9J?TYn`p}mM`VmPK(ZtZ7
z0SqLTK@292A;dG3VI(k|M3NZ6NRk;v3aO-#P6neH!&t_V$#}BJW&#tL#AI@q!c=m}
zBcB4MF`XIAWEQiT!(0lP$9#%dz(N*LObLrArHpbasHBQ&YN(}-dKy^5QkK!ka#paC
zRWz}hH8iu97Fs!u<2iv7If-?g%qg78X`Id(oXJ_7%{i>+T+ZWs-~uk>A~tX_mvAZO
zGA`!|uH-7N<{GYLBb&I6>$!m&Y2zkt<`!<{Hg4w*?&L1+<{s|lKJMoM9^@e&<`Ev{
zF&^g$o@6syc#3wOW-Hrxh7O*klkM!_Ii9DB7ud;*yu{1A!mGT->%766>|!@>@iy=9
zF7NR^AMhc2*vm)k<74)7fP)<36F%iLKIaR*
oSZ3-m$T$-d4fDqo+M9}bL1)VR5@49lk?>Qd73<3o*~baXUVhWIr3b&P@X5xmy6^D
@xf4lKuQ~&?~
diff --git a/examples/notebooks/rllib_evaluate_checkpoint.ipynb b/examples/notebooks/rllib_evaluate_checkpoint.ipynb
new file mode 100644
index 000000000..b42df5838
--- /dev/null
+++ b/examples/notebooks/rllib_evaluate_checkpoint.ipynb
@@ -0,0 +1,272 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "# Single Agent evaluation using Malmo\n",
+    "This guide uses a trained checkpoint from RLlib and evaluates it for a few episodes on the same level that was used for training. We use a PPO checkpoint here; if you trained with a different algorithm, load that algorithm's trainer instead.\n",
+    "\n",
+    "We do not use the screen capturer in this guide, but you can add it as done in the other evaluation guide."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# imports\n",
+    "import gym, os, sys, argparse\n",
+    "sys.path.insert(0, os.path.abspath('../..'))  # notebooks do not define __file__; assumes the notebook runs from examples/notebooks\n",
+    "from pathlib import Path\n",
+    "import pickle\n",
+    "import numpy as np\n",
+    "\n",
+    "# malmoenv imports\n",
+    "import malmoenv\n",
+    "from malmoenv.utils.launcher import launch_minecraft\n",
+    "from malmoenv.utils.wrappers import DownsampleObs\n",
+    "\n",
+    "from examples.utils.utils import update_checkpoint_for_rollout, get_config\n",
+    "\n",
+    "# ray dependencies\n",
+    "import ray\n",
+    "from ray import tune\n",
+    "from ray.rllib.agents.ppo import PPOTrainer"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "Define some constants.\n",
+    "When training with ray's ```tune```, multiple checkpoints may be created, so we have to select the specific one we would like to use.\n",
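+    "For example, ```tune``` creates one directory per checkpoint inside the trial directory. A small sketch for picking the checkpoint with the highest iteration number (the paths are illustrative, not files shipped with this repository):\n",
+    "```python\n",
+    "import glob, os\n",
+    "ckpt_dirs = glob.glob('results/PPO/PPO_malmo_*/checkpoint_*')\n",
+    "latest = max(ckpt_dirs, key=lambda d: int(d.rsplit('_', 1)[-1]))  # numeric, not lexicographic\n",
+    "checkpoint_file = os.path.join(latest, 'checkpoint-' + latest.rsplit('_', 1)[-1])\n",
+    "```"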
+ ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "EPISODES = 10\n", + "ENV_NAME = \"malmo\"\n", + "MISSION_XML = os.path.realpath('missions/mobchase_single_agent.xml')\n", + "xml = Path(MISSION_XML).read_text()\n", + "\n", + "env_config = {\n", + " \"xml\": xml,\n", + " \"port\": 8999, # first port's number\n", + "}\n", + "\n", + "CHECKPOINT_FREQ = 100 # in terms of number of algorithm iterations\n", + "LOG_DIR = \"results/\" # creates a new directory and puts results there\n", + "\n", + "NUM_WORKERS = 1\n", + "NUM_GPUS = 0\n", + "TOTAL_STEPS = int(1e6)\n", + "launch_script = \"./launchClient_quiet.sh\"\n", + "\n", + "checkpoint_file = \"examples/checkpoints/PPO_malmo_single_agent/checkpoint_209/checkpoint-209\"\n", + "update_checkpoint_for_rollout(checkpoint_file)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Env creator function. This is the part where the ScreenCapturer can be utilised.\n", + "Note that for this sort of checkpoint restoration we have to register the environment." + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "def create_env(config):\n", + " env = malmoenv.make()\n", + " env.init(config.xml, config.port, reshape=True)\n", + " env.reward_range = (-float('inf'), float('inf'))\n", + "\n", + " env = DownsampleObs(env, shape=tuple((84, 84)))\n", + " return env\n", + "\n", + "tune.register_env(ENV_NAME, create_env)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "The next step is to load the original config and overwrite some parameters. We want to get the same setting as we did for the training, but we don't necessarily want to use the same hardware for evaluation. Let's say we trained an agent on a Server with multiple CPUs and a GPU, but we would like to evaluate the checkpoint locally using a single env and without a GPU. To do this we can just overwrite these entries in the config. We can also disable the exploration as shown below. Depending on the chosen algorithm there are more configurations that might be useful for evaluation see the RLlib documentation for more details." + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "config = get_config(checkpoint_file)\n", + "config[\"num_workers\"] = NUM_WORKERS\n", + "config[\"num_gpus\"] = NUM_GPUS\n", + "config[\"explore\"] = False" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Load agent\n", + "ray.init()\n", + "trainer = PPOTrainer(config)\n", + "trainer.restore(checkpoint_file)\n", + "policy = trainer.get_policy()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "As in the previous examples, the next step is to start the Malmo instances. In this version we manually create the environment, which gives us more flexibility over the evaluation." 
+ ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "GAME_INSTANCE_PORTS = [env_config.port + 1 + i for i in range(NUM_WORKERS)]\n", + "instances = launch_minecraft(GAME_INSTANCE_PORTS, launch_script=launch_script)\n", + "\n", + "env = create_env(config)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "In this setup we have more flexibility over the evaluation.\n", + "RLlib expects 4 dimensions for input [Batch, Width, Height, Channels], to satisfy this requirement we expand the state's dimension.\n", + "The ```action``` variable returned by the ```policy.compute_actions``` does not only return the best action but various algorithm specific output, such as value function, Q-values or action distributions.\n", + "The evaluation loop below is a simple example, but it can be used to extract more information about malmo. The ```info``` output returns various symbolic information about the current state." + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Custom evaluation loop\n", + "print(f\"running evaluations for {EPISODES} episodes\")\n", + "for ep in range(EPISODES):\n", + " state = env.reset()\n", + " done = False\n", + " ep_length = 0\n", + " ep_reward = 0\n", + " while not done:\n", + " # actions returns multiple algorithm specific entries such as value, action distribution...\n", + " actions = policy.compute_actions(np.expand_dims(state, 0))\n", + " state, reward, done, info = env.step(actions[0][0])\n", + " ep_length += 1\n", + " ep_reward += reward\n", + " if done:\n", + " print(f\"Episode #{ep} finished in {ep_length} steps with reward {ep_reward}\")\n", + " ep_length = 0\n", + " ep_reward = 0" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/examples/notebooks/rllib_restore_checkpoint.ipynb b/examples/notebooks/rllib_restore_checkpoint.ipynb index ad80197f8..ffee17fd0 100644 --- a/examples/notebooks/rllib_restore_checkpoint.ipynb +++ b/examples/notebooks/rllib_restore_checkpoint.ipynb @@ -70,7 +70,7 @@ "TOTAL_STEPS = int(1e6)\n", "launch_script = \"./launchClient_quiet.sh\"\n", "\n", - "checkpoint_file = \"/home/mballa/data/PPO/PPO_malmo_5ef58_00000_0_2020-11-15_00-34-50/checkpoint_80/checkpoint-80\"\n", + "checkpoint_file = \"examples/checkpoints/PPO_malmo_single_agent/checkpoint_209/checkpoint-209\"\n", "update_checkpoint_for_rollout(checkpoint_file)" ], "metadata": { diff --git a/examples/single_agent_evaluation.py b/examples/single_agent_evaluation.py index 2ebc52e07..2d30c96dc 100644 --- a/examples/single_agent_evaluation.py +++ b/examples/single_agent_evaluation.py @@ -29,10 +29,10 @@ def create_env(config): if __name__ == "__main__": - checkpoint_file = 
"/home/mballa/data/PPO/PPO_malmo_5ef58_00000_0_2020-11-15_00-34-50/checkpoint_80/checkpoint-80" + checkpoint_file = "checkpoints/PPO_malmo_single_agent/checkpoint_209/checkpoint-209" update_checkpoint_for_rollout(checkpoint_file) parser = argparse.ArgumentParser(description='malmoenv arguments') - parser.add_argument('--mission', type=str, default='../missions/mobchase_single_agent.xml', + parser.add_argument('--mission', type=str, default='../MalmoEnv/missions/mobchase_single_agent.xml', help='the mission xml') parser.add_argument('--port', type=int, default=8999, help='the first mission server port') parser.add_argument('--server', type=str, default='127.0.0.1', help='the mission server DNS or IP address') diff --git a/examples/single_agent_restoration.py b/examples/single_agent_restoration.py index 370231c55..ce798148a 100644 --- a/examples/single_agent_restoration.py +++ b/examples/single_agent_restoration.py @@ -29,10 +29,10 @@ def create_env(config): if __name__ == "__main__": - checkpoint_file = "/home/mballa/data/PPO/PPO_malmo_5ef58_00000_0_2020-11-15_00-34-50/checkpoint_80/checkpoint-80" + checkpoint_file = "checkpoints/PPO_malmo_single_agent/checkpoint_209/checkpoint-209" update_checkpoint_for_rollout(checkpoint_file) parser = argparse.ArgumentParser(description='malmoenv arguments') - parser.add_argument('--mission', type=str, default='missions/mobchase_single_agent.xml', + parser.add_argument('--mission', type=str, default='../MalmoEnv/missions/mobchase_single_agent.xml', help='the mission xml') parser.add_argument('--port', type=int, default=8999, help='the first mission server port') parser.add_argument('--server', type=str, default='127.0.0.1', help='the mission server DNS or IP address')