diff --git a/jetbot/apps/stats.py b/jetbot/apps/stats.py index d1ae2bab..e018bef7 100644 --- a/jetbot/apps/stats.py +++ b/jetbot/apps/stats.py @@ -32,8 +32,16 @@ # 128x32 display with hardware I2C: disp = Adafruit_SSD1306.SSD1306_128_32(rst=None, i2c_bus=1, gpio=1) # setting gpio to 1 is hack to avoid platform detection -# Initialize library. -disp.begin() +while True: + + try: + # Try to connect to the OLED display module via I2C. + disp.begin() + except OSError as err: + print("OS error: {0}".format(err)) + time.sleep(10) + else: + break # Clear display. disp.clear() diff --git a/jetbot/camera.py b/jetbot/camera.py index dd3b5d0a..492d646c 100644 --- a/jetbot/camera.py +++ b/jetbot/camera.py @@ -14,8 +14,8 @@ class Camera(SingletonConfigurable): width = traitlets.Integer(default_value=224).tag(config=True) height = traitlets.Integer(default_value=224).tag(config=True) fps = traitlets.Integer(default_value=21).tag(config=True) - capture_width = traitlets.Integer(default_value=3280).tag(config=True) - capture_height = traitlets.Integer(default_value=2464).tag(config=True) + capture_width = traitlets.Integer(default_value=816).tag(config=True) + capture_height = traitlets.Integer(default_value=616).tag(config=True) def __init__(self, *args, **kwargs): self.value = np.empty((self.height, self.width, 3), dtype=np.uint8) diff --git a/jetbot/utils/utils.py b/jetbot/utils/utils.py index 51cf0933..70806af5 100644 --- a/jetbot/utils/utils.py +++ b/jetbot/utils/utils.py @@ -25,11 +25,24 @@ def platform_is_nano(): def get_ip_address(interface): - if get_network_interface_state(interface) == 'down': + state = get_network_interface_state(interface) + if state == 'down' or state is None: return None + cmd = "ifconfig %s | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | grep -Eo '([0-9]*\.){3}[0-9]*' | grep -v '127.0.0.1'" % interface return subprocess.check_output(cmd, shell=True).decode('ascii')[:-1] def get_network_interface_state(interface): - return subprocess.check_output('cat /sys/class/net/%s/operstate' % interface, shell=True).decode('ascii')[:-1] + if not os.path.exists('/sys/class/net/%s/operstate' % interface): + #print("%s file does NOT exist" % interface) + return None + + try: + status = subprocess.check_output('cat /sys/class/net/%s/operstate' % interface, shell=True).decode('ascii')[:-1] + except Exception as err: + print("Exception: {0}".format(err)) + return None + else: + return status + diff --git a/notebooks/collision_avoidance/live_demo_resnet18.ipynb b/notebooks/collision_avoidance/live_demo_resnet18.ipynb new file mode 100644 index 00000000..e23e29fe --- /dev/null +++ b/notebooks/collision_avoidance/live_demo_resnet18.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Collision Avoidance - Live Demo (ResNet18)\n", + "\n", + "In this notebook we'll use the model we trained to detect whether the robot is ``free`` or ``blocked`` to enable a collision avoidance behavior on the robot. \n", + "\n", + "## Load the trained model\n", + "\n", + "We'll assume that you've already downloaded ``best_model_resnet18.pth`` to your workstation as instructed in the training notebook. Now, you should upload this model into this notebook's\n", + "directory by using the Jupyter Lab upload tool. Once that's finished there should be a file named ``best_model_resnet18.pth`` in this notebook's directory. 
\n", + "\n", + "> Please make sure the file has uploaded fully before calling the next cell\n", + "\n", + "Execute the code below to initialize the PyTorch model. This should look very familiar from the training notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torchvision\n", + "\n", + "model = torchvision.models.resnet18(pretrained=False)\n", + "model.fc = torch.nn.Linear(512, 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, load the trained weights from the ``best_model_resnet18.pth`` file that you uploaded" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.load_state_dict(torch.load('best_model_resnet18.pth'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Currently, the model weights are located on the CPU memory execute the code below to transfer to the GPU device." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device('cuda')\n", + "model = model.to(device)\n", + "model = model.eval().half()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create the preprocessing function\n", + "\n", + "We have now loaded our model, but there's a slight issue. The format that we trained our model doesnt *exactly* match the format of the camera. To do that, \n", + "we need to do some *preprocessing*. This involves the following steps\n", + "\n", + "1. Convert from HWC layout to CHW layout\n", + "2. Normalize using same parameters as we did during training (our camera provides values in [0, 255] range and training loaded images in [0, 1] range so we need to scale by 255.0\n", + "3. Transfer the data from CPU memory to GPU memory\n", + "4. Add a batch dimension" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torchvision.transforms as transforms\n", + "import torch.nn.functional as F\n", + "import cv2\n", + "import PIL.Image\n", + "import numpy as np\n", + "\n", + "mean = torch.Tensor([0.485, 0.456, 0.406]).cuda().half()\n", + "std = torch.Tensor([0.229, 0.224, 0.225]).cuda().half()\n", + "\n", + "normalize = torchvision.transforms.Normalize(mean, std)\n", + "\n", + "def preprocess(image):\n", + " image = PIL.Image.fromarray(image)\n", + " image = transforms.functional.to_tensor(image).to(device).half()\n", + " image.sub_(mean[:, None, None]).div_(std[:, None, None])\n", + " return image[None, ...]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Great! We've now defined our pre-processing function which can convert images from the camera format to the neural network input format.\n", + "\n", + "Now, let's start and display our camera. You should be pretty familiar with this by now. We'll also create a slider that will display the\n", + "probability that the robot is blocked." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import traitlets\n", + "from IPython.display import display\n", + "import ipywidgets.widgets as widgets\n", + "from jetbot import Camera, bgr8_to_jpeg\n", + "\n", + "camera = Camera.instance(width=224, height=224)\n", + "image = widgets.Image(format='jpeg', width=224, height=224)\n", + "blocked_slider = widgets.FloatSlider(description='blocked', min=0.0, max=1.0, orientation='vertical')\n", + "\n", + "camera_link = traitlets.dlink((camera, 'value'), (image, 'value'), transform=bgr8_to_jpeg)\n", + "\n", + "display(widgets.HBox([image, blocked_slider]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll also create our robot instance which we'll need to drive the motors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from jetbot import Robot\n", + "\n", + "robot = Robot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll create a function that will get called whenever the camera's value changes. This function will do the following steps\n", + "\n", + "1. Pre-process the camera image\n", + "2. Execute the neural network\n", + "3. While the neural network output indicates we're blocked, we'll turn left, otherwise we go forward." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import torch.nn.functional as F\n", + "import time\n", + "\n", + "def update(change):\n", + " global blocked_slider, robot\n", + " x = change['new'] \n", + " x = preprocess(x)\n", + " y = model(x)\n", + " \n", + " # we apply the `softmax` function to normalize the output vector so it sums to 1 (which makes it a probability distribution)\n", + " y = F.softmax(y, dim=1)\n", + " \n", + " prob_blocked = float(y.flatten()[0])\n", + " \n", + " blocked_slider.value = prob_blocked\n", + " \n", + " if prob_blocked < 0.5:\n", + " robot.forward(0.2)\n", + " else:\n", + " robot.left(0.2)\n", + " \n", + " time.sleep(0.001)\n", + " \n", + "update({'new': camera.value}) # we call the function once to intialize" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cool! We've created our neural network execution function, but now we need to attach it to the camera for processing. \n", + "\n", + "We accomplish that with the ``observe`` function.\n", + "\n", + "> WARNING: This code will move the robot!! Please make sure your robot has clearance. The collision avoidance should work, but the neural\n", + "> network is only as good as the data it's trained on!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera.observe(update, names='value') # this attaches the 'update' function to the 'value' traitlet of our camera" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Awesome! If your robot is plugged in it should now be generating new commands with each new camera frame. Perhaps start by placing your robot on the ground and seeing what it does when it reaches an obstacle.\n", + "\n", + "If you want to stop this behavior, you can unattach this callback by executing the code below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "camera.unobserve(update, names='value')\n", + "\n", + "time.sleep(0.1) # add a small sleep to make sure frames have finished processing\n", + "\n", + "robot.stop()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Perhaps you want the robot to run without streaming video to the browser. You can unlink the camera as below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera_link.unlink() # don't stream to browser (will still run camera)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To continue streaming call the following." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera_link.link() # stream to browser (wont run camera)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Conclusion\n", + "\n", + "That's it for this live demo! Hopefully you had some fun and your robot avoided collisions intelligently! \n", + "\n", + "If your robot wasn't avoiding collisions very well, try to spot where it fails. The beauty is that we can collect more data for these failure scenarios\n", + "and the robot should get even better :)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/collision_avoidance/live_demo_resnet18_build_trt.ipynb b/notebooks/collision_avoidance/live_demo_resnet18_build_trt.ipynb new file mode 100644 index 00000000..90979f99 --- /dev/null +++ b/notebooks/collision_avoidance/live_demo_resnet18_build_trt.ipynb @@ -0,0 +1,158 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Collision Avoidance - Build TensorRT model for live demo\n", + "\n", + "In this notebook we'll use the model we trained to detect whether the robot is ``free`` or ``blocked`` to enable a collision avoidance behavior on the robot. \n", + "\n", + "## Load the trained model\n", + "\n", + "We'll assumed that you've already downloaded the ``best_model.pth`` to your workstation as instructed in the training notebook. Now, you should upload this model into this notebook's\n", + "directory by using the Jupyter Lab upload tool. Once that's finished there should be a file named ``best_model.pth`` in this notebook's directory. \n", + "\n", + "> Please make sure the file has uploaded fully before calling the next cell\n", + "\n", + "Execute the code below to initialize the PyTorch model. This should look very familiar from the training notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torchvision\n", + "\n", + "model = torchvision.models.resnet18(pretrained=False)\n", + "model.fc = torch.nn.Linear(512, 2)\n", + "model = model.cuda().eval().half()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, load the trained weights from the ``best_model_resnet18.pth`` file that you uploaded" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.load_state_dict(torch.load('best_model_resnet18.pth'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first cell above already transferred the model to the GPU, so the weights are no longer on the CPU. Execute the code below to define the ``device`` handle." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device('cuda')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TensorRT" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> If your setup does not have `torch2trt` installed, you need to first install `torch2trt` by executing the following in the console.\n", + "```bash\n", + "cd $HOME\n", + "git clone https://github.com/NVIDIA-AI-IOT/torch2trt\n", + "cd torch2trt\n", + "sudo python3 setup.py install\n", + "```\n", + "\n", + "Convert and optimize the model using torch2trt for faster inference with TensorRT. Please see the torch2trt readme for more details.\n", + "\n", + "> This optimization process can take a couple minutes to complete." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torch2trt import torch2trt\n", + "\n", + "data = torch.zeros((1, 3, 224, 224)).cuda().half()\n", + "\n", + "model_trt = torch2trt(model, [data], fp16_mode=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Save the optimized model using the cell below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(model_trt.state_dict(), 'best_model_trt.pth')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Next\n", + "\n", + "Open live_demo_resnet18_trt.ipynb to move JetBot with the TensorRT optimized model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/collision_avoidance/live_demo_resnet18_trt.ipynb b/notebooks/collision_avoidance/live_demo_resnet18_trt.ipynb new file mode 100644 index 00000000..2065c50a --- /dev/null +++ b/notebooks/collision_avoidance/live_demo_resnet18_trt.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Collision Avoidance - Live Demo (TensorRT)\n", + "\n", + "In this notebook we'll use the model we trained to detect whether the robot is ``free`` or ``blocked`` to enable a collision avoidance behavior on the robot. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TensorRT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "device = torch.device('cuda')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load the optimized model by executing the cell below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from torch2trt import TRTModule\n", + "\n", + "model_trt = TRTModule()\n", + "model_trt.load_state_dict(torch.load('best_model_trt.pth'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create the preprocessing function\n", + "\n", + "We have now loaded our model, but there's a slight issue. The format that we trained our model doesnt *exactly* match the format of the camera. To do that, \n", + "we need to do some *preprocessing*. This involves the following steps\n", + "\n", + "1. Convert from HWC layout to CHW layout\n", + "2. Normalize using same parameters as we did during training (our camera provides values in [0, 255] range and training loaded images in [0, 1] range so we need to scale by 255.0\n", + "3. Transfer the data from CPU memory to GPU memory\n", + "4. Add a batch dimension" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torchvision.transforms as transforms\n", + "import torch.nn.functional as F\n", + "import cv2\n", + "import PIL.Image\n", + "import numpy as np\n", + "\n", + "mean = torch.Tensor([0.485, 0.456, 0.406]).cuda().half()\n", + "std = torch.Tensor([0.229, 0.224, 0.225]).cuda().half()\n", + "\n", + "normalize = torchvision.transforms.Normalize(mean, std)\n", + "\n", + "def preprocess(image):\n", + " image = PIL.Image.fromarray(image)\n", + " image = transforms.functional.to_tensor(image).to(device).half()\n", + " image.sub_(mean[:, None, None]).div_(std[:, None, None])\n", + " return image[None, ...]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Great! We've now defined our pre-processing function which can convert images from the camera format to the neural network input format.\n", + "\n", + "Now, let's start and display our camera. You should be pretty familiar with this by now. We'll also create a slider that will display the\n", + "probability that the robot is blocked." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import traitlets\n", + "from IPython.display import display\n", + "import ipywidgets.widgets as widgets\n", + "from jetbot import Camera, bgr8_to_jpeg\n", + "\n", + "camera = Camera.instance(width=224, height=224)\n", + "image = widgets.Image(format='jpeg', width=224, height=224)\n", + "blocked_slider = widgets.FloatSlider(description='blocked', min=0.0, max=1.0, orientation='vertical')\n", + "\n", + "camera_link = traitlets.dlink((camera, 'value'), (image, 'value'), transform=bgr8_to_jpeg)\n", + "\n", + "display(widgets.HBox([image, blocked_slider]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll also create our robot instance which we'll need to drive the motors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from jetbot import Robot\n", + "\n", + "robot = Robot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll create a function that will get called whenever the camera's value changes. This function will do the following steps\n", + "\n", + "1. Pre-process the camera image\n", + "2. Execute the neural network\n", + "3. While the neural network output indicates we're blocked, we'll turn left, otherwise we go forward." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import torch.nn.functional as F\n", + "import time\n", + "\n", + "def update(change):\n", + " global blocked_slider, robot\n", + " x = change['new'] \n", + " x = preprocess(x)\n", + " y = model_trt(x)\n", + " #print(y)\n", + " \n", + " # we apply the `softmax` function to normalize the output vector so it sums to 1 (which makes it a probability distribution)\n", + " y = F.softmax(y, dim=1)\n", + " #print(y)\n", + " \n", + " prob_blocked = float(y.flatten()[0])\n", + " #print(prob_blocked)\n", + " \n", + " blocked_slider.value = prob_blocked\n", + " \n", + " if prob_blocked < 0.5:\n", + " robot.forward(0.2)\n", + " else:\n", + " robot.left(0.2)\n", + " \n", + " time.sleep(0.001)\n", + " \n", + "update({'new': camera.value}) # we call the function once to intialize" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cool! We've created our neural network execution function, but now we need to attach it to the camera for processing. \n", + "\n", + "We accomplish that with the ``observe`` function.\n", + "\n", + "> WARNING: This code will move the robot!! Please make sure your robot has clearance. The collision avoidance should work, but the neural\n", + "> network is only as good as the data it's trained on!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera.observe(update, names='value') # this attaches the 'update' function to the 'value' traitlet of our camera" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Awesome! If your robot is plugged in it should now be generating new commands with each new camera frame. Perhaps start by placing your robot on the ground and seeing what it does when it reaches an obstacle.\n", + "\n", + "If you want to stop this behavior, you can unattach this callback by executing the code below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "camera.unobserve(update, names='value')\n", + "\n", + "time.sleep(0.1) # add a small sleep to make sure frames have finished processing\n", + "\n", + "robot.stop()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Perhaps you want the robot to run without streaming video to the browser. You can unlink the camera as below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera_link.unlink() # don't stream to browser (will still run camera)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To continue streaming call the following." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera_link.link() # stream to browser (wont run camera)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Conclusion\n", + "\n", + "That's it for this live demo! Hopefully you had some fun and your robot avoided collisions intelligently! \n", + "\n", + "If your robot wasn't avoiding collisions very well, try to spot where it fails. The beauty is that we can collect more data for these failure scenarios\n", + "and the robot should get even better :)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/collision_avoidance/train_model.ipynb b/notebooks/collision_avoidance/train_model.ipynb index 83141963..a3fa8e55 100644 --- a/notebooks/collision_avoidance/train_model.ipynb +++ b/notebooks/collision_avoidance/train_model.ipynb @@ -128,16 +128,16 @@ "source": [ "train_loader = torch.utils.data.DataLoader(\n", " train_dataset,\n", - " batch_size=16,\n", + " batch_size=8,\n", " shuffle=True,\n", - " num_workers=4\n", + " num_workers=1\n", ")\n", "\n", "test_loader = torch.utils.data.DataLoader(\n", " test_dataset,\n", - " batch_size=16,\n", + " batch_size=8,\n", " shuffle=True,\n", - " num_workers=4\n", + " num_workers=1\n", ")" ] }, @@ -271,9 +271,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.9" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/notebooks/collision_avoidance/train_model_plot.ipynb b/notebooks/collision_avoidance/train_model_plot.ipynb new file mode 100644 index 00000000..0ff77b4a --- /dev/null +++ b/notebooks/collision_avoidance/train_model_plot.ipynb @@ -0,0 +1,360 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Collision Avoidance - Train Model (with live graph)\n", + "\n", + "Welcome to this host side Jupyter Notebook! This should look familiar if you ran through the notebooks that run on the robot. In this notebook we'll train our image classifier to detect two classes\n", + "``free`` and ``blocked``, which we'll use for avoiding collisions. 
For this, we'll use a popular deep learning library *PyTorch*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.optim as optim\n", + "import torch.nn.functional as F\n", + "import torchvision\n", + "import torchvision.datasets as datasets\n", + "import torchvision.models as models\n", + "import torchvision.transforms as transforms" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Upload and extract dataset\n", + "\n", + "Before you start, you should upload the ``dataset.zip`` file that you created in the ``data_collection.ipynb`` notebook on the robot.\n", + "\n", + "You should then extract this dataset by calling the command below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!unzip -q dataset.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should see a folder named ``dataset`` appear in the file browser." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create dataset instance" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we use the ``ImageFolder`` dataset class available with the ``torchvision.datasets`` package. We attach transforms from the ``torchvision.transforms`` package to prepare the data for training. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = datasets.ImageFolder(\n", + " 'dataset',\n", + " transforms.Compose([\n", + " transforms.ColorJitter(0.1, 0.1, 0.1, 0.1),\n", + " transforms.Resize((224, 224)),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])\n", + " ])\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Split dataset into train and test sets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we split the dataset into *training* and *test* sets. The test set will be used to verify the accuracy of the model we train." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_dataset, test_dataset = torch.utils.data.random_split(dataset, [len(dataset) - 50, 50])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create data loaders to load data in batches" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll create two ``DataLoader`` instances, which provide utilities for shuffling data, producing *batches* of images, and loading the samples in parallel with multiple workers." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = torch.utils.data.DataLoader(\n", + " train_dataset,\n", + " batch_size=16,\n", + " shuffle=True,\n", + " num_workers=4\n", + ")\n", + "\n", + "test_loader = torch.utils.data.DataLoader(\n", + " test_dataset,\n", + " batch_size=16,\n", + " shuffle=True,\n", + " num_workers=4\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define the neural network\n", + "\n", + "Now, we define the neural network we'll be training. 
The *torchvision* package provides a collection of pre-trained models that we can use.\n", + "\n", + "In a process called *transfer learning*, we can repurpose a pre-trained model (trained on millions of images) for a new task that has possibly much less data available.\n", + "\n", + "Important features that were learned in the original training of the pre-trained model are re-usable for the new task. We'll use the ``alexnet`` model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = models.alexnet(pretrained=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The ``alexnet`` model was originally trained for a dataset that had 1000 class labels, but our dataset only has two class labels! We'll replace\n", + "the final layer with a new, untrained layer that has only two outputs. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we transfer our model for execution on the GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device('cuda')\n", + "model = model.to(device)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualization utilities\n", + "\n", + "Exececute the cell below to enable live plotting. \n", + "\n", + "> You need to install bokeh (https://docs.bokeh.org/en/latest/docs/installation.html)\n", + "\n", + "```bash\n", + "sudo pip3 install bokeh\n", + "sudo jupyter labextension install @jupyter-widgets/jupyterlab-manager\n", + "sudo jupyter labextension install @bokeh/jupyter_bokeh\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from bokeh.io import push_notebook, show, output_notebook\n", + "from bokeh.layouts import row\n", + "from bokeh.plotting import figure\n", + "from bokeh.models import ColumnDataSource\n", + "from bokeh.models.tickers import SingleIntervalTicker\n", + "output_notebook()\n", + "\n", + "colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']\n", + "\n", + "p1 = figure(title=\"Loss\", x_axis_label=\"Epoch\", plot_height=300, plot_width=360)\n", + "p2 = figure(title=\"Accuracy\", x_axis_label=\"Epoch\", plot_height=300, plot_width=360)\n", + "\n", + "source1 = ColumnDataSource(data={'epochs': [], 'trainlosses': [], 'testlosses': [] })\n", + "source2 = ColumnDataSource(data={'epochs': [], 'train_accuracies': [], 'test_accuracies': []})\n", + "\n", + "#r = p1.multi_line(ys=['trainlosses', 'testlosses'], xs='epochs', color=colors, alpha=0.8, legend_label=['Training','Test'], source=source)\n", + "r1 = p1.line(x='epochs', y='trainlosses', line_width=2, color=colors[0], alpha=0.8, legend_label=\"Train\", source=source1)\n", + "r2 = p1.line(x='epochs', y='testlosses', line_width=2, color=colors[1], alpha=0.8, legend_label=\"Test\", source=source1)\n", + "\n", + "r3 = p2.line(x='epochs', y='train_accuracies', line_width=2, color=colors[0], alpha=0.8, legend_label=\"Train\", source=source2)\n", + "r4 = p2.line(x='epochs', y='test_accuracies', line_width=2, color=colors[1], alpha=0.8, legend_label=\"Test\", source=source2)\n", + "\n", + "p1.legend.location = \"top_right\"\n", + "p1.legend.click_policy=\"hide\"\n", + "\n", + "p2.legend.location = 
\"bottom_right\"\n", + "p2.legend.click_policy=\"hide\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train the neural network\n", + "\n", + "Using the code below we will train the neural network for 30 epochs, saving the best performing model after each epoch.\n", + "\n", + "> An epoch is a full run through our data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "NUM_EPOCHS = 30\n", + "BEST_MODEL_PATH = 'best_model.pth'\n", + "best_accuracy = 0.0\n", + "\n", + "optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", + "\n", + "handle = show(row(p1, p2), notebook_handle=True)\n", + "\n", + "for epoch in range(NUM_EPOCHS):\n", + " \n", + " train_loss = 0.0\n", + " train_error_count = 0.0\n", + " for images, labels in iter(train_loader):\n", + " images = images.to(device)\n", + " labels = labels.to(device)\n", + " optimizer.zero_grad()\n", + " outputs = model(images)\n", + " loss = F.cross_entropy(outputs, labels)\n", + " train_loss += loss\n", + " train_error_count += float(torch.sum(torch.abs(labels - outputs.argmax(1))))\n", + " loss.backward()\n", + " optimizer.step()\n", + " train_loss /= len(train_loader)\n", + " \n", + " test_loss = 0.0\n", + " test_error_count = 0.0\n", + " for images, labels in iter(test_loader):\n", + " images = images.to(device)\n", + " labels = labels.to(device)\n", + " outputs = model(images)\n", + " loss = F.cross_entropy(outputs, labels)\n", + " test_loss += loss\n", + " test_error_count += float(torch.sum(torch.abs(labels - outputs.argmax(1))))\n", + " test_loss /= len(test_loader)\n", + " \n", + " train_accuracy = 1.0 - float(train_error_count) / float(len(train_dataset))\n", + " test_accuracy = 1.0 - float(test_error_count) / float(len(test_dataset))\n", + " print('%d: %f, %f, %f, %f' % (epoch+1, train_loss, test_loss, train_accuracy, test_accuracy))\n", + " \n", + " \n", + " new_data1 = {'epochs': [epoch+1],\n", + " 'trainlosses': [float(train_loss)],\n", + " 'testlosses': [float(test_loss)] }\n", + " source1.stream(new_data1)\n", + " new_data2 = {'epochs': [epoch+1],\n", + " 'train_accuracies': [float(train_accuracy)],\n", + " 'test_accuracies': [float(test_accuracy)] }\n", + " source2.stream(new_data2)\n", + " push_notebook(handle=handle)\n", + " \n", + " if test_accuracy > best_accuracy:\n", + " torch.save(model.state_dict(), BEST_MODEL_PATH)\n", + " best_accuracy = test_accuracy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once that is finished, you should see a file ``best_model.pth`` in the Jupyter Lab file browser. 
Select ``Right click`` -> ``Download`` to download the model to your workstation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/collision_avoidance/train_model_resnet18.ipynb b/notebooks/collision_avoidance/train_model_resnet18.ipynb new file mode 100644 index 00000000..1b64793e --- /dev/null +++ b/notebooks/collision_avoidance/train_model_resnet18.ipynb @@ -0,0 +1,286 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Collision Avoidance - Train Model (ResNet18)\n", + "\n", + "Welcome to this host side Jupyter Notebook! This should look familiar if you ran through the notebooks that run on the robot. In this notebook we'll train our image classifier to detect two classes\n", + "``free`` and ``blocked``, which we'll use for avoiding collisions. For this, we'll use a popular deep learning library *PyTorch*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.optim as optim\n", + "import torch.nn.functional as F\n", + "import torchvision\n", + "import torchvision.datasets as datasets\n", + "import torchvision.models as models\n", + "import torchvision.transforms as transforms" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Upload and extract dataset\n", + "\n", + "Before you start, you should upload the ``dataset.zip`` file that you created in the ``data_collection.ipynb`` notebook on the robot.\n", + "\n", + "You should then extract this dataset by calling the command below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!unzip -q dataset.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should see a folder named ``dataset`` appear in the file browser." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create dataset instance" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we use the ``ImageFolder`` dataset class available with the ``torchvision.datasets`` package. We attach transforms from the ``torchvision.transforms`` package to prepare the data for training. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = datasets.ImageFolder(\n", + " 'dataset',\n", + " transforms.Compose([\n", + " transforms.ColorJitter(0.1, 0.1, 0.1, 0.1),\n", + " transforms.Resize((224, 224)),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])\n", + " ])\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Split dataset into train and test sets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we split the dataset into *training* and *test* sets. The test set will be used to verify the accuracy of the model we train." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_dataset, test_dataset = torch.utils.data.random_split(dataset, [len(dataset) - 50, 50])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create data loaders to load data in batches" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll create two ``DataLoader`` instances, which provide utilities for shuffling data, producing *batches* of images, and loading the samples in parallel with multiple workers." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = torch.utils.data.DataLoader(\n", + " train_dataset,\n", + " batch_size=16,\n", + " shuffle=True,\n", + " num_workers=4\n", + ")\n", + "\n", + "test_loader = torch.utils.data.DataLoader(\n", + " test_dataset,\n", + " batch_size=16,\n", + " shuffle=True,\n", + " num_workers=4\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define the neural network\n", + "\n", + "Now, we define the neural network we'll be training. The *torchvision* package provides a collection of pre-trained models that we can use.\n", + "\n", + "In a process called *transfer learning*, we can repurpose a pre-trained model (trained on millions of images) for a new task that has possibly much less data available.\n", + "\n", + "Important features that were learned in the original training of the pre-trained model are re-usable for the new task. We'll use the ``resnet18`` model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = models.resnet18(pretrained=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The ``resnet18`` model was originally trained for a dataset that had 1000 class labels, but our dataset only has two class labels! We'll replace\n", + "the final layer with a new, untrained layer that has only two outputs. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.fc = torch.nn.Linear(512, 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we transfer our model for execution on the GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device('cuda')\n", + "model = model.to(device)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train the neural network\n", + "\n", + "Using the code below we will train the neural network for 30 epochs, saving the best performing model after each epoch.\n", + "\n", + "> An epoch is a full run through our data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "NUM_EPOCHS = 30\n", + "BEST_MODEL_PATH = 'best_model_resnet18.pth'\n", + "best_accuracy = 0.0\n", + "\n", + "optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", + "\n", + "for epoch in range(NUM_EPOCHS):\n", + " \n", + " for images, labels in iter(train_loader):\n", + " images = images.to(device)\n", + " labels = labels.to(device)\n", + " optimizer.zero_grad()\n", + " outputs = model(images)\n", + " loss = F.cross_entropy(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + " \n", + " test_error_count = 0.0\n", + " for images, labels in iter(test_loader):\n", + " images = images.to(device)\n", + " labels = labels.to(device)\n", + " outputs = model(images)\n", + " test_error_count += float(torch.sum(torch.abs(labels - outputs.argmax(1))))\n", + " \n", + " test_accuracy = 1.0 - float(test_error_count) / float(len(test_dataset))\n", + " print('%d: %f' % (epoch, test_accuracy))\n", + " if test_accuracy > best_accuracy:\n", + " torch.save(model.state_dict(), BEST_MODEL_PATH)\n", + " best_accuracy = test_accuracy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once that is finished, you should see a file ``best_model_resnet18.pth`` in the Jupyter Lab file browser. Right click the file and select ``Download`` to download the model to your workstation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/road_following/live_demo_build_trt.ipynb b/notebooks/road_following/live_demo_build_trt.ipynb new file mode 100644 index 00000000..981a64ce --- /dev/null +++ b/notebooks/road_following/live_demo_build_trt.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Road Following - Build TensorRT model for live demo" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we will optimize the model we trained using TensorRT." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the trained model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will assume that you have already downloaded ``best_steering_model_xy.pth`` to your workstation as instructed in the \"train_model.ipynb\" notebook. Now, you should upload the model file to the JetBot, into this notebook's directory. Once that's finished there should be a file named ``best_steering_model_xy.pth`` in this notebook's directory." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> Please make sure the file has uploaded fully before calling the next cell" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Execute the code below to initialize the PyTorch model. This should look very familiar from the training notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torchvision\n", + "import torch\n", + "\n", + "model = torchvision.models.resnet18(pretrained=False)\n", + "model.fc = torch.nn.Linear(512, 2)\n", + "model = model.cuda().eval().half()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, load the trained weights from the ``best_steering_model_xy.pth`` file that you uploaded." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.load_state_dict(torch.load('best_steering_model_xy.pth'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first cell above already transferred the model to the GPU, so the weights are no longer on the CPU. Execute the code below to define the ``device`` handle." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device('cuda')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## TensorRT" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> If your setup does not have `torch2trt` installed, you need to first install `torch2trt` by executing the following in the console.\n", + "```bash\n", + "cd $HOME\n", + "git clone https://github.com/NVIDIA-AI-IOT/torch2trt\n", + "cd torch2trt\n", + "sudo python3 setup.py install\n", + "```\n", + "\n", + "Convert and optimize the model using torch2trt for faster inference with TensorRT. Please see the [torch2trt](https://github.com/NVIDIA-AI-IOT/torch2trt) readme for more details.\n", + "\n", + "> This optimization process can take a couple minutes to complete." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torch2trt import torch2trt\n", + "\n", + "data = torch.zeros((1, 3, 224, 224)).cuda().half()\n", + "\n", + "model_trt = torch2trt(model, [data], fp16_mode=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Save the optimized model using the cell below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(model_trt.state_dict(), 'best_steering_model_xy_trt.pth')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Next\n", + "Open live_demo_trt.ipynb to move JetBot with the TensorRT optimized model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/road_following/live_demo_trt.ipynb b/notebooks/road_following/live_demo_trt.ipynb new file mode 100644 index 00000000..5435454e --- /dev/null +++ b/notebooks/road_following/live_demo_trt.ipynb @@ -0,0 +1,341 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Road Following - Live demo (TensorRT)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we will use the model we trained to move JetBot smoothly along the track. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TensorRT" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "device = torch.device('cuda')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load the TRT optimized model by executing the cell below" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import torch\n", + "from torch2trt import TRTModule\n", + "\n", + "model_trt = TRTModule()\n", + "model_trt.load_state_dict(torch.load('best_steering_model_xy_trt.pth'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating the Pre-Processing Function" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have now loaded our model, but there's a slight issue. The format that we trained our model doesnt exactly match the format of the camera. To do that, we need to do some preprocessing. This involves the following steps:\n", + "\n", + "1. Convert from HWC layout to CHW layout\n", + "2. Normalize using same parameters as we did during training (our camera provides values in [0, 255] range and training loaded images in [0, 1] range so we need to scale by 255.0\n", + "3. Transfer the data from CPU memory to GPU memory\n", + "4. Add a batch dimension" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import torchvision.transforms as transforms\n", + "import torch.nn.functional as F\n", + "import cv2\n", + "import PIL.Image\n", + "import numpy as np\n", + "\n", + "mean = torch.Tensor([0.485, 0.456, 0.406]).cuda().half()\n", + "std = torch.Tensor([0.229, 0.224, 0.225]).cuda().half()\n", + "\n", + "def preprocess(image):\n", + " image = PIL.Image.fromarray(image)\n", + " image = transforms.functional.to_tensor(image).to(device).half()\n", + " image.sub_(mean[:, None, None]).div_(std[:, None, None])\n", + " return image[None, ...]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Awesome! We've now defined our pre-processing function which can convert images from the camera format to the neural network input format.\n", + "\n", + "Now, let's start and display our camera. You should be pretty familiar with this by now. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import display\n", + "import ipywidgets\n", + "import traitlets\n", + "from jetbot import Camera, bgr8_to_jpeg\n", + "\n", + "camera = Camera()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "image_widget = ipywidgets.Image()\n", + "\n", + "traitlets.dlink((camera, 'value'), (image_widget, 'value'), transform=bgr8_to_jpeg)\n", + "\n", + "display(image_widget)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll also create our robot instance which we'll need to drive the motors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from jetbot import Robot\n", + "\n", + "robot = Robot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we will define sliders to control the JetBot.\n", + "> Note: We have initialized the slider values to a known-good configuration; however, these might not work for your dataset, so please increase or decrease the sliders according to your setup and environment.\n", + "\n", + "1. Speed Control (speed_gain_slider): To start your JetBot, increase ``speed_gain_slider`` \n", + "2. Steering Gain Control (steering_gain_slider): If you see the JetBot wobbling, reduce ``steering_gain_slider`` until it is smooth\n", + "3. Steering Bias control (steering_bias_slider): If you see the JetBot biased towards the extreme right or extreme left side of the track, adjust this slider until the JetBot starts following the line or track in the center. This accounts for motor biases as well as camera offsets\n", + "\n", + "> Note: You should play with the sliders mentioned above at a lower speed to get smooth JetBot road following behavior." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "speed_gain_slider = ipywidgets.FloatSlider(min=0.0, max=1.0, step=0.01, description='speed gain')\n", + "steering_gain_slider = ipywidgets.FloatSlider(min=0.0, max=1.0, step=0.01, value=0.2, description='steering gain')\n", + "steering_dgain_slider = ipywidgets.FloatSlider(min=0.0, max=0.5, step=0.001, value=0.0, description='steering kd')\n", + "steering_bias_slider = ipywidgets.FloatSlider(min=-0.3, max=0.3, step=0.01, value=0.0, description='steering bias')\n", + "\n", + "display(speed_gain_slider, steering_gain_slider, steering_dgain_slider, steering_bias_slider)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, let's display some sliders that will let us see what JetBot is thinking. The x and y sliders will display the predicted x, y values.\n", + "\n", + "The steering slider will display our estimated steering value. Please remember, this value isn't the actual angle of the target, but simply a value that is\n", + "nearly proportional. When the actual angle is ``0``, this will be zero, and it will increase / decrease with the actual angle. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_slider = ipywidgets.FloatSlider(min=-1.0, max=1.0, description='x')\n", + "y_slider = ipywidgets.FloatSlider(min=0, max=1.0, orientation='vertical', description='y')\n", + "steering_slider = ipywidgets.FloatSlider(min=-1.0, max=1.0, description='steering')\n", + "speed_slider = ipywidgets.FloatSlider(min=0, max=1.0, orientation='vertical', description='speed')\n", + "\n", + "display(ipywidgets.HBox([y_slider, speed_slider]))\n", + "display(x_slider, steering_slider)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll create a function that will get called whenever the camera's value changes. This function will do the following steps:\n", + "\n", + "1. Pre-process the camera image\n", + "2. Execute the neural network\n", + "3. Compute the approximate steering value\n", + "4. 
Control the motors using proportional / derivative control (PD)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "angle = 0.0\n", + "angle_last = 0.0\n", + "\n", + "def execute(change):\n", + " global angle, angle_last\n", + " image = change['new']\n", + " xy = model_trt(preprocess(image)).detach().float().cpu().numpy().flatten()\n", + " x = xy[0]\n", + " y = (0.5 - xy[1]) / 2.0\n", + " \n", + " x_slider.value = x\n", + " y_slider.value = y\n", + " \n", + " speed_slider.value = speed_gain_slider.value\n", + " \n", + " angle = np.arctan2(x, y)\n", + " pid = angle * steering_gain_slider.value + (angle - angle_last) * steering_dgain_slider.value\n", + " angle_last = angle\n", + " \n", + " steering_slider.value = pid + steering_bias_slider.value\n", + " \n", + " robot.left_motor.value = max(min(speed_slider.value + steering_slider.value, 1.0), 0.0)\n", + " robot.right_motor.value = max(min(speed_slider.value - steering_slider.value, 1.0), 0.0)\n", + " \n", + "execute({'new': camera.value})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cool! We've created our neural network execution function, but now we need to attach it to the camera for processing.\n", + "\n", + "We accomplish that with the observe function." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">WARNING: This code will move the robot!! Please make sure your robot has clearance and it is on Lego or Track you have collected data on. The road follower should work, but the neural network is only as good as the data it's trained on!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera.observe(execute, names='value')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Awesome! If your robot is plugged in it should now be generating new commands with each new camera frame. \n", + "\n", + "You can now place JetBot on Lego or Track you have collected data on and see whether it can follow track.\n", + "\n", + "If you want to stop this behavior, you can unattach this callback by executing the code below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "camera.unobserve(execute, names='value')\n", + "\n", + "time.sleep(0.1) # add a small sleep to make sure frames have finished processing\n", + "\n", + "robot.stop()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Conclusion\n", + "That's it for this live demo! Hopefully you had some fun seeing your JetBot moving smoothly on track follwing the road!!!\n", + "\n", + "If your JetBot wasn't following road very well, try to spot where it fails. 
The beauty is that we can collect more data for these failure scenarios and the JetBot should get even better :)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/road_following/train_model.ipynb b/notebooks/road_following/train_model.ipynb index d705fa99..424c6944 100644 --- a/notebooks/road_following/train_model.ipynb +++ b/notebooks/road_following/train_model.ipynb @@ -157,16 +157,16 @@ "source": [ "train_loader = torch.utils.data.DataLoader(\n", " train_dataset,\n", - " batch_size=16,\n", + " batch_size=8,\n", " shuffle=True,\n", - " num_workers=4\n", + " num_workers=1\n", ")\n", "\n", "test_loader = torch.utils.data.DataLoader(\n", " test_dataset,\n", - " batch_size=16,\n", + " batch_size=8,\n", " shuffle=True,\n", - " num_workers=4\n", + " num_workers=1\n", ")" ] }, diff --git a/scripts/create-sdcard-image-from-scratch.sh b/scripts/create-sdcard-image-from-scratch.sh index 3067cd80..da01e2e2 100755 --- a/scripts/create-sdcard-image-from-scratch.sh +++ b/scripts/create-sdcard-image-from-scratch.sh @@ -17,6 +17,14 @@ while true; do sudo -n true; sleep 120; kill -0 "$$" || exit; done 2>/dev/null & echo -e "\e[100m Enable i2c permissions \e[0m" sudo usermod -aG i2c $USER +# Make swapfile +cd +sudo fallocate -l 4G /var/swapfile +sudo chmod 600 /var/swapfile +sudo mkswap /var/swapfile +sudo swapon /var/swapfile +sudo bash -c 'echo "/var/swapfile swap swap defaults 0 0" >> /etc/fstab' + # Install pip and some python dependencies echo -e "\e[104m Install pip and some python dependencies \e[0m" sudo apt-get update @@ -54,6 +62,12 @@ cd vision #git checkout v0.4.0 sudo -H python3 setup.py install +# Install torch2trt +cd $HOME +git clone https://github.com/NVIDIA-AI-IOT/torch2trt +cd torch2trt +sudo python3 setup.py install + # Install traitlets (master, to support the unlink() method) echo -e "\e[48;5;172m Install traitlets \e[0m" #sudo python3 -m pip install git+https://github.com/ipython/traitlets@master @@ -84,6 +98,11 @@ sudo -H pip3 install -e . sudo jupyter labextension install js sudo jupyter lab build +# Install bokeh +sudo pip3 install bokeh +sudo jupyter labextension install @bokeh/jupyter_bokeh + + # install jetbot python module cd sudo apt install -y python3-smbus @@ -102,13 +121,10 @@ sudo mv jetbot_jupyter.service /etc/systemd/system/jetbot_jupyter.service sudo systemctl enable jetbot_jupyter sudo systemctl start jetbot_jupyter -# Make swapfile -cd -sudo fallocate -l 4G /var/swapfile -sudo chmod 600 /var/swapfile -sudo mkswap /var/swapfile -sudo swapon /var/swapfile -sudo bash -c 'echo "/var/swapfile swap swap defaults 0 0" >> /etc/fstab' +# Optimize the system configuration to create more headroom +sudo nvpmodel -m 0 +sudo systemctl set-default multi-user.target +sudo systemctl disable nvzramconfig.service # Copy JetBot notebooks to home directory cp -r ~/jetbot/notebooks ~/Notebooks
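The setup-script changes above (4 GB swapfile, torch2trt and bokeh installation, maximum power mode, and the switch of the default boot target) are easiest to trust if they are checked after the first reboot. The snippet below is a minimal verification sketch and is not part of the patch itself; it assumes a standard JetPack/Ubuntu image where `swapon`, `nvpmodel`, and `systemctl` are available.

```bash
# Confirm the 4G swapfile registered in /etc/fstab is active.
swapon --show
free -h

# Confirm the Jetson power mode set by "nvpmodel -m 0" (expect mode 0 / MAXN).
sudo nvpmodel -q

# Confirm the default boot target no longer starts the desktop.
systemctl get-default        # expect: multi-user.target

# Confirm torch2trt is importable for the *_trt notebooks.
python3 -c "import torch2trt; print('torch2trt OK')"
```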