diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..f69f217 Binary files /dev/null and b/.DS_Store differ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e9822b5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,25 @@ +BSD 2-Clause License + +Copyright (c) 2020, Chenfeng Xu +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..3cd6384 --- /dev/null +++ b/README.md @@ -0,0 +1,160 @@ +# SqueezeSegV3: Spatially-Adaptive Convolution for Efficient Point-Cloud Segmentation. +By Chenfeng Xu, Bichen Wu, Zining Wang, Wei Zhan, Peter Vajda, Kurt Keutzer, and Masayoshi Tomizuka. + +This repository contains a Pytorch implementation of SqueezeSegV3, a state-of-the-art model for LiDAR segmentation. The framework of our SqueezeSegV3 can be found below: + +

+ +

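+The core operator is the spatially-adaptive convolution (SAC) implemented in `src/backbones/SAC.py`: attention weights predicted from the projected xyz coordinates re-weight each pixel's unfolded feature neighborhood before a small MLP and a residual connection. The PyTorch snippet below is a condensed sketch of that block, not the full backbone; the channel sizes and the single-conv MLP are illustrative simplifications:
+
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class SACSketch(nn.Module):
+    """Condensed sketch of the SAC block in src/backbones/SAC.py (illustrative sizes)."""
+
+    def __init__(self, channels):
+        super().__init__()
+        # predict one weight per channel for each of the 9 neighbors, from the xyz image
+        self.attention = nn.Sequential(
+            nn.Conv2d(3, 9 * channels, kernel_size=7, padding=3),
+            nn.BatchNorm2d(9 * channels),
+        )
+        self.mlp = nn.Sequential(
+            nn.Conv2d(9 * channels, channels, kernel_size=1),
+            nn.BatchNorm2d(channels),
+            nn.ReLU(inplace=True),
+        )
+
+    def forward(self, xyz, feature):
+        n, _, h, w = feature.shape
+        # gather the 3x3 neighborhood of every pixel: [N, 9*C, H, W]
+        neighbors = F.unfold(feature, kernel_size=3, padding=1).view(n, -1, h, w)
+        # coordinate-conditioned attention re-weights the neighborhood
+        neighbors = neighbors * torch.sigmoid(self.attention(xyz))
+        return self.mlp(neighbors) + feature  # residual fusion
+
+
+# toy usage on a 64 x 512 range image with 32 feature channels
+block = SACSketch(32)
+out = block(torch.randn(1, 3, 64, 512), torch.randn(1, 32, 64, 512))
+print(out.shape)  # torch.Size([1, 32, 64, 512])
+```
+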
+
+
+Selected quantitative results of different approaches on the SemanticKITTI dataset (* means KNN post-processing):
+
+| Method           | mIoU | car  | bicycle | motorcycle | truck | person | bicyclist | motorcyclist | road |
+| ---------------- | ---- | ---- | ------- | ---------- | ----- | ------ | --------- | ------------ | ---- |
+| SqueezeSeg       | 29.5 | 68.8 | 16.0    | 4.1        | 3.3   | 12.9   | 13.1      | 0.9          | 85.4 |
+| SqueezeSegV2     | 39.7 | 81.8 | 18.5    | 17.9       | 13.4  | 20.1   | 25.1      | 3.9          | 88.6 |
+| RangeNet21       | 47.4 | 85.4 | 26.2    | 26.5       | 18.6  | 31.8   | 33.6      | 4.0          | 91.4 |
+| RangeNet53       | 49.9 | 86.4 | 24.5    | 32.7       | 25.5  | 36.2   | 33.6      | 4.7          | **91.8** |
+| SqueezeSegV3-21  | 48.8 | 84.6 | 31.5    | 32.4       | 11.3  | 39.4   | 36.1      | **21.3**     | 90.8 |
+| SqueezeSegV3-53  | 52.9 | 87.4 | 35.2    | 33.7       | 29.0  | 41.8   | 39.1      | 20.1         | **91.8** |
+| SqueezeSegV3-21* | 51.6 | 89.4 | 33.7    | 34.9       | 11.3  | 42.6   | 44.9      | 21.2         | 90.8 |
+| SqueezeSegV3-53* | **55.9** | **92.5** | **38.7** | **36.5** | **29.6** | **45.6** | **46.2** | 20.1 | 91.7 |
+
+Visualization results of SqueezeSegV3:
+

+ + +

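+SqueezeSegV3 consumes a spherical range-image projection of each LiDAR scan rather than raw points. The snippet below is a minimal sketch of that projection (the full version, which also fills the range, remission and index maps, lives in `src/common/laserscan.py`); the 64 x 2048 resolution and the +3/-25 degree field of view are the defaults used in this repository:
+
+```python
+import numpy as np
+
+
+def range_projection(points, H=64, W=2048, fov_up=3.0, fov_down=-25.0):
+    """Map an [m, 3] point cloud to (u, v) pixel coordinates of an H x W range image."""
+    fov_up, fov_down = np.radians(fov_up), np.radians(fov_down)
+    fov = abs(fov_up) + abs(fov_down)
+
+    depth = np.linalg.norm(points, 2, axis=1)
+    yaw = -np.arctan2(points[:, 1], points[:, 0])
+    pitch = np.arcsin(points[:, 2] / depth)
+
+    # normalized image coordinates in [0, 1], scaled to pixels and clamped
+    u = 0.5 * (yaw / np.pi + 1.0) * W
+    v = (1.0 - (pitch + abs(fov_down)) / fov) * H
+    u = np.clip(np.floor(u), 0, W - 1).astype(np.int32)
+    v = np.clip(np.floor(v), 0, H - 1).astype(np.int32)
+    return u, v, depth
+
+
+# toy usage with synthetic points (a real scan comes from a SemanticKITTI .bin file)
+pts = np.random.randn(1000, 3) + np.array([20.0, 0.0, 1.0])
+u, v, depth = range_projection(pts)
+print(u.min(), u.max(), v.min(), v.max())
+```
+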
+
+
+For more details, please refer to our paper: [SqueezeSegV3](https://arxiv.org/abs/2004.01803). This work is a follow-up to [SqueezeSeg](https://github.com/BichenWuUCB/SqueezeSeg), [SqueezeSegV2](https://github.com/xuanyuzhou98/SqueezeSegV2) and [LATTE](https://github.com/bernwang/latte). If you find this work useful for your research, please consider citing:
+
+```
+@article{xu2020squeezesegv3,
+  title={SqueezeSegV3: Spatially-Adaptive Convolution for Efficient Point-Cloud Segmentation},
+  author={Xu, Chenfeng and Wu, Bichen and Wang, Zining and Zhan, Wei and Vajda, Peter and Keutzer, Kurt and Tomizuka, Masayoshi},
+  journal={arXiv preprint arXiv:2004.01803},
+  year={2020}
+}
+```
+
+Related works:
+
+```
+@inproceedings{wu2018squeezesegv2,
+  title={SqueezeSegV2: Improved Model Structure and Unsupervised Domain Adaptation for Road-Object Segmentation from a LiDAR Point Cloud},
+  author={Wu, Bichen and Zhou, Xuanyu and Zhao, Sicheng and Yue, Xiangyu and Keutzer, Kurt},
+  booktitle={ICRA},
+  year={2019}
+}
+
+@inproceedings{wu2017squeezeseg,
+  title={SqueezeSeg: Convolutional Neural Nets with Recurrent CRF for Real-Time Road-Object Segmentation from 3D LiDAR Point Cloud},
+  author={Wu, Bichen and Wan, Alvin and Yue, Xiangyu and Keutzer, Kurt},
+  booktitle={ICRA},
+  year={2018}
+}
+
+@inproceedings{wang2019latte,
+  title={LATTE: Accelerating LiDAR Point Cloud Annotation via Sensor Fusion, One-Click Annotation, and Tracking},
+  author={Wang, Bernie and Wu, Virginia and Wu, Bichen and Keutzer, Kurt},
+  booktitle={2019 IEEE Intelligent Transportation Systems Conference (ITSC)},
+  pages={265--272},
+  year={2019},
+  organization={IEEE}
+}
+```
+
+## License
+**SqueezeSegV3** is released under the BSD license (see [LICENSE](https://github.com/chenfengxu714/SqueezeSegV3/blob/master/LICENSE) for details).
+
+## Installation
+
+The instructions are tested on Ubuntu 16.04 with Python 3.6 and PyTorch 1.1.0 with GPU support.
+
+* Clone the SqueezeSegV3 repository:
+
+```shell
+git clone https://github.com/chenfengxu714/SqueezeSegV3.git
+```
+
+* Use pip to install the required Python packages:
+
+```shell
+pip install -r requirements.txt
+```
+
+* The SemanticKITTI dataset can be downloaded [here](http://semantic-kitti.org/dataset.html).
+
+## Pre-trained Models
+
+The pre-trained SqueezeSegV3-21 and SqueezeSegV3-53 are available at [Google Drive](https://drive.google.com/drive/folders/1oIZXnMxQPaEINlI11V3kn_kXdSTfUgm6?usp=sharing); you can download the two files directly.
+
+## Demo
+
+We provide a demo script:
+```shell
+cd ./src/tasks/semantic/
+python demo.py -m /path/to/model
+```
+You can find the prediction .label files and the projected maps in the ./src/sample_output folder; an example is shown below:
+

+ +

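+The predictions use the same binary format as the SemanticKITTI labels: one 32-bit value per point, with the semantic class id in the lower 16 bits and the instance id in the upper 16 bits (see `SemLaserScan.set_label` in `src/common/laserscan.py`). A minimal sketch for inspecting a prediction file follows; the path below is illustrative:
+
+```python
+import numpy as np
+
+# illustrative path; point this at any .label file written by demo.py or infer.py
+label_path = "./src/sample_output/sequences/00/predictions/000000.label"
+label = np.fromfile(label_path, dtype=np.uint32)
+
+sem_label = label & 0xFFFF   # semantic class id per point (lower 16 bits)
+inst_label = label >> 16     # instance id per point (upper 16 bits)
+
+classes, counts = np.unique(sem_label, return_counts=True)
+for c, n in zip(classes, counts):
+    print("class {}: {} points".format(c, n))
+```
+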
+ + +## Inference +To infer the predictions for the entire dataset: + +```shell +cd ./src/tasks/semantic/ +python infer.py -d /path/to/dataset/ -l /path/for/predictions -m /path/to/model +``` + +To visualize the prediction for the sequence point cloud: +```shell +python visualize.py -d /path/to/dataset/ -p /path/to/predictions/ -s SQ_Number +``` + + +## Training + +```shell +cd ./src/tasks/semantic/ +``` + +To train a network (from scratch): + +```shell +python train.py -d /path/to/dataset -ac /config/arch/CHOICE.yaml -l /path/to/log +``` + + +To train a network (from pretrained model): + +```shell +python train.py -d /path/to/dataset -ac /config/arch/CHOICE.yaml -l /path/to/log -p /path/to/pretrained +``` + +We can monitor the training process using tensorboard. +```shell +tensorboard --logdir /file_path/ +``` + +## Evaluation + +```shell +python evaluate_iou.py -d /path/to/dataset -p /path/to/predictions/ --split valid +``` + + +## Credits +We referred to RangeNet++ ([Paper](http://www.ipb.uni-bonn.de/wp-content/papercite-data/pdf/milioto2019iros.pdf), [Code](https://github.com/PRBonn/lidar-bonnetal)) during our development. We thank the authors of RangeNet++ for open-sourcing their code. + + + diff --git a/figure/.DS_Store b/figure/.DS_Store new file mode 100644 index 0000000..1138c30 Binary files /dev/null and b/figure/.DS_Store differ diff --git a/figure/000000.png b/figure/000000.png new file mode 100644 index 0000000..df00e78 Binary files /dev/null and b/figure/000000.png differ diff --git a/figure/framework.png b/figure/framework.png new file mode 100644 index 0000000..1f18e62 Binary files /dev/null and b/figure/framework.png differ diff --git a/figure/result.png b/figure/result.png new file mode 100644 index 0000000..845e531 Binary files /dev/null and b/figure/result.png differ diff --git a/figure/sample3.gif b/figure/sample3.gif new file mode 100644 index 0000000..ccbfe82 Binary files /dev/null and b/figure/sample3.gif differ diff --git a/figure/sample4.gif b/figure/sample4.gif new file mode 100644 index 0000000..022443e Binary files /dev/null and b/figure/sample4.gif differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2dcd254 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,12 @@ +# first apt install python3-tk +numpy==1.14.0 +torchvision==0.2.2.post3 +matplotlib==2.2.3 +tensorflow==1.13.1 +scipy==0.19.1 +torch==1.1.0 +vispy==0.5.3 +opencv_python==4.1.0.25 +opencv_contrib_python==4.1.0.25 +Pillow==6.1.0 +PyYAML==5.1.1 diff --git a/sample_data/.DS_Store b/sample_data/.DS_Store new file mode 100644 index 0000000..c62a272 Binary files /dev/null and b/sample_data/.DS_Store differ diff --git a/sample_data/sequences/.DS_Store b/sample_data/sequences/.DS_Store new file mode 100644 index 0000000..857589d Binary files /dev/null and b/sample_data/sequences/.DS_Store differ diff --git a/sample_data/sequences/00/.DS_Store b/sample_data/sequences/00/.DS_Store new file mode 100644 index 0000000..eb74f92 Binary files /dev/null and b/sample_data/sequences/00/.DS_Store differ diff --git a/sample_data/sequences/00/velodyne/.DS_Store b/sample_data/sequences/00/velodyne/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/sample_data/sequences/00/velodyne/.DS_Store differ diff --git a/sample_data/sequences/00/velodyne/000000.bin b/sample_data/sequences/00/velodyne/000000.bin new file mode 100644 index 0000000..bd75ae7 Binary files /dev/null and b/sample_data/sequences/00/velodyne/000000.bin differ diff --git 
a/sample_data/sequences/00/velodyne/000000.label b/sample_data/sequences/00/velodyne/000000.label new file mode 100644 index 0000000..6f5f410 Binary files /dev/null and b/sample_data/sequences/00/velodyne/000000.label differ diff --git a/sample_data/sequences/00/velodyne/000001.bin b/sample_data/sequences/00/velodyne/000001.bin new file mode 100644 index 0000000..664baeb Binary files /dev/null and b/sample_data/sequences/00/velodyne/000001.bin differ diff --git a/sample_data/sequences/00/velodyne/000001.label b/sample_data/sequences/00/velodyne/000001.label new file mode 100644 index 0000000..4689552 Binary files /dev/null and b/sample_data/sequences/00/velodyne/000001.label differ diff --git a/sample_data/sequences/00/velodyne/000002.bin b/sample_data/sequences/00/velodyne/000002.bin new file mode 100644 index 0000000..c821667 Binary files /dev/null and b/sample_data/sequences/00/velodyne/000002.bin differ diff --git a/sample_data/sequences/00/velodyne/000002.label b/sample_data/sequences/00/velodyne/000002.label new file mode 100644 index 0000000..b3a615c Binary files /dev/null and b/sample_data/sequences/00/velodyne/000002.label differ diff --git a/src/.DS_Store b/src/.DS_Store new file mode 100644 index 0000000..91865c7 Binary files /dev/null and b/src/.DS_Store differ diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/auxiliary/__init__.py b/src/auxiliary/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/backbones/.DS_Store b/src/backbones/.DS_Store new file mode 100644 index 0000000..38734ca Binary files /dev/null and b/src/backbones/.DS_Store differ diff --git a/src/backbones/SAC.py b/src/backbones/SAC.py new file mode 100644 index 0000000..0377837 --- /dev/null +++ b/src/backbones/SAC.py @@ -0,0 +1,178 @@ +# This file was modified from https://github.com/BobLiu20/YOLOv3_PyTorch +# It needed to be modified in order to accomodate for different strides in the +from __future__ import division +import torch +import torch.nn as nn +from collections import OrderedDict +import torch.nn.functional as F + + +class SACBlock(nn.Module): + def __init__(self, inplanes, expand1x1_planes, bn_d = 0.1): + + super(SACBlock, self).__init__() + self.inplanes = inplanes + self.bn_d = bn_d + + self.attention_x = nn.Sequential( + nn.Conv2d(3, 9 * self.inplanes, kernel_size = 7, padding = 3), + nn.BatchNorm2d(9 * self.inplanes, momentum = 0.1), + ) + + self.position_mlp_2 = nn.Sequential( + nn.Conv2d(9 * self.inplanes, self.inplanes, kernel_size = 1), + nn.BatchNorm2d(self.inplanes, momentum = 0.1), + nn.ReLU(inplace = True), + nn.Conv2d(self.inplanes, self.inplanes, kernel_size = 3, padding = 1), + nn.BatchNorm2d(self.inplanes, momentum = 0.1), + nn.ReLU(inplace = True), + ) + + def forward(self, input): + xyz = input[0] + new_xyz= input[1] + feature = input[2] + N,C,H,W = feature.size() + + new_feature = F.unfold(feature, kernel_size = 3, padding = 1).view(N, -1, H, W) + attention = F.sigmoid(self.attention_x(new_xyz)) + new_feature = new_feature * attention + new_feature = self.position_mlp_2(new_feature) + fuse_feature = new_feature + feature + + return xyz, new_xyz, fuse_feature + +# ****************************************************************************** + +# number of layers per model +model_blocks = { + 21: [1, 1, 2, 2, 1], + 53: [1, 2, 8, 8, 4], +} + + +class Backbone(nn.Module): + """ + Class for DarknetSeg. 
Subclasses PyTorch's own "nn" module + """ + + def __init__(self, params): + super(Backbone, self).__init__() + self.use_range = params["input_depth"]["range"] + self.use_xyz = params["input_depth"]["xyz"] + self.use_remission = params["input_depth"]["remission"] + self.drop_prob = params["dropout"] + self.bn_d = params["bn_d"] + self.OS = params["OS"] + self.layers = params["extra"]["layers"] + print("Using squeezesegv3" + str(self.layers) + " Backbone") + self.input_depth = 0 + self.input_idxs = [] + if self.use_range: + self.input_depth += 1 + self.input_idxs.append(0) + if self.use_xyz: + self.input_depth += 3 + self.input_idxs.extend([1, 2, 3]) + if self.use_remission: + self.input_depth += 1 + self.input_idxs.append(4) + print("Depth of backbone input = ", self.input_depth) + + self.strides = [2, 2, 2, 1, 1] + + current_os = 1 + for s in self.strides: + current_os *= s + print("Original OS: ", current_os) + + if self.OS > current_os: + print("Can't do OS, ", self.OS, + " because it is bigger than original ", current_os) + else: + + for i, stride in enumerate(reversed(self.strides), 0): + if int(current_os) != self.OS: + if stride == 2: + current_os /= 2 + self.strides[-1 - i] = 1 + if int(current_os) == self.OS: + break + print("New OS: ", int(current_os)) + print("Strides: ", self.strides) + + assert self.layers in model_blocks.keys() + + + self.blocks = model_blocks[self.layers] + + self.conv1 = nn.Conv2d(self.input_depth, 32, kernel_size=3, + stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(32, momentum=self.bn_d) + self.relu1 = nn.LeakyReLU(0.1) + + self.enc1 = self._make_enc_layer(SACBlock, [32, 64], self.blocks[0], + stride=self.strides[0], DS=True, bn_d=self.bn_d) + self.enc2 = self._make_enc_layer(SACBlock, [64, 128], self.blocks[1], + stride=self.strides[1], DS=True, bn_d=self.bn_d) + self.enc3 = self._make_enc_layer(SACBlock, [128, 256], self.blocks[2], + stride=self.strides[2], DS=True, bn_d=self.bn_d) + self.enc4 = self._make_enc_layer(SACBlock, [256, 256], self.blocks[3], + stride=self.strides[3], DS=False, bn_d=self.bn_d) + self.enc5 = self._make_enc_layer(SACBlock, [256, 256], self.blocks[4], + stride=self.strides[4], DS=False, bn_d=self.bn_d) + + self.dropout = nn.Dropout2d(self.drop_prob) + + self.last_channels = 256 + + def _make_enc_layer(self, block, planes, blocks, stride, DS, bn_d=0.1): + layers = [] + + inplanes = planes[0] + for i in range(0, blocks): + layers.append(("residual_{}".format(i), + block(inplanes, planes,bn_d))) + if DS==True: + layers.append(("conv", nn.Conv2d(planes[0], planes[1], + kernel_size=3, + stride=[1, stride], dilation=1, + padding=1, bias=False))) + layers.append(("bn", nn.BatchNorm2d(planes[1], momentum=bn_d))) + layers.append(("relu", nn.LeakyReLU(0.1))) + + return nn.Sequential(OrderedDict(layers)) + + def run_layer(self, xyz, feature, layer, skips, os, flag=True): + new_xyz = xyz + if flag == True: + xyz, new_xyz, y = layer[:-3]([xyz, new_xyz, feature]) + y = layer[-3:](y) + xyz = F.upsample_bilinear(xyz, size=[xyz.size()[2], xyz.size()[3]//2]) + else: + xyz,new_xyz,y = layer([xyz, new_xyz, feature]) + if y.shape[2] < feature.shape[2] or y.shape[3] < feature.shape[3]: + skips[os] = feature.detach() + os *= 2 + feature = self.dropout(y) + return xyz, feature, skips, os + + def forward(self, feature): + skips = {} + os = 1 + xyz = feature[:,1:4,:,:] + feature = self.relu1(self.bn1(self.conv1(feature))) + + xyz,feature, skips, os = self.run_layer(xyz,feature, self.enc1, skips, os) + xyz,feature, skips, os = 
self.run_layer(xyz,feature, self.enc2, skips, os) + xyz,feature, skips, os = self.run_layer(xyz,feature, self.enc3, skips, os) + xyz,feature, skips, os = self.run_layer(xyz,feature, self.enc4, skips, os, flag=False) + xyz,feature, skips, os = self.run_layer(xyz,feature, self.enc5, skips, os, flag=False) + + return feature, skips + + def get_last_depth(self): + return self.last_channels + + def get_input_depth(self): + return self.input_depth diff --git a/src/backbones/__init__.py b/src/backbones/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/backbones/__pycache__/.DS_Store b/src/backbones/__pycache__/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/src/backbones/__pycache__/.DS_Store differ diff --git a/src/backbones/__pycache__/SAC.cpython-36.pyc b/src/backbones/__pycache__/SAC.cpython-36.pyc new file mode 100644 index 0000000..cb02e28 Binary files /dev/null and b/src/backbones/__pycache__/SAC.cpython-36.pyc differ diff --git a/src/common/__init__.py b/src/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/common/__pycache__/__init__.cpython-36.pyc b/src/common/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..2d46336 Binary files /dev/null and b/src/common/__pycache__/__init__.cpython-36.pyc differ diff --git a/src/common/__pycache__/avgmeter.cpython-36.pyc b/src/common/__pycache__/avgmeter.cpython-36.pyc new file mode 100644 index 0000000..e2607aa Binary files /dev/null and b/src/common/__pycache__/avgmeter.cpython-36.pyc differ diff --git a/src/common/__pycache__/laserscan.cpython-36.pyc b/src/common/__pycache__/laserscan.cpython-36.pyc new file mode 100644 index 0000000..5ce584d Binary files /dev/null and b/src/common/__pycache__/laserscan.cpython-36.pyc differ diff --git a/src/common/__pycache__/logger.cpython-36.pyc b/src/common/__pycache__/logger.cpython-36.pyc new file mode 100644 index 0000000..818ce6c Binary files /dev/null and b/src/common/__pycache__/logger.cpython-36.pyc differ diff --git a/src/common/__pycache__/onehot.cpython-36.pyc b/src/common/__pycache__/onehot.cpython-36.pyc new file mode 100644 index 0000000..864f991 Binary files /dev/null and b/src/common/__pycache__/onehot.cpython-36.pyc differ diff --git a/src/common/__pycache__/warmupLR.cpython-36.pyc b/src/common/__pycache__/warmupLR.cpython-36.pyc new file mode 100644 index 0000000..96d3116 Binary files /dev/null and b/src/common/__pycache__/warmupLR.cpython-36.pyc differ diff --git a/src/common/avgmeter.py b/src/common/avgmeter.py new file mode 100644 index 0000000..a9a0530 --- /dev/null +++ b/src/common/avgmeter.py @@ -0,0 +1,20 @@ +# This file is covered by the LICENSE file in the root of this project. + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count diff --git a/src/common/laserscan.py b/src/common/laserscan.py new file mode 100644 index 0000000..2bf5499 --- /dev/null +++ b/src/common/laserscan.py @@ -0,0 +1,289 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. 
+import numpy as np + + +class LaserScan: + """Class that contains LaserScan with x,y,z,r""" + EXTENSIONS_SCAN = ['.bin'] + + def __init__(self, project=False, H=64, W=2048, fov_up=3.0, fov_down=-25.0): + self.project = project + self.proj_H = H + self.proj_W = W + self.proj_fov_up = fov_up + self.proj_fov_down = fov_down + self.reset() + + def reset(self): + """ Reset scan members. """ + self.points = np.zeros((0, 3), dtype=np.float32) # [m, 3]: x, y, z + self.remissions = np.zeros((0, 1), dtype=np.float32) # [m ,1]: remission + + # projected range image - [H,W] range (-1 is no data) + self.proj_range = np.full((self.proj_H, self.proj_W), -1, + dtype=np.float32) + + # unprojected range (list of depths for each point) + self.unproj_range = np.zeros((0, 1), dtype=np.float32) + + # projected point cloud xyz - [H,W,3] xyz coord (-1 is no data) + self.proj_xyz = np.full((self.proj_H, self.proj_W, 3), -1, + dtype=np.float32) + + # projected remission - [H,W] intensity (-1 is no data) + self.proj_remission = np.full((self.proj_H, self.proj_W), -1, + dtype=np.float32) + + # projected index (for each pixel, what I am in the pointcloud) + # [H,W] index (-1 is no data) + self.proj_idx = np.full((self.proj_H, self.proj_W), -1, + dtype=np.int32) + + # for each point, where it is in the range image + self.proj_x = np.zeros((0, 1), dtype=np.int32) # [m, 1]: x + self.proj_y = np.zeros((0, 1), dtype=np.int32) # [m, 1]: y + + # mask containing for each pixel, if it contains a point or not + self.proj_mask = np.zeros((self.proj_H, self.proj_W), + dtype=np.int32) # [H,W] mask + + def size(self): + """ Return the size of the point cloud. """ + return self.points.shape[0] + + def __len__(self): + return self.size() + + def open_scan(self, filename): + """ Open raw scan and fill in attributes + """ + # reset just in case there was an open structure + self.reset() + + # check filename is string + if not isinstance(filename, str): + raise TypeError("Filename should be string type, " + "but was {type}".format(type=str(type(filename)))) + + # check extension is a laserscan + if not any(filename.endswith(ext) for ext in self.EXTENSIONS_SCAN): + raise RuntimeError("Filename extension is not valid scan file.") + + # if all goes well, open pointcloud + scan = np.fromfile(filename, dtype=np.float32) + scan = scan.reshape((-1, 4)) + + # put in attribute + points = scan[:, 0:3] # get xyz + remissions = scan[:, 3] # get remission + self.set_points(points, remissions) + + def set_points(self, points, remissions=None): + """ Set scan attributes (instead of opening from file) + """ + # reset just in case there was an open structure + self.reset() + + # check scan makes sense + if not isinstance(points, np.ndarray): + raise TypeError("Scan should be numpy array") + + # check remission makes sense + if remissions is not None and not isinstance(remissions, np.ndarray): + raise TypeError("Remissions should be numpy array") + + # put in attribute + self.points = points # get xyz + if remissions is not None: + self.remissions = remissions # get remission + else: + self.remissions = np.zeros((points.shape[0]), dtype=np.float32) + + # if projection is wanted, then do it and fill in the structure + if self.project: + self.do_range_projection() + + def do_range_projection(self): + """ Project a pointcloud into a spherical projection image.projection. 
+ Function takes no arguments because it can be also called externally + if the value of the constructor was not set (in case you change your + mind about wanting the projection) + """ + # laser parameters + fov_up = self.proj_fov_up / 180.0 * np.pi # field of view up in rad + fov_down = self.proj_fov_down / 180.0 * np.pi # field of view down in rad + fov = abs(fov_down) + abs(fov_up) # get field of view total in rad + + # get depth of all points + depth = np.linalg.norm(self.points, 2, axis=1) + + # get scan components + scan_x = self.points[:, 0] + scan_y = self.points[:, 1] + scan_z = self.points[:, 2] + + # get angles of all points + yaw = -np.arctan2(scan_y, scan_x) + pitch = np.arcsin(scan_z / depth) + + # get projections in image coords + proj_x = 0.5 * (yaw / np.pi + 1.0) # in [0.0, 1.0] + proj_y = 1.0 - (pitch + abs(fov_down)) / fov # in [0.0, 1.0] + + # scale to image size using angular resolution + proj_x *= self.proj_W # in [0.0, W] + proj_y *= self.proj_H # in [0.0, H] + + # round and clamp for use as index + proj_x = np.floor(proj_x) + proj_x = np.minimum(self.proj_W - 1, proj_x) + proj_x = np.maximum(0, proj_x).astype(np.int32) # in [0,W-1] + self.proj_x = np.copy(proj_x) # store a copy in orig order + + proj_y = np.floor(proj_y) + proj_y = np.minimum(self.proj_H - 1, proj_y) + proj_y = np.maximum(0, proj_y).astype(np.int32) # in [0,H-1] + self.proj_y = np.copy(proj_y) # stope a copy in original order + + # copy of depth in original order + self.unproj_range = np.copy(depth) + + # order in decreasing depth + indices = np.arange(depth.shape[0]) + order = np.argsort(depth)[::-1] + depth = depth[order] + indices = indices[order] + points = self.points[order] + remission = self.remissions[order] + proj_y = proj_y[order] + proj_x = proj_x[order] + + # assing to images + self.proj_range[proj_y, proj_x] = depth + self.proj_xyz[proj_y, proj_x] = points + self.proj_remission[proj_y, proj_x] = remission + self.proj_idx[proj_y, proj_x] = indices + self.proj_mask = (self.proj_idx > 0).astype(np.int32) + + +class SemLaserScan(LaserScan): + """Class that contains LaserScan with x,y,z,r,sem_label,sem_color_label,inst_label,inst_color_label""" + EXTENSIONS_LABEL = ['.label'] + + def __init__(self, sem_color_dict=None, project=False, H=64, W=2048, fov_up=3.0, fov_down=-25.0, max_classes=300): + super(SemLaserScan, self).__init__(project, H, W, fov_up, fov_down) + self.reset() + + # make semantic colors + if sem_color_dict: + # if I have a dict, make it + max_sem_key = 0 + for key, data in sem_color_dict.items(): + if key + 1 > max_sem_key: + max_sem_key = key + 1 + self.sem_color_lut = np.zeros((max_sem_key + 100, 3), dtype=np.float32) + for key, value in sem_color_dict.items(): + self.sem_color_lut[key] = np.array(value, np.float32) / 255.0 + else: + # otherwise make random + max_sem_key = max_classes + self.sem_color_lut = np.random.uniform(low=0.0, + high=1.0, + size=(max_sem_key, 3)) + # force zero to a gray-ish color + self.sem_color_lut[0] = np.full((3), 0.1) + + # make instance colors + max_inst_id = 100000 + self.inst_color_lut = np.random.uniform(low=0.0, + high=1.0, + size=(max_inst_id, 3)) + # force zero to a gray-ish color + self.inst_color_lut[0] = np.full((3), 0.1) + + def reset(self): + """ Reset scan members. 
""" + super(SemLaserScan, self).reset() + + # semantic labels + self.sem_label = np.zeros((0, 1), dtype=np.int32) # [m, 1]: label + self.sem_label_color = np.zeros((0, 3), dtype=np.float32) # [m ,3]: color + + # instance labels + self.inst_label = np.zeros((0, 1), dtype=np.int32) # [m, 1]: label + self.inst_label_color = np.zeros((0, 3), dtype=np.float32) # [m ,3]: color + + # projection color with semantic labels + self.proj_sem_label = np.zeros((self.proj_H, self.proj_W), + dtype=np.int32) # [H,W] label + self.proj_sem_color = np.zeros((self.proj_H, self.proj_W, 3), + dtype=np.float) # [H,W,3] color + + # projection color with instance labels + self.proj_inst_label = np.zeros((self.proj_H, self.proj_W), + dtype=np.int32) # [H,W] label + self.proj_inst_color = np.zeros((self.proj_H, self.proj_W, 3), + dtype=np.float) # [H,W,3] color + + def open_label(self, filename): + """ Open raw scan and fill in attributes + """ + # check filename is string + if not isinstance(filename, str): + raise TypeError("Filename should be string type, " + "but was {type}".format(type=str(type(filename)))) + + # check extension is a laserscan + if not any(filename.endswith(ext) for ext in self.EXTENSIONS_LABEL): + raise RuntimeError("Filename extension is not valid label file.") + + # if all goes well, open label + label = np.fromfile(filename, dtype=np.int32) + label = label.reshape((-1)) + + # set it + self.set_label(label) + + def set_label(self, label): + """ Set points for label not from file but from np + """ + # check label makes sense + if not isinstance(label, np.ndarray): + raise TypeError("Label should be numpy array") + + # only fill in attribute if the right size + if label.shape[0] == self.points.shape[0]: + self.sem_label = label & 0xFFFF # semantic label in lower half + self.inst_label = label >> 16 # instance id in upper half + else: + print("Points shape: ", self.points.shape) + print("Label shape: ", label.shape) + raise ValueError("Scan and Label don't contain same number of points") + + # sanity check + assert((self.sem_label + (self.inst_label << 16) == label).all()) + + if self.project: + self.do_label_projection() + + def colorize(self): + """ Colorize pointcloud with the color of each semantic label + """ + self.sem_label_color = self.sem_color_lut[self.sem_label] + self.sem_label_color = self.sem_label_color.reshape((-1, 3)) + + self.inst_label_color = self.inst_color_lut[self.inst_label] + self.inst_label_color = self.inst_label_color.reshape((-1, 3)) + + def do_label_projection(self): + # only map colors to labels that exist + mask = self.proj_idx >= 0 + + # semantics + self.proj_sem_label[mask] = self.sem_label[self.proj_idx[mask]] + self.proj_sem_color[mask] = self.sem_color_lut[self.sem_label[self.proj_idx[mask]]] + + # instances + self.proj_inst_label[mask] = self.inst_label[self.proj_idx[mask]] + self.proj_inst_color[mask] = self.inst_color_lut[self.inst_label[self.proj_idx[mask]]] diff --git a/src/common/laserscanvis.py b/src/common/laserscanvis.py new file mode 100644 index 0000000..c8309a3 --- /dev/null +++ b/src/common/laserscanvis.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. 
+ +import vispy +from vispy.scene import visuals, SceneCanvas +import numpy as np +from matplotlib import pyplot as plt +from common.laserscan import LaserScan, SemLaserScan + + +class LaserScanVis: + """Class that creates and handles a visualizer for a pointcloud""" + + def __init__(self, scan, scan_names, label_names, offset=0, + semantics=True, instances=False): + self.scan = scan + self.scan_names = scan_names + self.label_names = label_names + self.offset = offset + self.semantics = semantics + self.instances = instances + # sanity check + if not self.semantics and self.instances: + print("Instances are only allowed in when semantics=True") + raise ValueError + + self.reset() + self.update_scan() + + def reset(self): + """ Reset. """ + # last key press (it should have a mutex, but visualization is not + # safety critical, so let's do things wrong) + self.action = "no" # no, next, back, quit are the possibilities + + # new canvas prepared for visualizing data + self.canvas = SceneCanvas(keys='interactive', show=True) + # interface (n next, b back, q quit, very simple) + self.canvas.events.key_press.connect(self.key_press) + self.canvas.events.draw.connect(self.draw) + # grid + self.grid = self.canvas.central_widget.add_grid() + + # laserscan part + self.scan_view = vispy.scene.widgets.ViewBox( + border_color='white', parent=self.canvas.scene) + self.grid.add_widget(self.scan_view, 0, 0) + self.scan_vis = visuals.Markers() + self.scan_view.camera = 'turntable' + self.scan_view.add(self.scan_vis) + visuals.XYZAxis(parent=self.scan_view.scene) + # add semantics + if self.semantics: + print("Using semantics in visualizer") + self.sem_view = vispy.scene.widgets.ViewBox( + border_color='white', parent=self.canvas.scene) + self.grid.add_widget(self.sem_view, 0, 1) + self.sem_vis = visuals.Markers() + self.sem_view.camera = 'turntable' + self.sem_view.add(self.sem_vis) + visuals.XYZAxis(parent=self.sem_view.scene) + # self.sem_view.camera.link(self.scan_view.camera) + + if self.instances: + print("Using instances in visualizer") + self.inst_view = vispy.scene.widgets.ViewBox( + border_color='white', parent=self.canvas.scene) + self.grid.add_widget(self.inst_view, 0, 2) + self.inst_vis = visuals.Markers() + self.inst_view.camera = 'turntable' + self.inst_view.add(self.inst_vis) + visuals.XYZAxis(parent=self.inst_view.scene) + # self.inst_view.camera.link(self.scan_view.camera) + + # img canvas size + self.multiplier = 1 + self.canvas_W = 1024 + self.canvas_H = 64 + if self.semantics: + self.multiplier += 1 + if self.instances: + self.multiplier += 1 + + # new canvas for img + self.img_canvas = SceneCanvas(keys='interactive', show=True, + size=(self.canvas_W, self.canvas_H * self.multiplier)) + # grid + self.img_grid = self.img_canvas.central_widget.add_grid() + # interface (n next, b back, q quit, very simple) + self.img_canvas.events.key_press.connect(self.key_press) + self.img_canvas.events.draw.connect(self.draw) + + # add a view for the depth + self.img_view = vispy.scene.widgets.ViewBox( + border_color='white', parent=self.img_canvas.scene) + self.img_grid.add_widget(self.img_view, 0, 0) + self.img_vis = visuals.Image(cmap='viridis') + self.img_view.add(self.img_vis) + + # add semantics + if self.semantics: + self.sem_img_view = vispy.scene.widgets.ViewBox( + border_color='white', parent=self.img_canvas.scene) + self.img_grid.add_widget(self.sem_img_view, 1, 0) + self.sem_img_vis = visuals.Image(cmap='viridis') + self.sem_img_view.add(self.sem_img_vis) + + # add instances + if 
self.instances: + self.inst_img_view = vispy.scene.widgets.ViewBox( + border_color='white', parent=self.img_canvas.scene) + self.img_grid.add_widget(self.inst_img_view, 2, 0) + self.inst_img_vis = visuals.Image(cmap='viridis') + self.inst_img_view.add(self.inst_img_vis) + + def get_mpl_colormap(self, cmap_name): + cmap = plt.get_cmap(cmap_name) + + # Initialize the matplotlib color map + sm = plt.cm.ScalarMappable(cmap=cmap) + + # Obtain linear color range + color_range = sm.to_rgba(np.linspace(0, 1, 256), bytes=True)[:, 2::-1] + + return color_range.reshape(256, 3).astype(np.float32) / 255.0 + + def update_scan(self): + # first open data + self.scan.open_scan(self.scan_names[self.offset]) + if self.semantics: + self.scan.open_label(self.label_names[self.offset]) + self.scan.colorize() + + # then change names + title = "scan " + str(self.offset) + " of " + str(len(self.scan_names)) + self.canvas.title = title + self.img_canvas.title = title + + # then do all the point cloud stuff + + # plot scan + power = 16 + # print() + range_data = np.copy(self.scan.unproj_range) + # print(range_data.max(), range_data.min()) + range_data = range_data**(1 / power) + # print(range_data.max(), range_data.min()) + viridis_range = ((range_data - range_data.min()) / + (range_data.max() - range_data.min()) * + 255).astype(np.uint8) + viridis_map = self.get_mpl_colormap("viridis") + viridis_colors = viridis_map[viridis_range] + self.scan_vis.set_data(self.scan.points, + face_color=viridis_colors[..., ::-1], + edge_color=viridis_colors[..., ::-1], + size=1) + + # plot semantics + if self.semantics: + self.sem_vis.set_data(self.scan.points, + face_color=self.scan.sem_label_color[..., ::-1], + edge_color=self.scan.sem_label_color[..., ::-1], + size=1) + + # plot instances + if self.instances: + self.inst_vis.set_data(self.scan.points, + face_color=self.scan.inst_label_color[..., ::-1], + edge_color=self.scan.inst_label_color[..., ::-1], + size=1) + + # now do all the range image stuff + # plot range image + data = np.copy(self.scan.proj_range) + # print(data[data > 0].max(), data[data > 0].min()) + data[data > 0] = data[data > 0]**(1 / power) + data[data < 0] = data[data > 0].min() + # print(data.max(), data.min()) + data = (data - data[data > 0].min()) / \ + (data.max() - data[data > 0].min()) + # print(data.max(), data.min()) + self.img_vis.set_data(data) + self.img_vis.update() + + if self.semantics: + self.sem_img_vis.set_data(self.scan.proj_sem_color[..., ::-1]) + self.sem_img_vis.update() + + if self.instances: + self.inst_img_vis.set_data(self.scan.proj_inst_color[..., ::-1]) + self.inst_img_vis.update() + + # interface + def key_press(self, event): + self.canvas.events.key_press.block() + self.img_canvas.events.key_press.block() + if event.key == 'N': + self.offset += 1 + self.update_scan() + elif event.key == 'B': + self.offset -= 1 + self.update_scan() + elif event.key == 'Q' or event.key == 'Escape': + self.destroy() + + def draw(self, event): + if self.canvas.events.key_press.blocked(): + self.canvas.events.key_press.unblock() + if self.img_canvas.events.key_press.blocked(): + self.img_canvas.events.key_press.unblock() + + def destroy(self): + # destroy the visualization + self.canvas.close() + self.img_canvas.close() + vispy.app.quit() + + def run(self): + vispy.app.run() diff --git a/src/common/logger.py b/src/common/logger.py new file mode 100644 index 0000000..4fd7563 --- /dev/null +++ b/src/common/logger.py @@ -0,0 +1,76 @@ +# Code referenced from 
https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 + +import tensorflow as tf +import numpy as np +import scipy.misc +try: + from StringIO import StringIO # Python 2.7 +except ImportError: + from io import BytesIO # Python 3.x + + +class Logger(object): + + def __init__(self, log_dir): + """Create a summary writer logging to log_dir.""" + self.writer = tf.summary.FileWriter(log_dir) + + def scalar_summary(self, tag, value, step): + """Log a scalar variable.""" + summary = tf.Summary( + value=[tf.Summary.Value(tag=tag, simple_value=value)]) + self.writer.add_summary(summary, step) + self.writer.flush() + + def image_summary(self, tag, images, step): + """Log a list of images.""" + + img_summaries = [] + for i, img in enumerate(images): + # Write the image to a string + try: + s = StringIO() + except: + s = BytesIO() + scipy.misc.toimage(img).save(s, format="png") + + # Create an Image object + img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), + height=img.shape[0], + width=img.shape[1]) + # Create a Summary value + img_summaries.append(tf.Summary.Value( + tag='%s/%d' % (tag, i), image=img_sum)) + + # Create and write Summary + summary = tf.Summary(value=img_summaries) + self.writer.add_summary(summary, step) + self.writer.flush() + + def histo_summary(self, tag, values, step, bins=1000): + """Log a histogram of the tensor of values.""" + + # Create a histogram using numpy + counts, bin_edges = np.histogram(values, bins=bins) + + # Fill the fields of the histogram proto + hist = tf.HistogramProto() + hist.min = float(np.min(values)) + hist.max = float(np.max(values)) + hist.num = int(np.prod(values.shape)) + hist.sum = float(np.sum(values)) + hist.sum_squares = float(np.sum(values**2)) + + # Drop the start of the first bin + bin_edges = bin_edges[1:] + + # Add bin edges and counts + for edge in bin_edges: + hist.bucket_limit.append(edge) + for c in counts: + hist.bucket.append(c) + + # Create and write Summary + summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) + self.writer.add_summary(summary, step) + self.writer.flush() diff --git a/src/common/onehot.py b/src/common/onehot.py new file mode 100644 index 0000000..5748e49 --- /dev/null +++ b/src/common/onehot.py @@ -0,0 +1,137 @@ +# This file is covered by the LICENSE file in the root of this project. +import torch +import torch.nn as nn +import torch.nn.functional as F +import __init__ as booger + + +class oneHot(nn.Module): + def __init__(self, device, nclasses, spatial_dim=2): + super().__init__() + self.device = device + self.nclasses = nclasses + self.spatial_dim = spatial_dim + + def onehot1dspatial(self, x): + # we only do tensors that 1d tensors that are batched or not, so check + assert(len(x.shape) == 1 or len(x.shape) == 2) + # if not batched, batch + remove_dim = False # flag to unbatch + if len(x.shape) == 1: + # add batch dimension + x = x[None, ...] + remove_dim = True + + # get tensor shape + n, b = x.shape + + # scatter to onehot + one_hot = torch.zeros((n, self.nclasses, b), + device=self.device).scatter_(1, x.unsqueeze(1), 1) + + # x is now [n,classes,b] + + # if it used to be unbatched, then unbatch it + if remove_dim: + one_hot = one_hot[0] + + return one_hot + + def onehot2dspatial(self, x): + # we only do tensors that 2d tensors that are batched or not, so check + assert(len(x.shape) == 2 or len(x.shape) == 3) + # if not batched, batch + remove_dim = False # flag to unbatch + if len(x.shape) == 2: + # add batch dimension + x = x[None, ...] 
+ remove_dim = True + + # get tensor shape + n, h, w = x.shape + + # scatter to onehot + one_hot = torch.zeros((n, self.nclasses, h, w), + device=self.device).scatter_(1, x.unsqueeze(1), 1) + + # x is now [n,classes,b] + + # if it used to be unbatched, then unbatch it + if remove_dim: + one_hot = one_hot[0] + + return one_hot + + def forward(self, x): + # do onehot here + if self.spatial_dim == 1: + return self.onehot1dspatial(x) + elif self.spatial_dim == 2: + return self.onehot2dspatial(x) + + +if __name__ == "__main__": + # get device + if torch.cuda.is_available(): + device = torch.device('cuda') + else: + device = torch.device('cpu') + + # define number of classes + nclasses = 6 + print("*"*80) + print("Num classes 1d =", nclasses) + print("*"*80) + + # test 1d unbatched case + print("Tensor 1d spat dim, unbatched") + tensor = torch.arange(0, nclasses).to(device) # [0,1,2,3,4,5] + print("in:", tensor) + module = oneHot(device, nclasses, spatial_dim=1) + print("out:", module(tensor)) + print("*"*80) + + # test 1d batched case + print("*"*80) + print("Tensor 1d spat dim, batched") + tensor = torch.arange(0, nclasses).to(device) # [0,1,2,3,4,5] + tensor = torch.cat([tensor.unsqueeze(0), + tensor.unsqueeze(0)]) # [[0,1,2,3,4,5], [0,1,2,3,4,5]] + print("in:", tensor) + module = oneHot(device, nclasses, spatial_dim=1) + print("out:", module(tensor)) + print("*"*80) + + # for 2 use less classes + nclasses = 3 + print("*"*80) + print("Num classes 2d =", nclasses) + print("*"*80) + + # test 2d unbatched case + print("*"*80) + print("Tensor 2d spat dim, unbatched") + tensor = torch.arange(0, nclasses).to(device) # [0,1,2] + tensor = torch.cat([tensor.unsqueeze(0), # [[0,1,2], + tensor.unsqueeze(0), # [0,1,2], + tensor.unsqueeze(0), # [0,1,2], + tensor.unsqueeze(0)]) # [0,1,2]] + print("in:", tensor) + module = oneHot(device, nclasses, spatial_dim=2) + print("out:", module(tensor)) + print("*"*80) + + # test 2d batched case + print("*"*80) + print("Tensor 2d spat dim, unbatched") + tensor = torch.arange(0, nclasses).to(device) # [0,1,2] + tensor = torch.cat([tensor.unsqueeze(0), # [[0,1,2], + tensor.unsqueeze(0), # [0,1,2], + tensor.unsqueeze(0), # [0,1,2], + tensor.unsqueeze(0)]) # [0,1,2]] + tensor = torch.cat([tensor.unsqueeze(0), + tensor.unsqueeze(0)]) # 2 of the same 2d tensor + print("in:", tensor) + module = oneHot(device, nclasses, spatial_dim=2) + print("out:", module(tensor)) + print("*"*80) diff --git a/src/common/sync_batchnorm/__init__.py b/src/common/sync_batchnorm/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/common/sync_batchnorm/__pycache__/__init__.cpython-36.pyc b/src/common/sync_batchnorm/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..bdb4a40 Binary files /dev/null and b/src/common/sync_batchnorm/__pycache__/__init__.cpython-36.pyc differ diff --git a/src/common/sync_batchnorm/__pycache__/batchnorm.cpython-36.pyc b/src/common/sync_batchnorm/__pycache__/batchnorm.cpython-36.pyc new file mode 100644 index 0000000..906b545 Binary files /dev/null and b/src/common/sync_batchnorm/__pycache__/batchnorm.cpython-36.pyc differ diff --git a/src/common/sync_batchnorm/__pycache__/comm.cpython-36.pyc b/src/common/sync_batchnorm/__pycache__/comm.cpython-36.pyc new file mode 100644 index 0000000..cb4d84a Binary files /dev/null and b/src/common/sync_batchnorm/__pycache__/comm.cpython-36.pyc differ diff --git a/src/common/sync_batchnorm/__pycache__/replicate.cpython-36.pyc b/src/common/sync_batchnorm/__pycache__/replicate.cpython-36.pyc new 
file mode 100644 index 0000000..473cd74 Binary files /dev/null and b/src/common/sync_batchnorm/__pycache__/replicate.cpython-36.pyc differ diff --git a/src/common/sync_batchnorm/batchnorm.py b/src/common/sync_batchnorm/batchnorm.py new file mode 100644 index 0000000..3521697 --- /dev/null +++ b/src/common/sync_batchnorm/batchnorm.py @@ -0,0 +1,369 @@ +# -*- coding: utf-8 -*- +# File : batchnorm.py +# Author : Jiayuan Mao +# Email : maojiayuan@gmail.com +# Date : 27/01/2018 +# +# This file is part of Synchronized-BatchNorm-PyTorch. +# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch +# Distributed under MIT License. + +import collections + +import torch +import torch.nn.functional as F + +from torch.nn.modules.batchnorm import _BatchNorm +from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast + +from .comm import SyncMaster +from .replicate import DataParallelWithCallback + +__all__ = ['SynchronizedBatchNorm1d', 'SynchronizedBatchNorm2d', + 'SynchronizedBatchNorm3d', 'convert_model'] + + +def _sum_ft(tensor): + """sum over the first and last dimention""" + return tensor.sum(dim=0).sum(dim=-1) + + +def _unsqueeze_ft(tensor): + """add new dementions at the front and the tail""" + return tensor.unsqueeze(0).unsqueeze(-1) + + +_ChildMessage = collections.namedtuple( + '_ChildMessage', ['sum', 'ssum', 'sum_size']) +_MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std']) + + +class _SynchronizedBatchNorm(_BatchNorm): + def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True): + super(_SynchronizedBatchNorm, self).__init__( + num_features, eps=eps, momentum=momentum, affine=affine) + + self._sync_master = SyncMaster(self._data_parallel_master) + + self._is_parallel = False + self._parallel_id = None + self._slave_pipe = None + + def forward(self, input): + # If it is not parallel computation or is in evaluation mode, use PyTorch's implementation. + if not (self._is_parallel and self.training): + return F.batch_norm( + input, self.running_mean, self.running_var, self.weight, self.bias, + self.training, self.momentum, self.eps) + + # Resize the input to (B, C, -1). + input_shape = input.size() + input = input.view(input.size(0), self.num_features, -1) + + # Compute the sum and square-sum. + sum_size = input.size(0) * input.size(2) + input_sum = _sum_ft(input) + input_ssum = _sum_ft(input ** 2) + + # Reduce-and-broadcast the statistics. + if self._parallel_id == 0: + mean, inv_std = self._sync_master.run_master( + _ChildMessage(input_sum, input_ssum, sum_size)) + else: + mean, inv_std = self._slave_pipe.run_slave( + _ChildMessage(input_sum, input_ssum, sum_size)) + + # Compute the output. + if self.affine: + # MJY:: Fuse the multiplication for speed. + output = (input - _unsqueeze_ft(mean)) * \ + _unsqueeze_ft(inv_std * self.weight) + _unsqueeze_ft(self.bias) + else: + output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std) + + # Reshape it. + return output.view(input_shape) + + def __data_parallel_replicate__(self, ctx, copy_id): + self._is_parallel = True + self._parallel_id = copy_id + + # parallel_id == 0 means master device. + if self._parallel_id == 0: + ctx.sync_master = self._sync_master + else: + self._slave_pipe = ctx.sync_master.register_slave(copy_id) + + def _data_parallel_master(self, intermediates): + """Reduce the sum and square-sum, compute the statistics, and broadcast it.""" + + # Always using same "device order" makes the ReduceAdd operation faster. 
+ # Thanks to:: Tete Xiao (http://tetexiao.com/) + intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device()) + + to_reduce = [i[1][:2] for i in intermediates] + to_reduce = [j for i in to_reduce for j in i] # flatten + target_gpus = [i[1].sum.get_device() for i in intermediates] + + sum_size = sum([i[1].sum_size for i in intermediates]) + sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce) + mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size) + + broadcasted = Broadcast.apply(target_gpus, mean, inv_std) + + outputs = [] + for i, rec in enumerate(intermediates): + outputs.append((rec[0], _MasterMessage(*broadcasted[i*2:i*2+2]))) + + return outputs + + def _compute_mean_std(self, sum_, ssum, size): + """Compute the mean and standard-deviation with sum and square-sum. This method + also maintains the moving average on the master device.""" + assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.' + mean = sum_ / size + sumvar = ssum - sum_ * mean + unbias_var = sumvar / (size - 1) + bias_var = sumvar / size + + self.running_mean = (1 - self.momentum) * \ + self.running_mean + self.momentum * mean.data + self.running_var = (1 - self.momentum) * \ + self.running_var + self.momentum * unbias_var.data + + return mean, bias_var.clamp(self.eps) ** -0.5 + + +class SynchronizedBatchNorm1d(_SynchronizedBatchNorm): + r"""Applies Synchronized Batch Normalization over a 2d or 3d input that is seen as a + mini-batch. + + .. math:: + + y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta + + This module differs from the built-in PyTorch BatchNorm1d as the mean and + standard-deviation are reduced across all devices during training. + + For example, when one uses `nn.DataParallel` to wrap the network during + training, PyTorch's implementation normalize the tensor on each device using + the statistics only on that device, which accelerated the computation and + is also easy to implement, but the statistics might be inaccurate. + Instead, in this synchronized version, the statistics will be computed + over all training samples distributed on multiple devices. + + Note that, for one-GPU or CPU-only case, this module behaves exactly same + as the built-in PyTorch implementation. + + The mean and standard-deviation are calculated per-dimension over + the mini-batches and gamma and beta are learnable parameter vectors + of size C (where C is the input size). + + During training, this layer keeps a running estimate of its computed mean + and variance. The running sum is kept with a default momentum of 0.1. + + During evaluation, this running mean/variance is used for normalization. + + Because the BatchNorm is done over the `C` dimension, computing statistics + on `(N, L)` slices, it's common terminology to call this Temporal BatchNorm + + Args: + num_features: num_features from an expected input of size + `batch_size x num_features [x width]` + eps: a value added to the denominator for numerical stability. + Default: 1e-5 + momentum: the value used for the running_mean and running_var + computation. Default: 0.1 + affine: a boolean value that when set to ``True``, gives the layer learnable + affine parameters. 
Default: ``True`` + + Shape: + - Input: :math:`(N, C)` or :math:`(N, C, L)` + - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input) + + Examples: + >>> # With Learnable Parameters + >>> m = SynchronizedBatchNorm1d(100) + >>> # Without Learnable Parameters + >>> m = SynchronizedBatchNorm1d(100, affine=False) + >>> input = torch.autograd.Variable(torch.randn(20, 100)) + >>> output = m(input) + """ + + def _check_input_dim(self, input): + if input.dim() != 2 and input.dim() != 3: + raise ValueError('expected 2D or 3D input (got {}D input)' + .format(input.dim())) + super(SynchronizedBatchNorm1d, self)._check_input_dim(input) + + +class SynchronizedBatchNorm2d(_SynchronizedBatchNorm): + r"""Applies Batch Normalization over a 4d input that is seen as a mini-batch + of 3d inputs + + .. math:: + + y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta + + This module differs from the built-in PyTorch BatchNorm2d as the mean and + standard-deviation are reduced across all devices during training. + + For example, when one uses `nn.DataParallel` to wrap the network during + training, PyTorch's implementation normalize the tensor on each device using + the statistics only on that device, which accelerated the computation and + is also easy to implement, but the statistics might be inaccurate. + Instead, in this synchronized version, the statistics will be computed + over all training samples distributed on multiple devices. + + Note that, for one-GPU or CPU-only case, this module behaves exactly same + as the built-in PyTorch implementation. + + The mean and standard-deviation are calculated per-dimension over + the mini-batches and gamma and beta are learnable parameter vectors + of size C (where C is the input size). + + During training, this layer keeps a running estimate of its computed mean + and variance. The running sum is kept with a default momentum of 0.1. + + During evaluation, this running mean/variance is used for normalization. + + Because the BatchNorm is done over the `C` dimension, computing statistics + on `(N, H, W)` slices, it's common terminology to call this Spatial BatchNorm + + Args: + num_features: num_features from an expected input of + size batch_size x num_features x height x width + eps: a value added to the denominator for numerical stability. + Default: 1e-5 + momentum: the value used for the running_mean and running_var + computation. Default: 0.1 + affine: a boolean value that when set to ``True``, gives the layer learnable + affine parameters. Default: ``True`` + + Shape: + - Input: :math:`(N, C, H, W)` + - Output: :math:`(N, C, H, W)` (same shape as input) + + Examples: + >>> # With Learnable Parameters + >>> m = SynchronizedBatchNorm2d(100) + >>> # Without Learnable Parameters + >>> m = SynchronizedBatchNorm2d(100, affine=False) + >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45)) + >>> output = m(input) + """ + + def _check_input_dim(self, input): + if input.dim() != 4: + raise ValueError('expected 4D input (got {}D input)' + .format(input.dim())) + super(SynchronizedBatchNorm2d, self)._check_input_dim(input) + + +class SynchronizedBatchNorm3d(_SynchronizedBatchNorm): + r"""Applies Batch Normalization over a 5d input that is seen as a mini-batch + of 4d inputs + + .. math:: + + y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta + + This module differs from the built-in PyTorch BatchNorm3d as the mean and + standard-deviation are reduced across all devices during training. 
+ + For example, when one uses `nn.DataParallel` to wrap the network during + training, PyTorch's implementation normalize the tensor on each device using + the statistics only on that device, which accelerated the computation and + is also easy to implement, but the statistics might be inaccurate. + Instead, in this synchronized version, the statistics will be computed + over all training samples distributed on multiple devices. + + Note that, for one-GPU or CPU-only case, this module behaves exactly same + as the built-in PyTorch implementation. + + The mean and standard-deviation are calculated per-dimension over + the mini-batches and gamma and beta are learnable parameter vectors + of size C (where C is the input size). + + During training, this layer keeps a running estimate of its computed mean + and variance. The running sum is kept with a default momentum of 0.1. + + During evaluation, this running mean/variance is used for normalization. + + Because the BatchNorm is done over the `C` dimension, computing statistics + on `(N, D, H, W)` slices, it's common terminology to call this Volumetric BatchNorm + or Spatio-temporal BatchNorm + + Args: + num_features: num_features from an expected input of + size batch_size x num_features x depth x height x width + eps: a value added to the denominator for numerical stability. + Default: 1e-5 + momentum: the value used for the running_mean and running_var + computation. Default: 0.1 + affine: a boolean value that when set to ``True``, gives the layer learnable + affine parameters. Default: ``True`` + + Shape: + - Input: :math:`(N, C, D, H, W)` + - Output: :math:`(N, C, D, H, W)` (same shape as input) + + Examples: + >>> # With Learnable Parameters + >>> m = SynchronizedBatchNorm3d(100) + >>> # Without Learnable Parameters + >>> m = SynchronizedBatchNorm3d(100, affine=False) + >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45, 10)) + >>> output = m(input) + """ + + def _check_input_dim(self, input): + if input.dim() != 5: + raise ValueError('expected 5D input (got {}D input)' + .format(input.dim())) + super(SynchronizedBatchNorm3d, self)._check_input_dim(input) + + +def convert_model(module): + """Traverse the input module and its child recursively + and replace all instance of torch.nn.modules.batchnorm.BatchNorm*N*d + to SynchronizedBatchNorm*N*d + + Args: + module: the input module needs to be convert to SyncBN model + + Examples: + >>> import torch.nn as nn + >>> import torchvision + >>> # m is a standard pytorch model + >>> m = torchvision.models.resnet18(True) + >>> m = nn.DataParallel(m) + >>> # after convert, m is using SyncBN + >>> m = convert_model(m) + """ + if isinstance(module, torch.nn.DataParallel): + mod = module.module + mod = convert_model(mod) + mod = DataParallelWithCallback(mod) + return mod + + mod = module + for pth_module, sync_module in zip([torch.nn.modules.batchnorm.BatchNorm1d, + torch.nn.modules.batchnorm.BatchNorm2d, + torch.nn.modules.batchnorm.BatchNorm3d], + [SynchronizedBatchNorm1d, + SynchronizedBatchNorm2d, + SynchronizedBatchNorm3d]): + if isinstance(module, pth_module): + mod = sync_module(module.num_features, module.eps, + module.momentum, module.affine) + mod.running_mean = module.running_mean + mod.running_var = module.running_var + if module.affine: + mod.weight.data = module.weight.data.clone().detach() + mod.bias.data = module.bias.data.clone().detach() + + for name, child in module.named_children(): + mod.add_module(name, convert_model(child)) + + return mod diff --git 
a/src/common/sync_batchnorm/comm.py b/src/common/sync_batchnorm/comm.py new file mode 100644 index 0000000..7dfb9fb --- /dev/null +++ b/src/common/sync_batchnorm/comm.py @@ -0,0 +1,138 @@ +# -*- coding: utf-8 -*- +# File : comm.py +# Author : Jiayuan Mao +# Email : maojiayuan@gmail.com +# Date : 27/01/2018 +# +# This file is part of Synchronized-BatchNorm-PyTorch. +# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch +# Distributed under MIT License. + +import queue +import collections +import threading + +__all__ = ['FutureResult', 'SlavePipe', 'SyncMaster'] + + +class FutureResult(object): + """A thread-safe future implementation. Used only as one-to-one pipe.""" + + def __init__(self): + self._result = None + self._lock = threading.Lock() + self._cond = threading.Condition(self._lock) + + def put(self, result): + with self._lock: + assert self._result is None, 'Previous result has\'t been fetched.' + self._result = result + self._cond.notify() + + def get(self): + with self._lock: + if self._result is None: + self._cond.wait() + + res = self._result + self._result = None + return res + + +_MasterRegistry = collections.namedtuple('MasterRegistry', ['result']) +_SlavePipeBase = collections.namedtuple( + '_SlavePipeBase', ['identifier', 'queue', 'result']) + + +class SlavePipe(_SlavePipeBase): + """Pipe for master-slave communication.""" + + def run_slave(self, msg): + self.queue.put((self.identifier, msg)) + ret = self.result.get() + self.queue.put(True) + return ret + + +class SyncMaster(object): + """An abstract `SyncMaster` object. + + - During the replication, as the data parallel will trigger an callback of each module, all slave devices should + call `register(id)` and obtain an `SlavePipe` to communicate with the master. + - During the forward pass, master device invokes `run_master`, all messages from slave devices will be collected, + and passed to a registered callback. + - After receiving the messages, the master device should gather the information and determine to message passed + back to each slave devices. + """ + + def __init__(self, master_callback): + """ + + Args: + master_callback: a callback to be invoked after having collected messages from slave devices. + """ + self._master_callback = master_callback + self._queue = queue.Queue() + self._registry = collections.OrderedDict() + self._activated = False + + def __getstate__(self): + return {'master_callback': self._master_callback} + + def __setstate__(self, state): + self.__init__(state['master_callback']) + + def register_slave(self, identifier): + """ + Register an slave device. + + Args: + identifier: an identifier, usually is the device id. + + Returns: a `SlavePipe` object which can be used to communicate with the master device. + + """ + if self._activated: + assert self._queue.empty(), 'Queue is not clean before next initialization.' + self._activated = False + self._registry.clear() + future = FutureResult() + self._registry[identifier] = _MasterRegistry(future) + return SlavePipe(identifier, self._queue, future) + + def run_master(self, master_msg): + """ + Main entry for the master device in each forward pass. + The messages were first collected from each devices (including the master device), and then + an callback will be invoked to compute the message to be sent back to each devices + (including the master device). + + Args: + master_msg: the message that the master want to send to itself. This will be placed as the first + message when calling `master_callback`. 
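To make the master/slave handshake concrete, here is a small self-contained sketch (an editorial illustration, not code from the repository; the import path is an assumption) that drives `SyncMaster` directly from plain threads. The callback simply sums all payloads and broadcasts the total back to every participant:

```python
import threading
from common.sync_batchnorm.comm import SyncMaster  # assumed import path

def master_callback(intermediates):
    # intermediates: [(identifier, message), ...] with the master (id 0) first
    total = sum(msg for _, msg in intermediates)
    return [(identifier, total) for identifier, _ in intermediates]

master = SyncMaster(master_callback)
pipes = [master.register_slave(i) for i in (1, 2)]   # one pipe per slave device

answers = {}
def slave(pipe, value):
    answers[pipe.identifier] = pipe.run_slave(value)  # blocks until the master replies

threads = [threading.Thread(target=slave, args=(pipe, val))
           for pipe, val in zip(pipes, (10, 20))]
for t in threads:
    t.start()
print(master.run_master(5))   # master contributes 5, prints 35
for t in threads:
    t.join()
print(answers)                # {1: 35, 2: 35}
```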
For detailed usage, see `_SynchronizedBatchNorm` for an example. + + Returns: the message to be sent back to the master device. + + """ + self._activated = True + + intermediates = [(0, master_msg)] + for i in range(self.nr_slaves): + intermediates.append(self._queue.get()) + + results = self._master_callback(intermediates) + assert results[0][0] == 0, 'The first result should belongs to the master.' + + for i, res in results: + if i == 0: + continue + self._registry[i].result.put(res) + + for i in range(self.nr_slaves): + assert self._queue.get() is True + + return results[0][1] + + @property + def nr_slaves(self): + return len(self._registry) diff --git a/src/common/sync_batchnorm/replicate.py b/src/common/sync_batchnorm/replicate.py new file mode 100644 index 0000000..a17b489 --- /dev/null +++ b/src/common/sync_batchnorm/replicate.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# File : replicate.py +# Author : Jiayuan Mao +# Email : maojiayuan@gmail.com +# Date : 27/01/2018 +# +# This file is part of Synchronized-BatchNorm-PyTorch. +# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch +# Distributed under MIT License. + +import functools + +from torch.nn.parallel.data_parallel import DataParallel + +__all__ = [ + 'CallbackContext', + 'execute_replication_callbacks', + 'DataParallelWithCallback', + 'patch_replication_callback' +] + + +class CallbackContext(object): + pass + + +def execute_replication_callbacks(modules): + """ + Execute an replication callback `__data_parallel_replicate__` on each module created by original replication. + + The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` + + Note that, as all modules are isomorphism, we assign each sub-module with a context + (shared among multiple copies of this module on different devices). + Through this context, different copies can share some information. + + We guarantee that the callback on the master copy (the first copy) will be called ahead of calling the callback + of any slave copies. + """ + master_copy = modules[0] + nr_modules = len(list(master_copy.modules())) + ctxs = [CallbackContext() for _ in range(nr_modules)] + + for i, module in enumerate(modules): + for j, m in enumerate(module.modules()): + if hasattr(m, '__data_parallel_replicate__'): + m.__data_parallel_replicate__(ctxs[j], i) + + +class DataParallelWithCallback(DataParallel): + """ + Data Parallel with a replication callback. + + An replication callback `__data_parallel_replicate__` of each module will be invoked after being created by + original `replicate` function. + The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` + + Examples: + > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) + > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) + # sync_bn.__data_parallel_replicate__ will be invoked. + """ + + def replicate(self, module, device_ids): + modules = super(DataParallelWithCallback, + self).replicate(module, device_ids) + execute_replication_callbacks(modules) + return modules + + +def patch_replication_callback(data_parallel): + """ + Monkey-patch an existing `DataParallel` object. Add the replication callback. + Useful when you have customized `DataParallel` implementation. 
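For reference, a hypothetical module only needs to define the hook below to take part in this mechanism; `DataParallelWithCallback` (or a patched `DataParallel`) then invokes it once per replica right after `replicate()`:

```python
import torch.nn as nn

class ReplicaAwareBlock(nn.Module):
    """Toy module (illustration only) showing the __data_parallel_replicate__ hook."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(8, 8)
        self.copy_id = None

    def __data_parallel_replicate__(self, ctx, copy_id):
        # ctx is a CallbackContext shared by all copies of this sub-module;
        # copy_id is 0 for the master replica and 1..N-1 for the slaves.
        self.copy_id = copy_id
        if copy_id == 0:
            ctx.master_copy = self   # slaves can later reach the master through ctx
```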
+ + Examples: + > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) + > sync_bn = DataParallel(sync_bn, device_ids=[0, 1]) + > patch_replication_callback(sync_bn) + # this is equivalent to + > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) + > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) + """ + + assert isinstance(data_parallel, DataParallel) + + old_replicate = data_parallel.replicate + + @functools.wraps(old_replicate) + def new_replicate(module, device_ids): + modules = old_replicate(module, device_ids) + execute_replication_callbacks(modules) + return modules + + data_parallel.replicate = new_replicate diff --git a/src/common/warmupLR.py b/src/common/warmupLR.py new file mode 100644 index 0000000..26d09d0 --- /dev/null +++ b/src/common/warmupLR.py @@ -0,0 +1,50 @@ +# This file is covered by the LICENSE file in the root of this project. + +import torch.optim.lr_scheduler as toptim + + +class warmupLR(toptim._LRScheduler): + """ Warmup learning rate scheduler. + Initially, increases the learning rate from 0 to the final value, in a + certain number of steps. After this number of steps, each step decreases + LR exponentially. + """ + + def __init__(self, optimizer, lr, warmup_steps, momentum, decay): + # cyclic params + self.optimizer = optimizer + self.lr = lr + self.warmup_steps = warmup_steps + self.momentum = momentum + self.decay = decay + + # cap to one + if self.warmup_steps < 1: + self.warmup_steps = 1 + + # cyclic lr + self.initial_scheduler = toptim.CyclicLR(self.optimizer, + base_lr=0, + max_lr=self.lr, + step_size_up=self.warmup_steps, + step_size_down=self.warmup_steps, + cycle_momentum=False, + base_momentum=self.momentum, + max_momentum=self.momentum) + + # our params + self.last_epoch = -1 # fix for pytorch 1.1 and below + self.finished = False # am i done + super().__init__(optimizer) + + def get_lr(self): + return [self.lr * (self.decay ** self.last_epoch) for lr in self.base_lrs] + + def step(self, epoch=None): + if self.finished or self.initial_scheduler.last_epoch >= self.warmup_steps: + if not self.finished: + self.base_lrs = [self.lr for lr in self.base_lrs] + self.finished = True + return super(warmupLR, self).step(epoch) + else: + return self.initial_scheduler.step(epoch) diff --git a/src/requirements.txt b/src/requirements.txt new file mode 100644 index 0000000..7a0ef67 --- /dev/null +++ b/src/requirements.txt @@ -0,0 +1,10 @@ +numpy==1.14.0 +scipy==0.19.1 +torch==1.1.0 +tensorflow==1.13.1 +vispy==0.5.3 +torchvision==0.2.2.post3 +opencv_contrib_python==4.1.0.25 +matplotlib==2.2.3 +Pillow==6.1.0 +PyYAML==5.1.1 diff --git a/src/tasks/.DS_Store b/src/tasks/.DS_Store new file mode 100644 index 0000000..142ddb1 Binary files /dev/null and b/src/tasks/.DS_Store differ diff --git a/src/tasks/__init__.py b/src/tasks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tasks/__pycache__/.DS_Store b/src/tasks/__pycache__/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/src/tasks/__pycache__/.DS_Store differ diff --git a/src/tasks/__pycache__/__init__.cpython-36.pyc b/src/tasks/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..702b86f Binary files /dev/null and b/src/tasks/__pycache__/__init__.cpython-36.pyc differ diff --git a/src/tasks/semantic/.DS_Store b/src/tasks/semantic/.DS_Store new file mode 100644 index 0000000..c3abded Binary files /dev/null and b/src/tasks/semantic/.DS_Store differ diff --git a/src/tasks/semantic/__init__.py 
b/src/tasks/semantic/__init__.py new file mode 100644 index 0000000..8979f75 --- /dev/null +++ b/src/tasks/semantic/__init__.py @@ -0,0 +1,4 @@ +import sys +TRAIN_PATH = "../../" +DEPLOY_PATH = "../../../deploy" +sys.path.insert(0, TRAIN_PATH) diff --git a/src/tasks/semantic/__pycache__/.DS_Store b/src/tasks/semantic/__pycache__/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/src/tasks/semantic/__pycache__/.DS_Store differ diff --git a/src/tasks/semantic/__pycache__/__init__.cpython-36.pyc b/src/tasks/semantic/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..bed3886 Binary files /dev/null and b/src/tasks/semantic/__pycache__/__init__.cpython-36.pyc differ diff --git a/src/tasks/semantic/config/.DS_Store b/src/tasks/semantic/config/.DS_Store new file mode 100644 index 0000000..7181c76 Binary files /dev/null and b/src/tasks/semantic/config/.DS_Store differ diff --git a/src/tasks/semantic/config/arch/SSGV321.yaml b/src/tasks/semantic/config/arch/SSGV321.yaml new file mode 100644 index 0000000..4bc7f18 --- /dev/null +++ b/src/tasks/semantic/config/arch/SSGV321.yaml @@ -0,0 +1,101 @@ +################################################################################:::wq: + # training parameters +################################################################################ +train: + loss: "xentropy" # must be either xentropy or iou + max_epochs: 150 + lr: 0.001 # sgd learning rate + wup_epochs: 1 # warmup during first XX epochs (can be float) + momentum: 0.9 # sgd momentum + lr_decay: 0.995 # learning rate decay per epoch after initial cycle (from min lr) + w_decay: 0.0001 # weight decay + batch_size: 16 + # batch size + report_batch: 1 # every x batches, report loss + report_epoch: 1 # every x epochs, report validation set + epsilon_w: 0.001 # class weight w = 1 / (content + epsilon_w) + save_summary: False # Summary of weight histograms for tensorboard + save_scans: True # False doesn't save anything, True saves some + # sample images (one per batch of the last calculated batch) + # in log folder + show_scans: False # show scans during training + workers: 12 # number of threads to get data + +################################################################################ +# backbone parameters +################################################################################ +backbone: + name: "SAC" # ['squeezeseg', 'squeezesegV2', 'darknet'] + input_depth: + range: True + xyz: True + remission: True + dropout: 0.01 + bn_d: 0.01 + OS: 8 # output stride (only horizontally) + train: True # train backbone? + extra: + layers: 21 + +################################################################################ +# decoder parameters +################################################################################ +decoder: + name: "SAC" + dropout: 0.01 + bn_d: 0.01 + train: True # train decoder? 
+ extra: False # nothing to add for this decoder, otherwise this is a dict + +################################################################################ +# classification head parameters +################################################################################ +head: + name: "segmentation" + train: True + dropout: 0.01 + +################################################################################ +# postproc parameters +################################################################################ +post: + CRF: + use: False + train: True + params: False # this should be a dict when in use + KNN: + use: False + params: + knn: 7 + search: 7 + sigma: 1.0 + cutoff: 1.0 + +################################################################################ +# classification head parameters +################################################################################ +# dataset (to find parser) +dataset: + labels: "kitti" + scans: "kitti" + max_points: 150000 # max of any scan in dataset + sensor: + name: "HDL64" + type: "spherical" # projective + fov_up: 3 + fov_down: -25 + img_prop: + width: 2048 + height: 64 + img_means: #range_x,y,z,signal + - 12.12 + - 10.88 + - 0.23 + - -1.04 + - 0.21 + img_stds: #range, x,y,z,signal + - 12.32 + - 11.47 + - 6.91 + - 0.86 + - 0.16 diff --git a/src/tasks/semantic/config/arch/SSGV353.yaml b/src/tasks/semantic/config/arch/SSGV353.yaml new file mode 100644 index 0000000..02105b0 --- /dev/null +++ b/src/tasks/semantic/config/arch/SSGV353.yaml @@ -0,0 +1,100 @@ +################################################################################ +# training parameters +################################################################################ +train: + loss: "xentropy" # must be either xentropy or iou + max_epochs: 150 + lr: 0.005 # sgd learning rate + wup_epochs: 1 # warmup during first XX epochs (can be float) + momentum: 0.9 # sgd momentum + lr_decay: 0.99 # learning rate decay per epoch after initial cycle (from min lr) + w_decay: 0.0001 # weight decay + batch_size: 8 # batch size + report_batch: 1 # every x batches, report loss + report_epoch: 1 # every x epochs, report validation set + epsilon_w: 0.001 # class weight w = 1 / (content + epsilon_w) + save_summary: False # Summary of weight histograms for tensorboard + save_scans: True # False doesn't save anything, True saves some + # sample images (one per batch of the last calculated batch) + # in log folder + show_scans: False # show scans during training + workers: 12 # number of threads to get data + +################################################################################ +# backbone parameters +################################################################################ +backbone: + name: "SAC" # ['squeezeseg', 'squeezesegV2', 'darknet'] + input_depth: + range: True + xyz: True + remission: True + dropout: 0.01 + bn_d: 0.01 + OS: 8 # output stride (only horizontally) + train: True # train backbone? + extra: + layers: 53 + +################################################################################ +# decoder parameters +################################################################################ +decoder: + name: "SAC" + dropout: 0.01 + bn_d: 0.01 + train: True # train decoder? 
+ extra: False # nothing to add for this decoder, otherwise this is a dict + +################################################################################ +# classification head parameters +################################################################################ +head: + name: "segmentation" + train: True + dropout: 0.01 + +################################################################################ +# postproc parameters +################################################################################ +post: + CRF: + use: False + train: True + params: False # this should be a dict when in use + KNN: + use: False + params: + knn: 7 + search: 7 + sigma: 1.0 + cutoff: 1.0 + +################################################################################ +# classification head parameters +################################################################################ +# dataset (to find parser) +dataset: + labels: "kitti" + scans: "kitti" + max_points: 150000 # max of any scan in dataset + sensor: + name: "HDL64" + type: "spherical" # projective + fov_up: 3 + fov_down: -25 + img_prop: + width: 2048 + height: 64 + img_means: #range,x,y,z,signal + - -1.04 + - 10.88 + - 0.23 + - 12.12 + - 0.21 + img_stds: #range,x,y,z,signal + - 0.86 + - 11.47 + - 6.91 + - 12.32 + - 0.16 diff --git a/src/tasks/semantic/config/labels/semantic-kitti-all.yaml b/src/tasks/semantic/config/labels/semantic-kitti-all.yaml new file mode 100644 index 0000000..74d815d --- /dev/null +++ b/src/tasks/semantic/config/labels/semantic-kitti-all.yaml @@ -0,0 +1,224 @@ +# This file is covered by the LICENSE file in the root of this project. +name: "kitti" +labels: + 0 : "unlabeled" + 1 : "outlier" + 10: "car" + 11: "bicycle" + 13: "bus" + 15: "motorcycle" + 16: "on-rails" + 18: "truck" + 20: "other-vehicle" + 30: "person" + 31: "bicyclist" + 32: "motorcyclist" + 40: "road" + 44: "parking" + 48: "sidewalk" + 49: "other-ground" + 50: "building" + 51: "fence" + 52: "other-structure" + 60: "lane-marking" + 70: "vegetation" + 71: "trunk" + 72: "terrain" + 80: "pole" + 81: "traffic-sign" + 99: "other-object" + 252: "moving-car" + 253: "moving-bicyclist" + 254: "moving-person" + 255: "moving-motorcyclist" + 256: "moving-on-rails" + 257: "moving-bus" + 258: "moving-truck" + 259: "moving-other-vehicle" +color_map: # bgr + 0 : [0, 0, 0] + 1 : [0, 0, 255] + 10: [245, 150, 100] + 11: [245, 230, 100] + 13: [250, 80, 100] + 15: [150, 60, 30] + 16: [255, 0, 0] + 18: [180, 30, 80] + 20: [255, 0, 0] + 30: [30, 30, 255] + 31: [200, 40, 255] + 32: [90, 30, 150] + 40: [255, 0, 255] + 44: [255, 150, 255] + 48: [75, 0, 75] + 49: [75, 0, 175] + 50: [0, 200, 255] + 51: [50, 120, 255] + 52: [0, 150, 255] + 60: [170, 255, 150] + 70: [0, 175, 0] + 71: [0, 60, 135] + 72: [80, 240, 150] + 80: [150, 240, 255] + 81: [0, 0, 255] + 99: [255, 255, 50] + 252: [245, 150, 100] + 256: [255, 0, 0] + 253: [200, 40, 255] + 254: [30, 30, 255] + 255: [90, 30, 150] + 257: [250, 80, 100] + 258: [180, 30, 80] + 259: [255, 0, 0] +content: # as a ratio with the total number of points + 0: 0.018889854628292943 + 1: 0.0002937197336781505 + 10: 0.040818519255974316 + 11: 0.00016609538710764618 + 13: 2.7879693665067774e-05 + 15: 0.00039838616015114444 + 16: 0.0 + 18: 0.0020633612104619787 + 20: 0.0016218197275284021 + 30: 0.00017698551338515307 + 31: 1.1065903904919655e-08 + 32: 5.532951952459828e-09 + 40: 0.1987493871255525 + 44: 0.014717169549888214 + 48: 0.14392298360372 + 49: 0.0039048553037472045 + 50: 0.1326861944777486 + 51: 0.0723592229456223 + 52: 
0.002395131480328884 + 60: 4.7084144280367186e-05 + 70: 0.26681502148037506 + 71: 0.006035012012626033 + 72: 0.07814222006271769 + 80: 0.002855498193863172 + 81: 0.0006155958086189918 + 99: 0.009923127583046915 + 252: 0.001789309418528068 + 253: 0.00012709999297008662 + 254: 0.00016059776092534436 + 255: 3.745553104802113e-05 + 256: 0.0 + 257: 0.00011351574470342043 + 258: 0.00010157861367183268 + 259: 4.3840131989471124e-05 +# classes that are indistinguishable from single scan or inconsistent in +# ground truth are mapped to their closest equivalent +learning_map: + 0 : 0 # "unlabeled" + 1 : 0 # "outlier" mapped to "unlabeled" --------------------------mapped + 10: 1 # "car" + 11: 2 # "bicycle" + 13: 5 # "bus" mapped to "other-vehicle" --------------------------mapped + 15: 3 # "motorcycle" + 16: 5 # "on-rails" mapped to "other-vehicle" ---------------------mapped + 18: 4 # "truck" + 20: 5 # "other-vehicle" + 30: 6 # "person" + 31: 7 # "bicyclist" + 32: 8 # "motorcyclist" + 40: 9 # "road" + 44: 10 # "parking" + 48: 11 # "sidewalk" + 49: 12 # "other-ground" + 50: 13 # "building" + 51: 14 # "fence" + 52: 0 # "other-structure" mapped to "unlabeled" ------------------mapped + 60: 9 # "lane-marking" to "road" ---------------------------------mapped + 70: 15 # "vegetation" + 71: 16 # "trunk" + 72: 17 # "terrain" + 80: 18 # "pole" + 81: 19 # "traffic-sign" + 99: 0 # "other-object" to "unlabeled" ----------------------------mapped + 252: 20 # "moving-car" + 253: 21 # "moving-bicyclist" + 254: 22 # "moving-person" + 255: 23 # "moving-motorcyclist" + 256: 24 # "moving-on-rails" mapped to "moving-other-vehicle" ------mapped + 257: 24 # "moving-bus" mapped to "moving-other-vehicle" -----------mapped + 258: 25 # "moving-truck" + 259: 24 # "moving-other-vehicle" +learning_map_inv: # inverse of previous map + 0: 0 # "unlabeled", and others ignored + 1: 10 # "car" + 2: 11 # "bicycle" + 3: 15 # "motorcycle" + 4: 18 # "truck" + 5: 20 # "other-vehicle" + 6: 30 # "person" + 7: 31 # "bicyclist" + 8: 32 # "motorcyclist" + 9: 40 # "road" + 10: 44 # "parking" + 11: 48 # "sidewalk" + 12: 49 # "other-ground" + 13: 50 # "building" + 14: 51 # "fence" + 15: 70 # "vegetation" + 16: 71 # "trunk" + 17: 72 # "terrain" + 18: 80 # "pole" + 19: 81 # "traffic-sign" + 20: 252 # "moving-car" + 21: 253 # "moving-bicyclist" + 22: 254 # "moving-person" + 23: 255 # "moving-motorcyclist" + 24: 259 # "moving-other-vehicle" + 25: 258 # "moving-truck" +learning_ignore: # Ignore classes + 0: True # "unlabeled", and others ignored + 1: False # "car" + 2: False # "bicycle" + 3: False # "motorcycle" + 4: False # "truck" + 5: False # "other-vehicle" + 6: False # "person" + 7: False # "bicyclist" + 8: False # "motorcyclist" + 9: False # "road" + 10: False # "parking" + 11: False # "sidewalk" + 12: False # "other-ground" + 13: False # "building" + 14: False # "fence" + 15: False # "vegetation" + 16: False # "trunk" + 17: False # "terrain" + 18: False # "pole" + 19: False # "traffic-sign" + 20: False # "moving-car" + 21: False # "moving-bicyclist" + 22: False # "moving-person" + 23: False # "moving-motorcyclist" + 24: False # "moving-other-vehicle" + 25: False # "moving-truck" +split: # sequence numbers + train: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 9 + - 10 + valid: + - 8 + test: + - 11 + - 12 + - 13 + - 14 + - 15 + - 16 + - 17 + - 18 + - 19 + - 20 + - 21 diff --git a/src/tasks/semantic/config/labels/semantic-kitti.yaml b/src/tasks/semantic/config/labels/semantic-kitti.yaml new file mode 100644 index 0000000..a4af922 --- 
/dev/null +++ b/src/tasks/semantic/config/labels/semantic-kitti.yaml @@ -0,0 +1,212 @@ +# This file is covered by the LICENSE file in the root of this project. +name: "kitti" +labels: + 0 : "unlabeled" + 1 : "outlier" + 10: "car" + 11: "bicycle" + 13: "bus" + 15: "motorcycle" + 16: "on-rails" + 18: "truck" + 20: "other-vehicle" + 30: "person" + 31: "bicyclist" + 32: "motorcyclist" + 40: "road" + 44: "parking" + 48: "sidewalk" + 49: "other-ground" + 50: "building" + 51: "fence" + 52: "other-structure" + 60: "lane-marking" + 70: "vegetation" + 71: "trunk" + 72: "terrain" + 80: "pole" + 81: "traffic-sign" + 99: "other-object" + 252: "moving-car" + 253: "moving-bicyclist" + 254: "moving-person" + 255: "moving-motorcyclist" + 256: "moving-on-rails" + 257: "moving-bus" + 258: "moving-truck" + 259: "moving-other-vehicle" +color_map: # bgr + 0 : [0, 0, 0] + 1 : [0, 0, 255] + 10: [245, 150, 100] + 11: [245, 230, 100] + 13: [250, 80, 100] + 15: [150, 60, 30] + 16: [255, 0, 0] + 18: [180, 30, 80] + 20: [255, 0, 0] + 30: [30, 30, 255] + 31: [200, 40, 255] + 32: [90, 30, 150] + 40: [255, 0, 255] + 44: [255, 150, 255] + 48: [75, 0, 75] + 49: [75, 0, 175] + 50: [0, 200, 255] + 51: [50, 120, 255] + 52: [0, 150, 255] + 60: [170, 255, 150] + 70: [0, 175, 0] + 71: [0, 60, 135] + 72: [80, 240, 150] + 80: [150, 240, 255] + 81: [0, 0, 255] + 99: [255, 255, 50] + 252: [245, 150, 100] + 256: [255, 0, 0] + 253: [200, 40, 255] + 254: [30, 30, 255] + 255: [90, 30, 150] + 257: [250, 80, 100] + 258: [180, 30, 80] + 259: [255, 0, 0] +content: # as a ratio with the total number of points + 0: 0.018889854628292943 + 1: 0.0002937197336781505 + 10: 0.040818519255974316 + 11: 0.00016609538710764618 + 13: 2.7879693665067774e-05 + 15: 0.00039838616015114444 + 16: 0.0 + 18: 0.0020633612104619787 + 20: 0.0016218197275284021 + 30: 0.00017698551338515307 + 31: 1.1065903904919655e-08 + 32: 5.532951952459828e-09 + 40: 0.1987493871255525 + 44: 0.014717169549888214 + 48: 0.14392298360372 + 49: 0.0039048553037472045 + 50: 0.1326861944777486 + 51: 0.0723592229456223 + 52: 0.002395131480328884 + 60: 4.7084144280367186e-05 + 70: 0.26681502148037506 + 71: 0.006035012012626033 + 72: 0.07814222006271769 + 80: 0.002855498193863172 + 81: 0.0006155958086189918 + 99: 0.009923127583046915 + 252: 0.001789309418528068 + 253: 0.00012709999297008662 + 254: 0.00016059776092534436 + 255: 3.745553104802113e-05 + 256: 0.0 + 257: 0.00011351574470342043 + 258: 0.00010157861367183268 + 259: 4.3840131989471124e-05 +# classes that are indistinguishable from single scan or inconsistent in +# ground truth are mapped to their closest equivalent +learning_map: + 0 : 0 # "unlabeled" + 1 : 0 # "outlier" mapped to "unlabeled" --------------------------mapped + 10: 1 # "car" + 11: 2 # "bicycle" + 13: 5 # "bus" mapped to "other-vehicle" --------------------------mapped + 15: 3 # "motorcycle" + 16: 5 # "on-rails" mapped to "other-vehicle" ---------------------mapped + 18: 4 # "truck" + 20: 5 # "other-vehicle" + 30: 6 # "person" + 31: 7 # "bicyclist" + 32: 8 # "motorcyclist" + 40: 9 # "road" + 44: 10 # "parking" + 48: 11 # "sidewalk" + 49: 12 # "other-ground" + 50: 13 # "building" + 51: 14 # "fence" + 52: 0 # "other-structure" mapped to "unlabeled" ------------------mapped + 60: 9 # "lane-marking" to "road" ---------------------------------mapped + 70: 15 # "vegetation" + 71: 16 # "trunk" + 72: 17 # "terrain" + 80: 18 # "pole" + 81: 19 # "traffic-sign" + 99: 0 # "other-object" to "unlabeled" ----------------------------mapped + 252: 1 # "moving-car" to "car" 
------------------------------------mapped + 253: 7 # "moving-bicyclist" to "bicyclist" ------------------------mapped + 254: 6 # "moving-person" to "person" ------------------------------mapped + 255: 8 # "moving-motorcyclist" to "motorcyclist" ------------------mapped + 256: 5 # "moving-on-rails" mapped to "other-vehicle" --------------mapped + 257: 5 # "moving-bus" mapped to "other-vehicle" -------------------mapped + 258: 4 # "moving-truck" to "truck" --------------------------------mapped + 259: 5 # "moving-other"-vehicle to "other-vehicle" ----------------mapped +learning_map_inv: # inverse of previous map + 0: 0 # "unlabeled", and others ignored + 1: 10 # "car" + 2: 11 # "bicycle" + 3: 15 # "motorcycle" + 4: 18 # "truck" + 5: 20 # "other-vehicle" + 6: 30 # "person" + 7: 31 # "bicyclist" + 8: 32 # "motorcyclist" + 9: 40 # "road" + 10: 44 # "parking" + 11: 48 # "sidewalk" + 12: 49 # "other-ground" + 13: 50 # "building" + 14: 51 # "fence" + 15: 70 # "vegetation" + 16: 71 # "trunk" + 17: 72 # "terrain" + 18: 80 # "pole" + 19: 81 # "traffic-sign" +learning_ignore: # Ignore classes + 0: True # "unlabeled", and others ignored + 1: False # "car" + 2: False # "bicycle" + 3: False # "motorcycle" + 4: False # "truck" + 5: False # "other-vehicle" + 6: False # "person" + 7: False # "bicyclist" + 8: False # "motorcyclist" + 9: False # "road" + 10: False # "parking" + 11: False # "sidewalk" + 12: False # "other-ground" + 13: False # "building" + 14: False # "fence" + 15: False # "vegetation" + 16: False # "trunk" + 17: False # "terrain" + 18: False # "pole" + 19: False # "traffic-sign" +split: # sequence numbers + train: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 9 + - 10 + valid: + - 8 + test: + - 11 + - 12 + - 13 + - 14 + - 15 + - 16 + - 17 + - 18 + - 19 + - 20 + - 21 diff --git a/src/tasks/semantic/dataset/.DS_Store b/src/tasks/semantic/dataset/.DS_Store new file mode 100644 index 0000000..e1ea149 Binary files /dev/null and b/src/tasks/semantic/dataset/.DS_Store differ diff --git a/src/tasks/semantic/dataset/kitti/.DS_Store b/src/tasks/semantic/dataset/kitti/.DS_Store new file mode 100644 index 0000000..38734ca Binary files /dev/null and b/src/tasks/semantic/dataset/kitti/.DS_Store differ diff --git a/src/tasks/semantic/dataset/kitti/__init__.py b/src/tasks/semantic/dataset/kitti/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tasks/semantic/dataset/kitti/__pycache__/.DS_Store b/src/tasks/semantic/dataset/kitti/__pycache__/.DS_Store new file mode 100644 index 0000000..7e34deb Binary files /dev/null and b/src/tasks/semantic/dataset/kitti/__pycache__/.DS_Store differ diff --git a/src/tasks/semantic/dataset/kitti/__pycache__/parser.cpython-36.pyc b/src/tasks/semantic/dataset/kitti/__pycache__/parser.cpython-36.pyc new file mode 100644 index 0000000..4dce3cf Binary files /dev/null and b/src/tasks/semantic/dataset/kitti/__pycache__/parser.cpython-36.pyc differ diff --git a/src/tasks/semantic/dataset/kitti/parser.py b/src/tasks/semantic/dataset/kitti/parser.py new file mode 100644 index 0000000..e379542 --- /dev/null +++ b/src/tasks/semantic/dataset/kitti/parser.py @@ -0,0 +1,362 @@ +import os +import numpy as np +import torch +from torch.utils.data import Dataset +from common.laserscan import LaserScan, SemLaserScan + +EXTENSIONS_SCAN = ['.bin'] +EXTENSIONS_LABEL = ['.label'] + + +def is_scan(filename): + return any(filename.endswith(ext) for ext in EXTENSIONS_SCAN) + + +def is_label(filename): + return any(filename.endswith(ext) for ext in EXTENSIONS_LABEL) + + 
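The `learning_map` tables in the label configs above are consumed as flat lookup tables; the following minimal sketch (with a toy subset of the map) shows the same remapping idea that `SemanticKitti.map()` below implements:

```python
import numpy as np

# Toy subset of learning_map: raw SemanticKITTI ids -> contiguous training ids.
learning_map = {0: 0, 10: 1, 40: 9, 252: 1}       # 252 "moving-car" folds into "car"

maxkey = max(learning_map)
lut = np.zeros((maxkey + 100,), dtype=np.int32)   # +100 guard for unexpected ids
for raw_id, train_id in learning_map.items():
    lut[raw_id] = train_id

raw_labels = np.array([10, 252, 40, 0, 10])
print(lut[raw_labels])                            # -> [1 1 9 0 1]
```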
+class SemanticKitti(Dataset): + + def __init__(self, root, # directory where data is + sequences, # sequences for this data (e.g. [1,3,4,6]) + labels, # label dict: (e.g 10: "car") + color_map, # colors dict bgr (e.g 10: [255, 0, 0]) + learning_map, # classes to learn (0 to N-1 for xentropy) + learning_map_inv, # inverse of previous (recover labels) + sensor, # sensor to parse scans from + max_points=150000, # max number of points present in dataset + gt=True): # send ground truth? + # save deats + self.root = os.path.join(root, "sequences") + self.sequences = sequences + self.labels = labels + self.color_map = color_map + self.learning_map = learning_map + self.learning_map_inv = learning_map_inv + self.sensor = sensor + self.sensor_img_H = sensor["img_prop"]["height"] + self.sensor_img_W = sensor["img_prop"]["width"] + self.sensor_img_means = torch.tensor(sensor["img_means"], + dtype=torch.float) + self.sensor_img_stds = torch.tensor(sensor["img_stds"], + dtype=torch.float) + self.sensor_fov_up = sensor["fov_up"] + self.sensor_fov_down = sensor["fov_down"] + self.max_points = max_points + self.gt = gt + + # get number of classes (can't be len(self.learning_map) because there + # are multiple repeated entries, so the number that matters is how many + # there are for the xentropy) + self.nclasses = len(self.learning_map_inv) + print(self.nclasses) + # sanity checks + + # make sure directory exists + if os.path.isdir(self.root): + print("Sequences folder exists! Using sequences from %s" % self.root) + else: + raise ValueError("Sequences folder doesn't exist! Exiting...") + + # make sure labels is a dict + assert(isinstance(self.labels, dict)) + + # make sure color_map is a dict + assert(isinstance(self.color_map, dict)) + + # make sure learning_map is a dict + assert(isinstance(self.learning_map, dict)) + + # make sure sequences is a list + assert(isinstance(self.sequences, list)) + + # placeholder for filenames + self.scan_files = [] + self.label_files = [] + + # fill in with names, checking that all sequences are complete + for seq in self.sequences: + # to string + seq = '{0:02d}'.format(int(seq)) + + print("parsing seq {}".format(seq)) + + # get paths for each + scan_path = os.path.join(self.root, seq, "velodyne") + label_path = os.path.join(self.root, seq, "labels") + + # get files + scan_files = [os.path.join(dp, f) for dp, dn, fn in os.walk( + os.path.expanduser(scan_path)) for f in fn if is_scan(f)] + label_files = [os.path.join(dp, f) for dp, dn, fn in os.walk( + os.path.expanduser(label_path)) for f in fn if is_label(f)] + + # check all scans have labels + if self.gt: + assert(len(scan_files) == len(label_files)) + + # extend list + self.scan_files.extend(scan_files) + self.label_files.extend(label_files) + + # sort for correspondance + self.scan_files.sort() + self.label_files.sort() + + print("Using {} scans from sequences {}".format(len(self.scan_files), + self.sequences)) + + def __getitem__(self, index): + # get item in tensor shape +# print(index) + scan_file = self.scan_files[index] + if self.gt: + label_file = self.label_files[index] + + # open a semantic laserscan + if self.gt: + scan = SemLaserScan(self.color_map, + project=True, + H=self.sensor_img_H, + W=self.sensor_img_W, + fov_up=self.sensor_fov_up, + fov_down=self.sensor_fov_down) + else: + scan = LaserScan(project=True, + H=self.sensor_img_H, + W=self.sensor_img_W, + fov_up=self.sensor_fov_up, + fov_down=self.sensor_fov_down) + + # open and obtain scan + scan.open_scan(scan_file) + if self.gt: + 
scan.open_label(label_file) + # map unused classes to used classes (also for projection) + scan.sem_label = self.map(scan.sem_label, self.learning_map) + scan.proj_sem_label = self.map(scan.proj_sem_label, self.learning_map) + + # make a tensor of the uncompressed data (with the max num points) + unproj_n_points = scan.points.shape[0] + unproj_xyz = torch.full((self.max_points, 3), -1.0, dtype=torch.float) + unproj_xyz[:unproj_n_points] = torch.from_numpy(scan.points) + unproj_range = torch.full([self.max_points], -1.0, dtype=torch.float) + unproj_range[:unproj_n_points] = torch.from_numpy(scan.unproj_range) + unproj_remissions = torch.full([self.max_points], -1.0, dtype=torch.float) + unproj_remissions[:unproj_n_points] = torch.from_numpy(scan.remissions) + if self.gt: + unproj_labels = torch.full([self.max_points], -1.0, dtype=torch.int32) + unproj_labels[:unproj_n_points] = torch.from_numpy(scan.sem_label) + else: + unproj_labels = [] + + # get points and labels + proj_range = torch.from_numpy(scan.proj_range).clone() + proj_xyz = torch.from_numpy(scan.proj_xyz).clone() + proj_remission = torch.from_numpy(scan.proj_remission).clone() + proj_mask = torch.from_numpy(scan.proj_mask) + if self.gt: + proj_labels = torch.from_numpy(scan.proj_sem_label).clone() + proj_labels = proj_labels * proj_mask + else: + proj_labels = [] + proj_x = torch.full([self.max_points], -1, dtype=torch.long) + proj_x[:unproj_n_points] = torch.from_numpy(scan.proj_x) + proj_y = torch.full([self.max_points], -1, dtype=torch.long) + proj_y[:unproj_n_points] = torch.from_numpy(scan.proj_y) + proj = torch.cat([proj_range.unsqueeze(0).clone(), + proj_xyz.clone().permute(2,0,1), + proj_remission.unsqueeze(0).clone()]) + proj = (proj - self.sensor_img_means[:, None, None]) / self.sensor_img_stds[:, None, None] + proj = proj * proj_mask.float() + + # get name and sequence + path_norm = os.path.normpath(scan_file) + path_split = path_norm.split(os.sep) + path_seq = path_split[-3] + path_name = path_split[-1].replace(".bin", ".label") + + return proj, proj_mask, proj_labels, unproj_labels, path_seq, path_name, \ + proj_x, proj_y, proj_range, unproj_range, proj_xyz, unproj_xyz, proj_remission, unproj_remissions, unproj_n_points + + def __len__(self): + return len(self.scan_files) + + @staticmethod + def map(label, mapdict): + # put label from original values to xentropy + # or vice-versa, depending on dictionary values + # make learning map a lookup table + maxkey = 0 + for key, data in mapdict.items(): + if isinstance(data, list): + nel = len(data) + else: + nel = 1 + if key > maxkey: + maxkey = key + # +100 hack making lut bigger just in case there are unknown labels + if nel > 1: + lut = np.zeros((maxkey + 100, nel), dtype=np.int32) + else: + lut = np.zeros((maxkey + 100), dtype=np.int32) + for key, data in mapdict.items(): + try: + lut[key] = data + except IndexError: + print("Wrong key ", key) + # do the mapping + return lut[label] + + +class Parser(): + # standard conv, BN, relu + def __init__(self, + root, # directory for data + train_sequences, # sequences to train + valid_sequences, # sequences to validate. + test_sequences, # sequences to test (if none, don't get) + labels, # labels in data + color_map, # color for each label + learning_map, # mapping for training labels + learning_map_inv, # recover labels from xentropy + sensor, # sensor to use + max_points, # max points in each scan in entire dataset + batch_size, # batch size for train and val + workers, # threads to load data + gt=True, # get gt? 
+ shuffle_train=True): # shuffle training set? + super(Parser, self).__init__() + + # if I am training, get the dataset + self.root = root + self.train_sequences = train_sequences + self.valid_sequences = valid_sequences + self.test_sequences = test_sequences + self.labels = labels + self.color_map = color_map + self.learning_map = learning_map + self.learning_map_inv = learning_map_inv + self.sensor = sensor + self.max_points = max_points + self.batch_size = batch_size + self.workers = workers + self.gt = gt + self.shuffle_train = shuffle_train + + # number of classes that matters is the one for xentropy + self.nclasses = len(self.learning_map_inv) + print(self.nclasses) + # Data loading code + if self.train_sequences: + self.train_dataset = SemanticKitti(root=self.root, + sequences=self.train_sequences, + labels=self.labels, + color_map=self.color_map, + learning_map=self.learning_map, + learning_map_inv=self.learning_map_inv, + sensor=self.sensor, + max_points=max_points, + gt=self.gt) + + self.trainloader = torch.utils.data.DataLoader(self.train_dataset, + batch_size=self.batch_size, + shuffle=True, + num_workers=self.workers, + pin_memory=True, + drop_last=True) + assert len(self.trainloader) > 0 + self.trainiter = iter(self.trainloader) + if self.valid_sequences: + self.valid_dataset = SemanticKitti(root=self.root, + sequences=self.valid_sequences, + labels=self.labels, + color_map=self.color_map, + learning_map=self.learning_map, + learning_map_inv=self.learning_map_inv, + sensor=self.sensor, + max_points=max_points, + gt=self.gt) + + self.validloader = torch.utils.data.DataLoader(self.valid_dataset, + batch_size=self.batch_size, + shuffle=False, + num_workers=self.workers, + pin_memory=True, + drop_last=True) + assert len(self.validloader) > 0 + self.validiter = iter(self.validloader) + + if self.test_sequences: + self.test_dataset = SemanticKitti(root=self.root, + sequences=self.test_sequences, + labels=self.labels, + color_map=self.color_map, + learning_map=self.learning_map, + learning_map_inv=self.learning_map_inv, + sensor=self.sensor, + max_points=max_points, + gt=False) + + self.testloader = torch.utils.data.DataLoader(self.test_dataset, + batch_size=self.batch_size, + shuffle=False, + num_workers=self.workers, + pin_memory=True, + drop_last=True) + assert len(self.testloader) > 0 + self.testiter = iter(self.testloader) + + def get_train_batch(self): + scans = self.trainiter.next() + return scans + + def get_train_set(self): + return self.trainloader + + def get_valid_batch(self): + scans = self.validiter.next() + return scans + + def get_valid_set(self): + return self.validloader + + def get_test_batch(self): + scans = self.testiter.next() + return scans + + def get_test_set(self): + return self.testloader + + def get_train_size(self): + return len(self.trainloader) + + def get_valid_size(self): + return len(self.validloader) + + def get_test_size(self): + return len(self.testloader) + + def get_n_classes(self): + return self.nclasses + + def get_original_class_string(self, idx): + return self.labels[idx] + + def get_xentropy_class_string(self, idx): + return self.labels[self.learning_map_inv[idx]] + + def to_original(self, label): + # put label in original values + return SemanticKitti.map(label, self.learning_map_inv) + + def to_xentropy(self, label): + # put label in xentropy values + return SemanticKitti.map(label, self.learning_map) + + def to_color(self, label): + # put label in original values + label = SemanticKitti.map(label, self.learning_map_inv) + # put label in 
color + return SemanticKitti.map(label, self.color_map) diff --git a/src/tasks/semantic/decoders/.DS_Store b/src/tasks/semantic/decoders/.DS_Store new file mode 100644 index 0000000..a292f56 Binary files /dev/null and b/src/tasks/semantic/decoders/.DS_Store differ diff --git a/src/tasks/semantic/decoders/SAC.py b/src/tasks/semantic/decoders/SAC.py new file mode 100644 index 0000000..40db643 --- /dev/null +++ b/src/tasks/semantic/decoders/SAC.py @@ -0,0 +1,133 @@ +# This file was modified from https://github.com/BobLiu20/YOLOv3_PyTorch +# It needed to be modified in order to accomodate for different strides in the + +import torch.nn as nn +from collections import OrderedDict +import torch.nn.functional as F + + +class BasicBlock(nn.Module): + def __init__(self, inplanes, planes, bn_d=0.1): + super(BasicBlock, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes[0], kernel_size=1, + stride=1, padding=0, bias=False) + self.bn1 = nn.BatchNorm2d(planes[0], momentum=bn_d) + self.relu1 = nn.LeakyReLU(0.1) + self.conv2 = nn.Conv2d(planes[0], planes[1], kernel_size=3, + stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes[1], momentum=bn_d) + self.relu2 = nn.LeakyReLU(0.1) + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu1(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu2(out) + + out += residual + return out + + +# ****************************************************************************** + +class Decoder(nn.Module): + """ + Class for DarknetSeg. Subclasses PyTorch's own "nn" module + """ + + def __init__(self, params, stub_skips, OS=32, feature_depth=1024): + super(Decoder, self).__init__() + self.backbone_OS = OS + self.backbone_feature_depth = feature_depth + self.drop_prob = params["dropout"] + self.bn_d = params["bn_d"] + + # stride play + self.strides = [1, 1, 2, 2, 2] + # check current stride + current_os = 1 + for s in self.strides: + current_os *= s + print("Decoder original OS: ", int(current_os)) + # redo strides according to needed stride + for i, stride in enumerate(self.strides): + if int(current_os) != self.backbone_OS: + if stride == 2: + current_os /= 2 + self.strides[i] = 1 + if int(current_os) == self.backbone_OS: + break + print("Decoder new OS: ", int(current_os)) + print("Decoder strides: ", self.strides) + + # decoder + self.dec5 = self._make_dec_layer(BasicBlock, + [self.backbone_feature_depth, 256], + bn_d=self.bn_d, + stride=self.strides[0]) + self.dec4 = self._make_dec_layer(BasicBlock, [256, 256], bn_d=self.bn_d, + stride=self.strides[1]) + self.dec3 = self._make_dec_layer(BasicBlock, [256, 128], bn_d=self.bn_d, + stride=self.strides[2]) + self.dec2 = self._make_dec_layer(BasicBlock, [128, 64], bn_d=self.bn_d, + stride=self.strides[3]) + self.dec1 = self._make_dec_layer(BasicBlock, [64, 32], bn_d=self.bn_d, + stride=self.strides[4]) + + # layer list to execute with skips + self.layers = [self.dec5, self.dec4, self.dec3, self.dec2, self.dec1] + + # for a bit of fun + self.dropout = nn.Dropout2d(self.drop_prob) + + # last channels + self.last_channels = 32 + + def _make_dec_layer(self, block, planes, bn_d=0.1, stride=2,flag=True): + layers = [] + + # downsample + if stride==2: + layers.append(("upconv", nn.ConvTranspose2d(planes[0], planes[1], + kernel_size=[1, 4], stride=[1, 2], + padding=[0, 1]))) + else: + layers.append(("conv", nn.Conv2d(planes[0], planes[1], + kernel_size=3, padding=1))) + layers.append(("bn", nn.BatchNorm2d(planes[1], momentum=bn_d))) + 
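        # Note: in the stride == 2 branch above, the transposed convolution
        # upsamples only the width axis. With kernel_size=[1, 4], stride=[1, 2],
        # padding=[0, 1]:
        #   H_out = (H_in - 1) * 1 - 2 * 0 + 1 = H_in
        #   W_out = (W_in - 1) * 2 - 2 * 1 + 4 = 2 * W_in
        # so, for example, planes = [256, 128] turns an (N, 256, 64, 256) feature
        # map into (N, 128, 64, 512), undoing the horizontal-only output stride
        # of the backbone one factor of 2 at a time.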
layers.append(("relu", nn.LeakyReLU(0.1))) + + # blocks + layers.append(("residual", block(planes[1], planes, bn_d))) + + return nn.Sequential(OrderedDict(layers)) + + def run_layer(self, x, layer, skips, os): + feats = layer(x) # up + if feats.shape[-1] > x.shape[-1]: + os //= 2 # match skip + feats = feats + skips[os].detach() # add skip + x = feats + return x, skips, os + + def forward(self, x, skips): + os = self.backbone_OS + + # run layers + x1, skips, os = self.run_layer(x, self.dec5, skips, os) + x2, skips, os = self.run_layer(x1, self.dec4, skips, os) + x3, skips, os = self.run_layer(x2, self.dec3, skips, os) + x4, skips, os = self.run_layer(x3, self.dec2, skips, os) + x5, skips, os = self.run_layer(x4, self.dec1, skips, os) + + x5 = self.dropout(x5) + + return [x5, x4, x3, x2, x1] + + def get_last_depth(self): + return self.last_channels diff --git a/src/tasks/semantic/decoders/__init__.py b/src/tasks/semantic/decoders/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tasks/semantic/decoders/__pycache__/.DS_Store b/src/tasks/semantic/decoders/__pycache__/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/src/tasks/semantic/decoders/__pycache__/.DS_Store differ diff --git a/src/tasks/semantic/decoders/__pycache__/SAC.cpython-36.pyc b/src/tasks/semantic/decoders/__pycache__/SAC.cpython-36.pyc new file mode 100644 index 0000000..c2ecb4f Binary files /dev/null and b/src/tasks/semantic/decoders/__pycache__/SAC.cpython-36.pyc differ diff --git a/src/tasks/semantic/demo.py b/src/tasks/semantic/demo.py new file mode 100755 index 0000000..43a0603 --- /dev/null +++ b/src/tasks/semantic/demo.py @@ -0,0 +1,98 @@ + +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. + +import argparse +import subprocess +import datetime +import yaml +from shutil import copyfile +import os +import shutil +import __init__ as booger + +from tasks.semantic.modules.demo_user import * + + +if __name__ == '__main__': + parser = argparse.ArgumentParser("./demo.py") + parser.add_argument( + '--dataset', '-d', + type=str, + default = "../../../sample_data/", + help='Dataset to sample' + ) + parser.add_argument( + '--log', '-l', + type=str, + default= '../../../sample_output/', + help='Directory to put the predictions. Default: ~/logs/date+time' + ) + parser.add_argument( + '--model', '-m', + type=str, + required=True, + default=None, + help='Directory to get the trained model.' 
+ ) + FLAGS, unparsed = parser.parse_known_args() + + # print summary of what we will do + print("----------") + print("INTERFACE:") + print("dataset", FLAGS.dataset) + print("log", FLAGS.log) + print("model", FLAGS.model) + print("----------\n") + + + # open arch config file + try: + print("Opening arch config file from %s" % FLAGS.model) + ARCH = yaml.safe_load(open(FLAGS.model + "/arch_cfg.yaml", 'r')) + except Exception as e: + print(e) + print("Error opening arch yaml file.") + quit() + + # open data config file + try: + print("Opening data config file from %s" % FLAGS.model) + DATA = yaml.safe_load(open(FLAGS.model + "/data_cfg.yaml", 'r')) + except Exception as e: + print(e) + print("Error opening data yaml file.") + quit() + + # create log folder + try: + if os.path.isdir(FLAGS.log): + shutil.rmtree(FLAGS.log) + os.makedirs(FLAGS.log) + os.makedirs(os.path.join(FLAGS.log, "sequences")) + + for seq in DATA["split"]["sample"]: + seq = '{0:02d}'.format(int(seq)) + print("sample_list",seq) + os.makedirs(os.path.join(FLAGS.log,"sequences", str(seq))) + os.makedirs(os.path.join(FLAGS.log,"sequences", str(seq), "predictions")) + + except Exception as e: + print(e) + print("Error creating log directory. Check permissions!") + raise + + except Exception as e: + print(e) + print("Error creating log directory. Check permissions!") + quit() + + # does model folder exist? + if os.path.isdir(FLAGS.model): + print("model folder exists! Using model from %s" % (FLAGS.model)) + else: + print("model folder doesnt exist! Can't infer...") + quit() + # create user and infer dataset + user = User(ARCH, DATA, FLAGS.dataset, FLAGS.log, FLAGS.model) + user.infer() diff --git a/src/tasks/semantic/evaluate_biou.py b/src/tasks/semantic/evaluate_biou.py new file mode 100755 index 0000000..c98097d --- /dev/null +++ b/src/tasks/semantic/evaluate_biou.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. + +import argparse +import os +import yaml +import sys +import numpy as np +import torch +import __init__ as booger + +from tasks.semantic.modules.ioueval import biouEval +from common.laserscan import SemLaserScan + +# possible splits +splits = ["train", "valid", "test"] + +if __name__ == '__main__': + parser = argparse.ArgumentParser("./evaluate_biou.py") + parser.add_argument( + '--dataset', '-d', + type=str, + required=True, + help='Dataset dir. No Default', + ) + parser.add_argument( + '--predictions', '-p', + type=str, + required=None, + help='Prediction dir. Same organization as dataset, but predictions in' + 'each sequences "prediction" directory. No Default. If no option is set' + ' we look for the labels in the same directory as dataset' + ) + parser.add_argument( + '--split', '-s', + type=str, + required=False, + choices=["train", "valid", "test"], + default="valid", + help='Split to evaluate on. One of ' + + str(splits) + '. Defaults to %(default)s', + ) + parser.add_argument( + '--data_cfg', '-dc', + type=str, + required=False, + default="config/labels/semantic-kitti.yaml", + help='Dataset config file. Defaults to %(default)s', + ) + parser.add_argument( + '--border', '-bs', + type=int, + required=False, + default=1, + help='Border size. Defaults to %(default)s', + ) + parser.add_argument( + '--conn', '-c', + type=int, + required=False, + default=4, + help='Kernel connectivity. 
Defaults to %(default)s', + ) + FLAGS, unparsed = parser.parse_known_args() + + # fill in real predictions dir + if FLAGS.predictions is None: + FLAGS.predictions = FLAGS.dataset + + # print summary of what we will do + print("*" * 80) + print("INTERFACE:") + print("Data: ", FLAGS.dataset) + print("Predictions: ", FLAGS.predictions) + print("Split: ", FLAGS.split) + print("Config: ", FLAGS.data_cfg) + print("Border Mask Size", FLAGS.border) + print("Border Mask Connectivity", FLAGS.conn) + print("*" * 80) + + # assert split + assert(FLAGS.split in splits) + + # open data config file + try: + print("Opening data config file %s" % FLAGS.data_cfg) + DATA = yaml.safe_load(open(FLAGS.data_cfg, 'r')) + except Exception as e: + print(e) + print("Error opening data yaml file.") + quit() + + # get number of interest classes, and the label mappings + class_strings = DATA["labels"] + class_remap = DATA["learning_map"] + class_inv_remap = DATA["learning_map_inv"] + class_ignore = DATA["learning_ignore"] + nr_classes = len(class_inv_remap) + + # make lookup table for mapping + maxkey = 0 + for key, data in class_remap.items(): + if key > maxkey: + maxkey = key + # +100 hack making lut bigger just in case there are unknown labels + remap_lut = np.zeros((maxkey + 100), dtype=np.int32) + for key, data in class_remap.items(): + try: + remap_lut[key] = data + except IndexError: + print("Wrong key ", key) + # print(remap_lut) + + # create evaluator + ignore = [] + for cl, ign in class_ignore.items(): + if ign: + x_cl = int(cl) + ignore.append(x_cl) + print("Ignoring xentropy class ", x_cl, " in IoU evaluation") + + # create evaluator + device = torch.device( + "cuda") if torch.cuda.is_available() else torch.device('cpu') + evaluator = biouEval(nr_classes, device, ignore, + FLAGS.border, FLAGS.conn) + evaluator.reset() + + # get test set + test_sequences = DATA["split"][FLAGS.split] + + # get scan paths + scan_names = [] + for sequence in test_sequences: + sequence = '{0:02d}'.format(int(sequence)) + scan_paths = os.path.join(FLAGS.dataset, "sequences", + str(sequence), "velodyne") + # populate the scan names + seq_scan_names = [os.path.join(dp, f) for dp, dn, fn in os.walk( + os.path.expanduser(scan_paths)) for f in fn if ".bin" in f] + seq_scan_names.sort() + scan_names.extend(seq_scan_names) + # print(scan_names) + + # get label paths + label_names = [] + for sequence in test_sequences: + sequence = '{0:02d}'.format(int(sequence)) + label_paths = os.path.join(FLAGS.dataset, "sequences", + str(sequence), "labels") + # populate the label names + seq_label_names = [os.path.join(dp, f) for dp, dn, fn in os.walk( + os.path.expanduser(label_paths)) for f in fn if ".label" in f] + seq_label_names.sort() + label_names.extend(seq_label_names) + # print(label_names) + + # get predictions paths + pred_names = [] + for sequence in test_sequences: + sequence = '{0:02d}'.format(int(sequence)) + pred_paths = os.path.join(FLAGS.predictions, "sequences", + sequence, "predictions") + # populate the label names + seq_pred_names = [os.path.join(dp, f) for dp, dn, fn in os.walk( + os.path.expanduser(pred_paths)) for f in fn if ".label" in f] + seq_pred_names.sort() + pred_names.extend(seq_pred_names) + # print(pred_names) + + # check that I have the same number of files + # print("labels: ", len(label_names)) + # print("predictions: ", len(pred_names)) + assert(len(label_names) == len(scan_names) and + len(label_names) == len(pred_names)) + + print("Evaluating sequences: ") + # open each file, get the tensor, and make the iou 
comparison + for scan_file, label_file, pred_file in zip(scan_names, label_names, pred_names): + print("evaluating label ", label_file, "with", pred_file) + # open label + label = SemLaserScan(project=True) + label.open_scan(scan_file) + label.open_label(label_file) + u_label_sem = remap_lut[label.sem_label] # remap to xentropy format + p_label_sem = remap_lut[label.proj_sem_label] # remap to xentropy format + u_scan_px = label.proj_x + u_scan_py = label.proj_y + + # open prediction + pred = SemLaserScan(project=True) + pred.open_scan(scan_file) + pred.open_label(pred_file) + u_pred_sem = remap_lut[pred.sem_label] # remap to xentropy format + + # add single scan to evaluation + evaluator.addBorderBatch1d(p_label_sem, u_pred_sem, u_label_sem, + u_scan_px, u_scan_py) + + # when I am done, print the evaluation + m_accuracy = evaluator.getacc() + m_jaccard, class_jaccard = evaluator.getIoU() + + print('Validation set:\n' + 'bAcc avg {m_accuracy:.3f}\n' + 'bIoU avg {m_jaccard:.3f}'.format(m_accuracy=m_accuracy, + m_jaccard=m_jaccard)) + # print also classwise + for i, jacc in enumerate(class_jaccard): + if i not in ignore: + print('bIoU class {i:} [{class_str:}] = {jacc:.3f}'.format( + i=i, class_str=class_strings[class_inv_remap[i]], jacc=jacc)) + + # print for spreadsheet + print("*" * 80) + print("below can be copied straight for paper table") + for i, jacc in enumerate(class_jaccard): + if i not in ignore: + sys.stdout.write('{jacc:.3f}'.format(jacc=jacc.item())) + sys.stdout.write(",") + sys.stdout.write('{jacc:.3f}'.format(jacc=m_jaccard.item())) + sys.stdout.write(",") + sys.stdout.write('{acc:.3f}'.format(acc=m_accuracy.item())) + sys.stdout.write('\n') + sys.stdout.flush() diff --git a/src/tasks/semantic/evaluate_iou.py b/src/tasks/semantic/evaluate_iou.py new file mode 100755 index 0000000..7f4e038 --- /dev/null +++ b/src/tasks/semantic/evaluate_iou.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. + +import argparse +import os +import yaml +import sys +import numpy as np +import torch +import __init__ as booger + +from tasks.semantic.modules.ioueval import iouEval +from common.laserscan import SemLaserScan + +# possible splits +splits = ["train", "valid", "test"] + +if __name__ == '__main__': + parser = argparse.ArgumentParser("./evaluate_iou.py") + parser.add_argument( + '--dataset', '-d', + type=str, + required=True, + help='Dataset dir. No Default', + ) + parser.add_argument( + '--predictions', '-p', + type=str, + required=None, + help='Prediction dir. Same organization as dataset, but predictions in' + 'each sequences "prediction" directory. No Default. If no option is set' + ' we look for the labels in the same directory as dataset' + ) + parser.add_argument( + '--split', '-s', + type=str, + required=False, + choices=["train", "valid", "test"], + default="valid", + help='Split to evaluate on. One of ' + + str(splits) + '. Defaults to %(default)s', + ) + parser.add_argument( + '--data_cfg', '-dc', + type=str, + required=False, + default="config/labels/semantic-kitti.yaml", + help='Dataset config file. Defaults to %(default)s', + ) + parser.add_argument( + '--limit', '-l', + type=int, + required=False, + default=None, + help='Limit to the first "--limit" points of each scan. Useful for' + ' evaluating single scan from aggregated pointcloud.' 
+ ' Defaults to %(default)s', + ) + + FLAGS, unparsed = parser.parse_known_args() + + # fill in real predictions dir + if FLAGS.predictions is None: + FLAGS.predictions = FLAGS.dataset + + # print summary of what we will do + print("*" * 80) + print("INTERFACE:") + print("Data: ", FLAGS.dataset) + print("Predictions: ", FLAGS.predictions) + print("Split: ", FLAGS.split) + print("Config: ", FLAGS.data_cfg) + print("Limit: ", FLAGS.limit) + print("*" * 80) + + # assert split + assert(FLAGS.split in splits) + + # open data config file + try: + print("Opening data config file %s" % FLAGS.data_cfg) + DATA = yaml.safe_load(open(FLAGS.data_cfg, 'r')) + except Exception as e: + print(e) + print("Error opening data yaml file.") + quit() + + # get number of interest classes, and the label mappings + class_strings = DATA["labels"] + class_remap = DATA["learning_map"] + class_inv_remap = DATA["learning_map_inv"] + class_ignore = DATA["learning_ignore"] + nr_classes = len(class_inv_remap) + + # make lookup table for mapping + maxkey = 0 + for key, data in class_remap.items(): + if key > maxkey: + maxkey = key + # +100 hack making lut bigger just in case there are unknown labels + remap_lut = np.zeros((maxkey + 100), dtype=np.int32) + for key, data in class_remap.items(): + try: + remap_lut[key] = data + except IndexError: + print("Wrong key ", key) + # print(remap_lut) + + # create evaluator + ignore = [] + for cl, ign in class_ignore.items(): + if ign: + x_cl = int(cl) + ignore.append(x_cl) + print("Ignoring xentropy class ", x_cl, " in IoU evaluation") + + # create evaluator + device = torch.device("cpu") + evaluator = iouEval(nr_classes, device, ignore) + evaluator.reset() + + # get test set + test_sequences = DATA["split"][FLAGS.split] + + # get scan paths + scan_names = [] + for sequence in test_sequences: + sequence = '{0:02d}'.format(int(sequence)) + scan_paths = os.path.join(FLAGS.dataset, "sequences", + str(sequence), "velodyne") + # populate the scan names + seq_scan_names = [os.path.join(dp, f) for dp, dn, fn in os.walk( + os.path.expanduser(scan_paths)) for f in fn if ".bin" in f] + seq_scan_names.sort() + scan_names.extend(seq_scan_names) + # print(scan_names) + + # get label paths + label_names = [] + for sequence in test_sequences: + sequence = '{0:02d}'.format(int(sequence)) + label_paths = os.path.join(FLAGS.dataset, "sequences", + str(sequence), "labels") + # populate the label names + seq_label_names = [os.path.join(dp, f) for dp, dn, fn in os.walk( + os.path.expanduser(label_paths)) for f in fn if ".label" in f] + seq_label_names.sort() + label_names.extend(seq_label_names) + # print(label_names) + + # get predictions paths + pred_names = [] + for sequence in test_sequences: + sequence = '{0:02d}'.format(int(sequence)) + pred_paths = os.path.join(FLAGS.predictions, "sequences", + sequence, "predictions") + # populate the label names + seq_pred_names = [os.path.join(dp, f) for dp, dn, fn in os.walk( + os.path.expanduser(pred_paths)) for f in fn if ".label" in f] + seq_pred_names.sort() + pred_names.extend(seq_pred_names) + # print(pred_names) + + # check that I have the same number of files + # print("labels: ", len(label_names)) + # print("predictions: ", len(pred_names)) + assert(len(label_names) == len(scan_names) and + len(label_names) == len(pred_names)) + + print("Evaluating sequences: ") + # open each file, get the tensor, and make the iou comparison + for scan_file, label_file, pred_file in zip(scan_names, label_names, pred_names): + print("evaluating label ", label_file, 
"with", pred_file) + # open label + label = SemLaserScan(project=False) + label.open_scan(scan_file) + label.open_label(label_file) + u_label_sem = remap_lut[label.sem_label] # remap to xentropy format + if FLAGS.limit is not None: + u_label_sem = u_label_sem[:FLAGS.limit] + + # open prediction + pred = SemLaserScan(project=False) + pred.open_scan(scan_file) + pred.open_label(pred_file) + u_pred_sem = remap_lut[pred.sem_label] # remap to xentropy format + if FLAGS.limit is not None: + u_pred_sem = u_pred_sem[:FLAGS.limit] + + # add single scan to evaluation + evaluator.addBatch(u_pred_sem, u_label_sem) + + # when I am done, print the evaluation + m_accuracy = evaluator.getacc() + m_jaccard, class_jaccard = evaluator.getIoU() + + print('Validation set:\n' + 'Acc avg {m_accuracy:.3f}\n' + 'IoU avg {m_jaccard:.3f}'.format(m_accuracy=m_accuracy, + m_jaccard=m_jaccard)) + # print also classwise + for i, jacc in enumerate(class_jaccard): + if i not in ignore: + print('IoU class {i:} [{class_str:}] = {jacc:.3f}'.format( + i=i, class_str=class_strings[class_inv_remap[i]], jacc=jacc)) + + # print for spreadsheet + print("*" * 80) + print("below can be copied straight for paper table") + for i, jacc in enumerate(class_jaccard): + if i not in ignore: + sys.stdout.write('{jacc:.3f}'.format(jacc=jacc.item())) + sys.stdout.write(",") + sys.stdout.write('{jacc:.3f}'.format(jacc=m_jaccard.item())) + sys.stdout.write(",") + sys.stdout.write('{acc:.3f}'.format(acc=m_accuracy.item())) + sys.stdout.write('\n') + sys.stdout.flush() diff --git a/src/tasks/semantic/infer.py b/src/tasks/semantic/infer.py new file mode 100755 index 0000000..923ef09 --- /dev/null +++ b/src/tasks/semantic/infer.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. + +import argparse +import subprocess +import datetime +import yaml +from shutil import copyfile +import os +import shutil +import __init__ as booger + +from tasks.semantic.modules.user import * + + +if __name__ == '__main__': + parser = argparse.ArgumentParser("./infer.py") + parser.add_argument( + '--dataset', '-d', + type=str, + required=True, + help='Dataset to train with. No Default', + ) + parser.add_argument( + '--log', '-l', + type=str, + default=os.path.expanduser("~") + '/logs/' + + datetime.datetime.now().strftime("%Y-%-m-%d-%H:%M") + '/', + help='Directory to put the predictions. Default: ~/logs/date+time' + ) + parser.add_argument( + '--model', '-m', + type=str, + required=True, + default=None, + help='Directory to get the trained model.' 
+ ) + FLAGS, unparsed = parser.parse_known_args() + + # print summary of what we will do + print("----------") + print("INTERFACE:") + print("dataset", FLAGS.dataset) + print("log", FLAGS.log) + print("model", FLAGS.model) + print("----------\n") + + + # open arch config file + try: + print("Opening arch config file from %s" % FLAGS.model) + ARCH = yaml.safe_load(open(FLAGS.model + "/arch_cfg.yaml", 'r')) + except Exception as e: + print(e) + print("Error opening arch yaml file.") + quit() + + # open data config file + try: + print("Opening data config file from %s" % FLAGS.model) + DATA = yaml.safe_load(open(FLAGS.model + "/data_cfg.yaml", 'r')) + except Exception as e: + print(e) + print("Error opening data yaml file.") + quit() + + # create log folder + try: + if os.path.isdir(FLAGS.log): + shutil.rmtree(FLAGS.log) + os.makedirs(FLAGS.log) + os.makedirs(os.path.join(FLAGS.log, "sequences")) + for seq in DATA["split"]["train"]: + seq = '{0:02d}'.format(int(seq)) + print("train", seq) + os.makedirs(os.path.join(FLAGS.log, "sequences", seq)) + os.makedirs(os.path.join(FLAGS.log, "sequences", seq, "predictions")) + for seq in DATA["split"]["valid"]: + seq = '{0:02d}'.format(int(seq)) + print("valid", seq) + os.makedirs(os.path.join(FLAGS.log, "sequences", seq)) + os.makedirs(os.path.join(FLAGS.log, "sequences", seq, "predictions")) + for seq in DATA["split"]["test"]: + seq = '{0:02d}'.format(int(seq)) + print("test", seq) + os.makedirs(os.path.join(FLAGS.log, "sequences", seq)) + os.makedirs(os.path.join(FLAGS.log, "sequences", seq, "predictions")) + except Exception as e: + print(e) + print("Error creating log directory. Check permissions!") + raise + + except Exception as e: + print(e) + print("Error creating log directory. Check permissions!") + quit() + + # does model folder exist? + if os.path.isdir(FLAGS.model): + print("model folder exists! Using model from %s" % (FLAGS.model)) + else: + print("model folder doesnt exist! 
Can't infer...") + quit() + + # create user and infer dataset + user = User(ARCH, DATA, FLAGS.dataset, FLAGS.log, FLAGS.model) + user.infer() diff --git a/src/tasks/semantic/modules/.DS_Store b/src/tasks/semantic/modules/.DS_Store new file mode 100644 index 0000000..a292f56 Binary files /dev/null and b/src/tasks/semantic/modules/.DS_Store differ diff --git a/src/tasks/semantic/modules/__init__.py b/src/tasks/semantic/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tasks/semantic/modules/__pycache__/.DS_Store b/src/tasks/semantic/modules/__pycache__/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/src/tasks/semantic/modules/__pycache__/.DS_Store differ diff --git a/src/tasks/semantic/modules/__pycache__/__init__.cpython-36.pyc b/src/tasks/semantic/modules/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..e7f24b1 Binary files /dev/null and b/src/tasks/semantic/modules/__pycache__/__init__.cpython-36.pyc differ diff --git a/src/tasks/semantic/modules/__pycache__/demo_user.cpython-36.pyc b/src/tasks/semantic/modules/__pycache__/demo_user.cpython-36.pyc new file mode 100644 index 0000000..8c32d92 Binary files /dev/null and b/src/tasks/semantic/modules/__pycache__/demo_user.cpython-36.pyc differ diff --git a/src/tasks/semantic/modules/__pycache__/ioueval.cpython-36.pyc b/src/tasks/semantic/modules/__pycache__/ioueval.cpython-36.pyc new file mode 100644 index 0000000..a458e75 Binary files /dev/null and b/src/tasks/semantic/modules/__pycache__/ioueval.cpython-36.pyc differ diff --git a/src/tasks/semantic/modules/__pycache__/segmentator.cpython-36.pyc b/src/tasks/semantic/modules/__pycache__/segmentator.cpython-36.pyc new file mode 100644 index 0000000..5809eb0 Binary files /dev/null and b/src/tasks/semantic/modules/__pycache__/segmentator.cpython-36.pyc differ diff --git a/src/tasks/semantic/modules/__pycache__/trainer.cpython-36.pyc b/src/tasks/semantic/modules/__pycache__/trainer.cpython-36.pyc new file mode 100644 index 0000000..2438714 Binary files /dev/null and b/src/tasks/semantic/modules/__pycache__/trainer.cpython-36.pyc differ diff --git a/src/tasks/semantic/modules/__pycache__/user.cpython-36.pyc b/src/tasks/semantic/modules/__pycache__/user.cpython-36.pyc new file mode 100644 index 0000000..2377a0c Binary files /dev/null and b/src/tasks/semantic/modules/__pycache__/user.cpython-36.pyc differ diff --git a/src/tasks/semantic/modules/demo_user.py b/src/tasks/semantic/modules/demo_user.py new file mode 100644 index 0000000..4d78250 --- /dev/null +++ b/src/tasks/semantic/modules/demo_user.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. 
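+#
+# Overview (summary of the class defined below): the demo-time User builds
+# the dataset parser on the "sample" split only, restores a trained
+# Segmentator from the model directory, optionally refines the projected
+# argmax with KNN post-processing, and for every scan writes the .label
+# prediction plus a colorized depth/prediction .png into
+# <logdir>/sequences/<seq>/predictions/.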
+ +import torch +import torch.nn as nn +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms +import imp +import yaml +import time +from PIL import Image +import __init__ as booger +import collections +import copy +import cv2 +import os +import numpy as np +from tasks.semantic.modules.segmentator import * +from tasks.semantic.modules.trainer import * +from tasks.semantic.postproc.KNN import KNN + + +class User(): + def __init__(self, ARCH, DATA, datadir, logdir, modeldir): + # parameters + self.ARCH = ARCH + self.DATA = DATA + self.datadir = datadir + self.logdir = logdir + self.modeldir = modeldir + + # get the data + parserModule = imp.load_source("parserModule", + booger.TRAIN_PATH + '/tasks/semantic/dataset/' + + self.DATA["name"] + '/parser.py') + self.parser = parserModule.Parser(root=self.datadir, + train_sequences=None, + valid_sequences=None, + test_sequences=self.DATA["split"]["sample"], + labels=self.DATA["labels"], + color_map=self.DATA["color_map"], + learning_map=self.DATA["learning_map"], + learning_map_inv=self.DATA["learning_map_inv"], + sensor=self.ARCH["dataset"]["sensor"], + max_points=self.ARCH["dataset"]["max_points"], + batch_size=1, + workers=self.ARCH["train"]["workers"], + gt=True, + shuffle_train=False) + + # concatenate the encoder and the head + with torch.no_grad(): + self.model = Segmentator(self.ARCH, + self.parser.get_n_classes(), + self.modeldir) + + # use knn post processing? + self.post = None + if self.ARCH["post"]["KNN"]["use"]: + self.post = KNN(self.ARCH["post"]["KNN"]["params"], + self.parser.get_n_classes()) + + # GPU? + self.gpu = False + self.model_single = self.model + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print("Infering in device: ", self.device) + if torch.cuda.is_available() and torch.cuda.device_count() > 0: + cudnn.benchmark = True + cudnn.fastest = True + self.gpu = True + self.model.cuda() + + def infer(self): + # do test set + self.infer_subset(loader=self.parser.get_test_set(), + to_orig_fn=self.parser.to_original) + + print('Finished Infering') + + return + + def infer_subset(self, loader, to_orig_fn): + # switch to evaluate mode + self.model.eval() + + # empty the cache to infer in high res + if self.gpu: + torch.cuda.empty_cache() + + with torch.no_grad(): + end = time.time() + + for i, (proj_in, proj_mask, _, _, path_seq, path_name, p_x, p_y, proj_range, unproj_range, _, _, _, _, npoints) in enumerate(loader): + # first cut to rela size (batch size one allows it) + p_x = p_x[0, :npoints] + p_y = p_y[0, :npoints] + proj_range = proj_range[0, :npoints] + unproj_range = unproj_range[0, :npoints] + path_seq = path_seq[0] + path_name = path_name[0] + + if self.gpu: + proj_in = proj_in.cuda() + proj_mask = proj_mask.cuda() + p_x = p_x.cuda() + p_y = p_y.cuda() + + if self.post: + proj_range = proj_range.cuda() + unproj_range = unproj_range.cuda() + + proj_output, _, _, _, _ = self.model(proj_in, proj_mask) + proj_argmax = proj_output[0].argmax(dim=0) + + if self.post: + # knn postproc + unproj_argmax = self.post(proj_range, + unproj_range, + proj_argmax, + p_x, + p_y) + else: + # put in original pointcloud using indexes + unproj_argmax = proj_argmax[p_y, p_x] + + if torch.cuda.is_available(): + torch.cuda.synchronize() + + print("Infered seq", path_seq, "scan", path_name, + "in", time.time() - end, "sec") + + end = time.time() + + # save scan + # get the first scan in batch and project scan + pred_np = unproj_argmax.cpu().numpy() + pred_np = 
pred_np.reshape((-1)).astype(np.int32) + + # map to original label + pred_np = to_orig_fn(pred_np) + + # save scan + path = os.path.join(self.logdir, "sequences", + path_seq, "predictions", path_name) + pred_np.tofile(path) + depth = (cv2.normalize(proj_in[0][0].cpu().numpy(), None, alpha=0, beta=1, + norm_type=cv2.NORM_MINMAX, + dtype=cv2.CV_32F) * 255.0).astype(np.uint8) + print(depth.shape, proj_mask.shape,proj_argmax.shape) + out_img = cv2.applyColorMap( + depth, Trainer.get_mpl_colormap('viridis')) * proj_mask[0].cpu().numpy()[..., None] + # make label prediction + pred_color = self.parser.to_color((proj_argmax.cpu().numpy() * proj_mask[0].cpu().numpy()).astype(np.int32)) + out_img = np.concatenate([out_img, pred_color], axis=0) + print(path) + cv2.imwrite(path[:-6]+'.png',out_img) + + diff --git a/src/tasks/semantic/modules/ioueval.py b/src/tasks/semantic/modules/ioueval.py new file mode 100644 index 0000000..aa79ad5 --- /dev/null +++ b/src/tasks/semantic/modules/ioueval.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 + +# This file is covered by the LICENSE file in the root of this project. + +import torch +import numpy as np +import time + +from tasks.semantic.postproc.borderMask import borderMask + + +class iouEval: + def __init__(self, n_classes, device, ignore=None): + self.n_classes = n_classes + self.device = device + # if ignore is larger than n_classes, consider no ignoreIndex + self.ignore = torch.tensor(ignore).long() + self.include = torch.tensor( + [n for n in range(self.n_classes) if n not in self.ignore]).long() + print("[IOU EVAL] IGNORE: ", self.ignore) + print("[IOU EVAL] INCLUDE: ", self.include) + self.reset() + + def num_classes(self): + return self.n_classes + + def reset(self): + self.conf_matrix = torch.zeros( + (self.n_classes, self.n_classes), device=self.device).long() + self.ones = None + self.last_scan_size = None # for when variable scan size is used + + def addBatch(self, x, y): # x=preds, y=targets + # if numpy, pass to pytorch + # to tensor + if isinstance(x, np.ndarray): + x = torch.from_numpy(np.array(x)).long().to(self.device) + if isinstance(y, np.ndarray): + y = torch.from_numpy(np.array(y)).long().to(self.device) + + # sizes should be "batch_size x H x W" + x_row = x.reshape(-1) # de-batchify + y_row = y.reshape(-1) # de-batchify + + # idxs are labels and predictions + idxs = torch.stack([x_row, y_row], dim=0) + + # ones is what I want to add to conf when I + if self.ones is None or self.last_scan_size != idxs.shape[-1]: + self.ones = torch.ones((idxs.shape[-1]), device=self.device).long() + self.last_scan_size = idxs.shape[-1] + + # make confusion matrix (cols = gt, rows = pred) + self.conf_matrix = self.conf_matrix.index_put_( + tuple(idxs), self.ones, accumulate=True) + + def getStats(self): + # remove fp and fn from confusion on the ignore classes cols and rows + conf = self.conf_matrix.clone().double() + conf[self.ignore] = 0 + conf[:, self.ignore] = 0 + + # get the clean stats + tp = conf.diag() + fp = conf.sum(dim=1) - tp + fn = conf.sum(dim=0) - tp + return tp, fp, fn + + def getIoU(self): + tp, fp, fn = self.getStats() + intersection = tp + union = tp + fp + fn + 1e-15 + iou = intersection / union + iou_mean = (intersection[self.include] / union[self.include]).mean() + return iou_mean, iou # returns "iou mean", "iou per class" ALL CLASSES + + def getacc(self): + tp, fp, fn = self.getStats() + total_tp = tp.sum() + total = tp[self.include].sum() + fp[self.include].sum() + 1e-15 + acc_mean = total_tp / total + return acc_mean # returns "acc 
mean" + + +class biouEval(iouEval): + def __init__(self, n_classes, device, ignore=None, border_size=1, kern_conn=4): + super().__init__(n_classes, device, ignore) + self.border_size = border_size + self.kern_conn = kern_conn + + # check that I am only ignoring one class + if len(ignore) > 1: + raise ValueError("Length of ignored class list should be 1 or 0") + elif len(ignore) == 0: + ignore = None + else: + ignore = ignore[0] + + self.borderer = borderMask(self.n_classes, self.device, + self.border_size, self.kern_conn, + background_class=ignore) + self.reset() + + def reset(self): + super().reset() + return + + def addBorderBatch1d(self, range_y, x, y, px, py): + '''range_y=target as img, x=preds, y=targets, px,py=idxs of points of + pointcloud in range img + WARNING: Only batch size 1 works for now + ''' + # if numpy, pass to pytorch + # to tensor + if isinstance(range_y, np.ndarray): + range_y = torch.from_numpy(np.array(range_y)).long().to(self.device) + if isinstance(x, np.ndarray): + x = torch.from_numpy(np.array(x)).long().to(self.device) + if isinstance(y, np.ndarray): + y = torch.from_numpy(np.array(y)).long().to(self.device) + if isinstance(px, np.ndarray): + px = torch.from_numpy(np.array(px)).long().to(self.device) + if isinstance(py, np.ndarray): + py = torch.from_numpy(np.array(py)).long().to(self.device) + + # get border mask of range_y + border_mask_2d = self.borderer(range_y) + + # filter px, py according to if they are on border mask or not + border_mask_1d = border_mask_2d[0, py, px].byte() + + # get proper points from filtered x and y + x_in_mask = torch.masked_select(x, border_mask_1d) + y_in_mask = torch.masked_select(y, border_mask_1d) + + # add batch + self.addBatch(x_in_mask, y_in_mask) + + +if __name__ == "__main__": + # mock problem + nclasses = 2 + ignore = [] + + # test with 2 squares and a known IOU + lbl = torch.zeros((7, 7)).long() + argmax = torch.zeros((7, 7)).long() + + # put squares + lbl[2:4, 2:4] = 1 + argmax[3:5, 3:5] = 1 + + # make evaluator + eval = iouEval(nclasses, torch.device('cpu'), ignore) + + # run + eval.addBatch(argmax, lbl) + m_iou, iou = eval.getIoU() + print("*"*80) + print("Small iou mock problem") + print("IoU: ", m_iou) + print("IoU class: ", iou) + m_acc = eval.getacc() + print("Acc: ", m_acc) + print("*"*80) diff --git a/src/tasks/semantic/modules/segmentator.py b/src/tasks/semantic/modules/segmentator.py new file mode 100644 index 0000000..3a18978 --- /dev/null +++ b/src/tasks/semantic/modules/segmentator.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. 
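+#
+# Overview (summary of the model defined below): Segmentator chains a
+# backbone and a decoder with five convolutional heads, one per decoder
+# scale, and applies a softmax to each, so forward() returns a list of five
+# class-probability maps ordered from the full-resolution output to the
+# coarsest one. When a model path is given, the backbone, decoder and each
+# head are restored from their own weight files (backbone,
+# segmentation_decoder, segmentation_head1..5); an optional CRF stage can be
+# enabled through the arch config.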
+ +import imp +import torch +import torch.nn as nn +import torch.nn.functional as F +from tasks.semantic.postproc.CRF import CRF +import __init__ as booger + +class Segmentator(nn.Module): + def __init__(self, ARCH, nclasses, path=None, path_append="", strict=False): + super().__init__() + self.ARCH = ARCH + self.nclasses = 20 + self.path = path + self.path_append = path_append + self.strict = False + + bboneModule = imp.load_source("bboneModule", + booger.TRAIN_PATH + '/backbones/' + + self.ARCH["backbone"]["name"] + '.py') + self.backbone = bboneModule.Backbone(params=self.ARCH["backbone"]) + + # do a pass of the backbone to initialize the skip connections + xyz = torch.zeros((1, 3, + self.ARCH['dataset']['sensor']['img_prop']['height'], + self.ARCH['dataset']['sensor']['img_prop']['width'])) + stub = torch.zeros((1, + self.backbone.get_input_depth(), + self.ARCH["dataset"]["sensor"]["img_prop"]["height"], + self.ARCH["dataset"]["sensor"]["img_prop"]["width"])) + + if torch.cuda.is_available(): + stub = stub.cuda() + xyz = xyz.cuda() + self.backbone.cuda() + _, stub_skips = self.backbone(stub) + + decoderModule = imp.load_source("decoderModule", + booger.TRAIN_PATH + '/tasks/semantic/decoders/' + + self.ARCH["decoder"]["name"] + '.py') + self.decoder = decoderModule.Decoder(params=self.ARCH["decoder"], + stub_skips=stub_skips, + OS=self.ARCH["backbone"]["OS"], + feature_depth=self.backbone.get_last_depth()) + + self.head1 = nn.Sequential(nn.Dropout2d(p=ARCH["head"]["dropout"]), + nn.Conv2d(256, + self.nclasses, kernel_size=1, + stride=1, padding=0)) + + self.head2 = nn.Sequential(nn.Dropout2d(p=ARCH["head"]["dropout"]), + nn.Conv2d(256, + self.nclasses, kernel_size=1, + stride=1, padding=0)) + + self.head3 = nn.Sequential(nn.Dropout2d(p=ARCH["head"]["dropout"]), + nn.Conv2d(128, + self.nclasses, kernel_size=1, + stride=1, padding=0)) + + self.head4 = nn.Sequential(nn.Dropout2d(p=ARCH["head"]["dropout"]), + nn.Conv2d(64, + self.nclasses, kernel_size=1, + stride=1, padding=0)) + self.head5 = nn.Sequential(nn.Dropout2d(p=ARCH["head"]["dropout"]), + nn.Conv2d(32, + self.nclasses, kernel_size=3, + stride=1, padding=1)) + + + + if self.ARCH["post"]["CRF"]["use"]: + self.CRF = CRF(self.ARCH["post"]["CRF"]["params"], self.nclasses) + else: + self.CRF = None + + # train backbone? + if not self.ARCH["backbone"]["train"]: + for w in self.backbone.parameters(): + w.requires_grad = False + + # train decoder? + if not self.ARCH["decoder"]["train"]: + for w in self.decoder.parameters(): + w.requires_grad = False + + # train head? + if not self.ARCH["head"]["train"]: + for w in self.head.parameters(): + w.requires_grad = False + + # train CRF? 
+ if self.CRF and not self.ARCH["post"]["CRF"]["train"]: + for w in self.CRF.parameters(): + w.requires_grad = False + + # print number of parameters and the ones requiring gradients + # print number of parameters and the ones requiring gradients + weights_total = sum(p.numel() for p in self.parameters()) + weights_grad = sum(p.numel() for p in self.parameters() if p.requires_grad) + print("Total number of parameters: ", weights_total) + print("Total number of parameters requires_grad: ", weights_grad) + + # breakdown by layer + weights_enc = sum(p.numel() for p in self.backbone.parameters()) + weights_dec = sum(p.numel() for p in self.decoder.parameters()) + weights_head = sum(p.numel() for p in self.head1.parameters())+\ + sum(p.numel() for p in self.head2.parameters())+\ + sum(p.numel() for p in self.head3.parameters())+\ + sum(p.numel() for p in self.head4.parameters())+\ + sum(p.numel() for p in self.head5.parameters()) + print("Param encoder ", weights_enc) + print("Param decoder ", weights_dec) + print("Param head ", weights_head) + if self.CRF: + weights_crf = sum(p.numel() for p in self.CRF.parameters()) + print("Param CRF ", weights_crf) + + # get weights + if path is not None: + # try backbone + try: + w_dict = torch.load(path + "/backbone", + map_location=lambda storage, loc: storage) + self.backbone.load_state_dict(w_dict, strict=True) + print("Successfully loaded model backbone weights") + except Exception as e: + print() + print("Couldn't load backbone, using random weights. Error: ", e) + if strict: + print("I'm in strict mode and failure to load weights blows me up :)") + raise e + + # try decoder + try: + w_dict = torch.load(path + "/segmentation_decoder", + map_location=lambda storage, loc: storage) + self.decoder.load_state_dict(w_dict, strict=True) + print("Successfully loaded model decoder weights") + except Exception as e: + print("Couldn't load decoder, using random weights. Error: ", e) + if strict: + print("I'm in strict mode and failure to load weights blows me up :)") + raise e + + # try head + try: + print(path_append+'./segmentation_head1') + w_dict = torch.load(path + "/segmentation_head1", + map_location=lambda storage, loc: storage) + self.head1.load_state_dict(w_dict, strict=True) + print("Successfully loaded model head weights") + except Exception as e: + print("Couldn't load head, using random weights. Error: ", e) + if strict: + print("I'm in strict mode and failure to load weights blows me up :)") + raise e + try: + w_dict = torch.load(path+ "/segmentation_head2", + map_location=lambda storage, loc: storage) + self.head2.load_state_dict(w_dict, strict=True) + print("Successfully loaded model head weights") + except Exception as e: + print("Couldn't load head, using random weights. Error: ", e) + if strict: + print("I'm in strict mode and failure to load weights blows me up :)") + raise e + try: + w_dict = torch.load(path + "/segmentation_head3", + map_location=lambda storage, loc: storage) + self.head3.load_state_dict(w_dict, strict=True) + print("Successfully loaded model head weights") + except Exception as e: + print("Couldn't load head, using random weights. Error: ", e) + if strict: + print("I'm in strict mode and failure to load weights blows me up :)") + raise e + + try: + w_dict = torch.load(path+ "/segmentation_head4", + map_location=lambda storage, loc: storage) + self.head4.load_state_dict(w_dict, strict=True) + print("Successfully loaded model head weights") + except Exception as e: + print("Couldn't load head, using random weights. 
Error: ", e) + if strict: + print("I'm in strict mode and failure to load weights blows me up :)") + raise e + + try: + w_dict = torch.load(path + "/segmentation_head5", + map_location=lambda storage, loc: storage) + self.head5.load_state_dict(w_dict, strict=True) + print("Successfully loaded model head weights") + except Exception as e: + print("Couldn't load head, using random weights. Error: ", e) + if strict: + print("I'm in strict mode and failure to load weights blows me up :)") + raise e + else: + print("No path to pretrained, using random init.") + + def forward(self, x, mask=None): + + feature, skips = self.backbone(x) + + y = self.decoder(feature, skips) + + z1 = self.head5(y[0]) + z1 = F.softmax(z1,dim=1) + + z2 = self.head4(y[1]) + z2 = F.softmax(z2,dim=1) + + z3 = self.head3(y[2]) + z3 = F.softmax(z3,dim=1) + + z4 = self.head2(y[3]) + z4 = F.softmax(z4,dim=1) + + z5 = self.head1(y[4]) + z5 = F.softmax(z5,dim=1) + + return [z1, z2, z3, z4, z5] + + def save_checkpoint(self, logdir, suffix=""): + # Save the weights + torch.save(self.backbone.state_dict(), logdir + + "/backbone" + suffix) + + torch.save(self.decoder.state_dict(), logdir + + "/segmentation_decoder" + suffix) + + torch.save(self.head1.state_dict(),logdir+"/segmentation_head1"+suffix) + torch.save(self.head2.state_dict(),logdir+"/segmentation_head2"+suffix) + torch.save(self.head3.state_dict(),logdir+"/segmentation_head3"+suffix) + torch.save(self.head4.state_dict(),logdir+"/segmentation_head4"+suffix) + torch.save(self.head5.state_dict(),logdir+"/segmentation_head5"+suffix) + diff --git a/src/tasks/semantic/modules/trainer.py b/src/tasks/semantic/modules/trainer.py new file mode 100644 index 0000000..d4a52c9 --- /dev/null +++ b/src/tasks/semantic/modules/trainer.py @@ -0,0 +1,467 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. 
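+#
+# Overview (summary of the trainer defined below): Trainer builds the dataset
+# parser, derives per-class weights for the NLL loss from the label
+# frequencies listed under DATA["content"], roughly
+#     w_c = 1.0 / (freq_c + epsilon_w)   (ignored classes get weight 0),
+# and optimizes the network with SGD plus a warmup learning-rate schedule.
+# Each training step supervises all five model outputs: the projected label
+# image is downsampled with nearest-neighbour interpolation to match the
+# lower-resolution heads, and the per-scale NLL losses are summed.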
+ +import torch +import torch.nn as nn +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms +import imp +import yaml +import time +from PIL import Image +import __init__ as booger +import collections +import copy +import cv2 +import os +import numpy as np +from matplotlib import pyplot as plt +import random +from common.logger import Logger +from common.avgmeter import * +from common.sync_batchnorm.batchnorm import convert_model +from common.warmupLR import * +from tasks.semantic.modules.segmentator import * +from tasks.semantic.modules.ioueval import * + + +class Trainer(): + def __init__(self, ARCH, DATA, datadir, logdir, path=None): + # parameters + self.ARCH = ARCH + self.DATA = DATA + self.datadir = datadir + self.log = logdir + self.path = path + + # put logger where it belongs + self.tb_logger = Logger(self.log + "/tb") + self.info = {"train_update": 0, + "train_loss": 0, + "train_acc": 0, + "train_iou": 0, + "valid_loss": 0, + "valid_acc": 0, + "valid_iou": 0, + "backbone_lr": 0, + "decoder_lr": 0, + "head_lr": 0, + "post_lr": 0} + + # get the data + parserModule = imp.load_source("parserModule", + booger.TRAIN_PATH + '/tasks/semantic/dataset/' + + self.DATA["name"] + '/parser.py') + self.parser = parserModule.Parser(root=self.datadir, + train_sequences=self.DATA["split"]["train"], + valid_sequences=self.DATA["split"]["valid"], + test_sequences=None, + labels=self.DATA["labels"], + color_map=self.DATA["color_map"], + learning_map=self.DATA["learning_map"], + learning_map_inv=self.DATA["learning_map_inv"], + sensor=self.ARCH["dataset"]["sensor"], + max_points=self.ARCH["dataset"]["max_points"], + batch_size=self.ARCH["train"]["batch_size"], + workers=self.ARCH["train"]["workers"], + gt=True, + shuffle_train=True) + + # weights for loss (and bias) + # weights for loss (and bias) + epsilon_w = self.ARCH["train"]["epsilon_w"] + content = torch.zeros(self.parser.get_n_classes(), dtype=torch.float) + for cl, freq in DATA["content"].items(): + x_cl = self.parser.to_xentropy(cl) # map actual class to xentropy class + content[x_cl] += freq + self.loss_w = 1 / (content + epsilon_w) # get weights + for x_cl, w in enumerate(self.loss_w): # ignore the ones necessary to ignore + if DATA["learning_ignore"][x_cl]: + # don't weigh + self.loss_w[x_cl] = 0 + print("Loss weights from content: ", self.loss_w.data) + + # concatenate the encoder and the head + with torch.no_grad(): + self.model = Segmentator(self.ARCH, + self.parser.get_n_classes(), + self.path) + + # GPU? 
+ self.gpu = False + self.multi_gpu = False + self.n_gpus = 0 + self.model_single = self.model + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print("Training in device: ", self.device) + if torch.cuda.is_available() and torch.cuda.device_count() > 0: + cudnn.benchmark = True + cudnn.fastest = True + self.gpu = True + self.n_gpus = 1 + self.model.cuda() + if torch.cuda.is_available() and torch.cuda.device_count() > 1: + print("Let's use", torch.cuda.device_count(), "GPUs!") + self.model = nn.DataParallel(self.model) # spread in gpus + self.model = convert_model(self.model).cuda() # sync batchnorm + self.model_single = self.model.module # single model to get weight names + self.multi_gpu = True + self.n_gpus = torch.cuda.device_count() + + # loss + if "loss" in self.ARCH["train"].keys() and self.ARCH["train"]["loss"] == "xentropy": + self.criterion = nn.NLLLoss(weight=self.loss_w).to(self.device) + else: + raise Exception('Loss not defined in config file') + # loss as dataparallel too (more images in batch) + if self.n_gpus > 1: + self.criterion = nn.DataParallel(self.criterion).cuda() # spread in gpus + + # optimizer + if self.ARCH["post"]["CRF"]["use"] and self.ARCH["post"]["CRF"]["train"]: + self.lr_group_names = ["post_lr"] + self.train_dicts = [{'params': self.model_single.CRF.parameters()}] + else: + self.lr_group_names = [] + self.train_dicts = [] + if self.ARCH["backbone"]["train"]: + self.lr_group_names.append("backbone_lr") + self.train_dicts.append( + {'params': self.model_single.backbone.parameters()}) + if self.ARCH["decoder"]["train"]: + self.lr_group_names.append("decoder_lr") + self.train_dicts.append( + {'params': self.model_single.decoder.parameters()}) + if self.ARCH["head"]["train"]: + self.lr_group_names.append("head_lr") + self.train_dicts.append({'params': self.model_single.head1.parameters()}) + self.train_dicts.append({'params': self.model_single.head2.parameters()}) + self.train_dicts.append({'params': self.model_single.head3.parameters()}) + self.train_dicts.append({'params': self.model_single.head4.parameters()}) + self.train_dicts.append({'params': self.model_single.head5.parameters()}) + + # Use SGD optimizer to train + self.optimizer = optim.SGD(self.train_dicts, + lr=self.ARCH["train"]["lr"], + momentum=self.ARCH["train"]["momentum"], + weight_decay=self.ARCH["train"]["w_decay"]) + + # Use warmup learning rate + # post decay and step sizes come in epochs and we want it in steps + steps_per_epoch = self.parser.get_train_size() + up_steps = int(self.ARCH["train"]["wup_epochs"] * steps_per_epoch) + final_decay = self.ARCH["train"]["lr_decay"] ** (1/steps_per_epoch) + self.scheduler = warmupLR(optimizer=self.optimizer, + lr=self.ARCH["train"]["lr"], + warmup_steps=up_steps, + momentum=self.ARCH["train"]["momentum"], + decay=final_decay) + + @staticmethod + def get_mpl_colormap(cmap_name): + cmap = plt.get_cmap(cmap_name) + # Initialize the matplotlib color map + sm = plt.cm.ScalarMappable(cmap=cmap) + # Obtain linear color range + color_range = sm.to_rgba(np.linspace(0, 1, 256), bytes=True)[:, 2::-1] + return color_range.reshape(256, 1, 3) + + @staticmethod + def make_log_img(depth, mask, pred, gt, color_fn): + # input should be [depth, pred, gt] + # make range image (normalized to 0,1 for saving) + depth = (cv2.normalize(depth, None, alpha=0, beta=1, + norm_type=cv2.NORM_MINMAX, + dtype=cv2.CV_32F) * 255.0).astype(np.uint8) + out_img = cv2.applyColorMap( + depth, Trainer.get_mpl_colormap('viridis')) * mask[..., None] + # make label 
prediction + pred_color = color_fn((pred * mask).astype(np.int32)) + out_img = np.concatenate([out_img, pred_color], axis=0) + # make label gt + gt_color = color_fn(gt) + out_img = np.concatenate([out_img, gt_color], axis=0) + return (out_img).astype(np.uint8) + + @staticmethod + def save_to_log(logdir, logger, info, epoch, w_summary=False, model=None, img_summary=False, imgs=[]): + # save scalars + for tag, value in info.items(): + logger.scalar_summary(tag, value, epoch) + + # save summaries of weights and biases + if w_summary and model: + for tag, value in model.named_parameters(): + tag = tag.replace('.', '/') + logger.histo_summary(tag, value.data.cpu().numpy(), epoch) + if value.grad is not None: + logger.histo_summary( + tag + '/grad', value.grad.data.cpu().numpy(), epoch) + + if img_summary and len(imgs) > 0: + directory = os.path.join(logdir, "predictions") + if not os.path.isdir(directory): + os.makedirs(directory) + for i, img in enumerate(imgs): + name = os.path.join(directory, str(i) + ".png") + cv2.imwrite(name, img) + + def train(self): + # accuracy and IoU stuff + best_train_iou = 0.0 + best_val_iou = 0.0 + + self.ignore_class = [] + for i, w in enumerate(self.loss_w): + if w < 1e-10: + self.ignore_class.append(i) + print("Ignoring class ", i, " in IoU evaluation") + self.evaluator = iouEval(self.parser.get_n_classes(), + self.device, self.ignore_class) + + # train for n epochs + for epoch in range(self.ARCH["train"]["max_epochs"]): + # get info for learn rate currently + groups = self.optimizer.param_groups + for name, g in zip(self.lr_group_names, groups): + self.info[name] = g['lr'] + + # train for 1 epoch + acc, iou, loss, update_mean = self.train_epoch(train_loader=self.parser.get_train_set(), + model=self.model, + criterion=self.criterion, + optimizer=self.optimizer, + epoch=epoch, + evaluator=self.evaluator, + scheduler=self.scheduler, + color_fn=self.parser.to_color, + report=self.ARCH["train"]["report_batch"], + show_scans=self.ARCH["train"]["show_scans"]) + + # update info + self.info["train_update"] = update_mean + self.info["train_loss"] = loss + self.info["train_acc"] = acc + self.info["train_iou"] = iou + + # remember best iou and save checkpoint + if iou > best_train_iou: + print("Best mean iou in training set so far, save model!") + best_train_iou = iou + self.model_single.save_checkpoint(self.log, suffix="_train") + + if epoch % self.ARCH["train"]["report_epoch"] == 0: + # evaluate on validation set + print("*" * 80) + acc, iou, loss, rand_img = self.validate(val_loader=self.parser.get_valid_set(), + model=self.model, + criterion=self.criterion, + evaluator=self.evaluator, + class_func=self.parser.get_xentropy_class_string, + color_fn=self.parser.to_color, + save_scans=self.ARCH["train"]["save_scans"]) + + # update info + self.info["valid_loss"] = loss + self.info["valid_acc"] = acc + self.info["valid_iou"] = iou + + # remember best iou and save checkpoint + if iou > best_val_iou: + print("Best mean iou in validation so far, save model!") + print("*" * 80) + best_val_iou = iou + + # save the weights! 
+ self.model_single.save_checkpoint(self.log, suffix="") + + print("*" * 80) + + # save to log + Trainer.save_to_log(logdir=self.log, + logger=self.tb_logger, + info=self.info, + epoch=epoch, + w_summary=self.ARCH["train"]["save_summary"], + model=self.model_single, + img_summary=self.ARCH["train"]["save_scans"], + imgs=rand_img) + + print('Finished Training') + + return + + def train_epoch(self, train_loader, model, criterion, optimizer, epoch, evaluator, scheduler, color_fn, report=10, show_scans=False): + batch_time = AverageMeter() + data_time = AverageMeter() + losses = AverageMeter() + acc = AverageMeter() + iou = AverageMeter() + update_ratio_meter = AverageMeter() + + # empty the cache to train now + if self.gpu: + torch.cuda.empty_cache() + + # switch to train mode + model.train() + + end = time.time() + for i, (in_vol, proj_mask, proj_labels, _, path_seq, path_name, _, _, _, _, _, _, _, _, _) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + proj_labels = proj_labels.unsqueeze(1).type(torch.FloatTensor) + if not self.multi_gpu and self.gpu: + in_vol = in_vol.cuda() + proj_mask = proj_mask.cuda() + + if self.gpu: + [n, c, h, w] = proj_labels.size() + proj_labels_5 = F.upsample(proj_labels,size=(h,w//8),mode='nearest').squeeze(1).cuda(non_blocking=True).long() + proj_labels_4 = F.upsample(proj_labels,size=(h,w//8),mode='nearest').squeeze(1).cuda(non_blocking=True).long() + proj_labels_3 = F.upsample(proj_labels,size=(h,w//4),mode='nearest').squeeze(1).cuda(non_blocking=True).long() + proj_labels_2 = F.upsample(proj_labels,size=(h,w//2),mode='nearest').squeeze(1).cuda(non_blocking=True).long() + proj_labels = proj_labels.squeeze(1).cuda(non_blocking=True).long() + + [output, z2, z3, z4, z5] = model(in_vol, proj_mask) + loss = criterion(torch.log(output.clamp(min=1e-8)), proj_labels)+\ + criterion(torch.log(z5.clamp(min=1e-8)), proj_labels_5)+\ + criterion(torch.log(z4.clamp(min=1e-8)), proj_labels_4)+\ + criterion(torch.log(z3.clamp(min=1e-8)), proj_labels_3)+\ + criterion(torch.log(z2.clamp(min=1e-8)), proj_labels_2) + # compute gradient and do SGD step + optimizer.zero_grad() + if self.n_gpus > 1: + idx = torch.ones(self.n_gpus).cuda() + loss.backward(idx) + else: + loss.backward() + optimizer.step() + + # measure accuracy and record loss + loss = loss.mean() + with torch.no_grad(): + evaluator.reset() + argmax = output.argmax(dim=1) + evaluator.addBatch(argmax, proj_labels) + accuracy = evaluator.getacc() + jaccard, class_jaccard = evaluator.getIoU() + losses.update(loss.item(), in_vol.size(0)) + acc.update(accuracy.item(), in_vol.size(0)) + iou.update(jaccard.item(), in_vol.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + # get gradient updates and weights, so I can print the relationship of + # their norms + update_ratios = [] + for g in self.optimizer.param_groups: + lr = g["lr"] + for value in g["params"]: + if value.grad is not None: + w = np.linalg.norm(value.data.cpu().numpy().reshape((-1))) + update = np.linalg.norm(-max(lr, 1e-10) * + value.grad.cpu().numpy().reshape((-1))) + update_ratios.append(update / max(w, 1e-10)) + update_ratios = np.array(update_ratios) + update_mean = update_ratios.mean() + update_std = update_ratios.std() + update_ratio_meter.update(update_mean) # over the epoch + + if show_scans: + # get the first scan in batch and project points + mask_np = proj_mask[0].cpu().numpy() + depth_np = in_vol[0][0].cpu().numpy() + pred_np = argmax[0].cpu().numpy() + 
gt_np = proj_labels[0].cpu().numpy() + out = Trainer.make_log_img(depth_np, mask_np, pred_np, gt_np, color_fn) + cv2.imshow("sample_training", out) + cv2.waitKey(1) + + if i % self.ARCH["train"]["report_batch"] == 0: + print('Lr: {lr:.3e} | ' + 'Update: {umean:.3e} mean,{ustd:.3e} std | ' + 'Epoch: [{0}][{1}/{2}] | ' + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) | ' + 'Data {data_time.val:.3f} ({data_time.avg:.3f}) | ' + 'Loss {loss.val:.4f} ({loss.avg:.4f}) | ' + 'acc {acc.val:.3f} ({acc.avg:.3f}) | ' + 'IoU {iou.val:.3f} ({iou.avg:.3f})'.format( + epoch, i, len(train_loader), batch_time=batch_time, + data_time=data_time, loss=losses, acc=acc, iou=iou, lr=lr, + umean=update_mean, ustd=update_std)) + + # step scheduler + scheduler.step() + + return acc.avg, iou.avg, losses.avg, update_ratio_meter.avg + + def validate(self, val_loader, model, criterion, evaluator, class_func, color_fn, save_scans): + batch_time = AverageMeter() + losses = AverageMeter() + acc = AverageMeter() + iou = AverageMeter() + rand_imgs = [] + + # switch to evaluate mode + model.eval() + evaluator.reset() + + # empty the cache to infer in high res + if self.gpu: + torch.cuda.empty_cache() + + with torch.no_grad(): + end = time.time() + for i, (in_vol, proj_mask, proj_labels, _, path_seq, path_name, _, _, _, _, _, _, _, _, _) in enumerate(val_loader): + if not self.multi_gpu and self.gpu: + in_vol = in_vol.cuda() + proj_mask = proj_mask.cuda() + if self.gpu: + proj_labels = proj_labels.cuda(non_blocking=True).long() + + [output, z2, z3, z4, z5] = model(in_vol, proj_mask) + loss = criterion(torch.log(output.clamp(min=1e-8)), proj_labels) +# + argmax = output.argmax(dim=1) + evaluator.addBatch(argmax, proj_labels) + losses.update(loss.mean().item(), in_vol.size(0)) + + if save_scans: + # get the first scan in batch and project points + mask_np = proj_mask[0].cpu().numpy() + depth_np = in_vol[0][0].cpu().numpy() + pred_np = argmax[0].cpu().numpy() + gt_np = proj_labels[0].cpu().numpy() + out = Trainer.make_log_img(depth_np, + mask_np, + pred_np, + gt_np, + color_fn) + rand_imgs.append(out) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + accuracy = evaluator.getacc() + jaccard, class_jaccard = evaluator.getIoU() + acc.update(accuracy.item(), in_vol.size(0)) + iou.update(jaccard.item(), in_vol.size(0)) + + print('Validation set:\n' + 'Time avg per batch {batch_time.avg:.3f}\n' + 'Loss avg {loss.avg:.4f}\n' + 'Acc avg {acc.avg:.3f}\n' + 'IoU avg {iou.avg:.3f}'.format(batch_time=batch_time, + loss=losses, + acc=acc, iou=iou)) + # print also classwise + for i, jacc in enumerate(class_jaccard): + print('IoU class {i:} [{class_str:}] = {jacc:.3f}'.format( + i=i, class_str=class_func(i), jacc=jacc)) + + return acc.avg, iou.avg, losses.avg, rand_imgs diff --git a/src/tasks/semantic/modules/user.py b/src/tasks/semantic/modules/user.py new file mode 100644 index 0000000..f15a262 --- /dev/null +++ b/src/tasks/semantic/modules/user.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. 
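+#
+# Overview (summary of the class defined below): this full-inference User
+# mirrors the demo version but runs over the train, valid and test splits,
+# optionally applies KNN post-processing, maps predictions back to the
+# original label ids, and writes the .label files into the
+# <logdir>/sequences/<seq>/predictions/ tree prepared by infer.py.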
+ +import torch +import torch.nn as nn +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms +import imp +import yaml +import time +from PIL import Image +import __init__ as booger +import collections +import copy +import cv2 +import os +import numpy as np +from tasks.semantic.modules.segmentator import * +from tasks.semantic.postproc.KNN import KNN + + +class User(): + def __init__(self, ARCH, DATA, datadir, logdir, modeldir): + # parameters + self.ARCH = ARCH + self.DATA = DATA + self.datadir = datadir + self.logdir = logdir + self.modeldir = modeldir + + # get the data + parserModule = imp.load_source("parserModule", + booger.TRAIN_PATH + '/tasks/semantic/dataset/' + + self.DATA["name"] + '/parser.py') + self.parser = parserModule.Parser(root=self.datadir, + train_sequences=self.DATA["split"]["train"], + valid_sequences=self.DATA["split"]["valid"], + test_sequences=self.DATA["split"]["test"], + labels=self.DATA["labels"], + color_map=self.DATA["color_map"], + learning_map=self.DATA["learning_map"], + learning_map_inv=self.DATA["learning_map_inv"], + sensor=self.ARCH["dataset"]["sensor"], + max_points=self.ARCH["dataset"]["max_points"], + batch_size=1, + workers=self.ARCH["train"]["workers"], + gt=True, + shuffle_train=False) + + # concatenate the encoder and the head + with torch.no_grad(): + self.model = Segmentator(self.ARCH, + self.parser.get_n_classes(), + self.modeldir) + + # use knn post processing? + self.post = None + if self.ARCH["post"]["KNN"]["use"]: + self.post = KNN(self.ARCH["post"]["KNN"]["params"], + self.parser.get_n_classes()) + + # GPU? + self.gpu = False + self.model_single = self.model + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print("Infering in device: ", self.device) + if torch.cuda.is_available() and torch.cuda.device_count() > 0: + cudnn.benchmark = True + cudnn.fastest = True + self.gpu = True + self.model.cuda() + + def infer(self): + # do train set + self.infer_subset(loader=self.parser.get_train_set(), + to_orig_fn=self.parser.to_original) + # do valid set + self.infer_subset(loader=self.parser.get_valid_set(), + to_orig_fn=self.parser.to_original) + # do test set + self.infer_subset(loader=self.parser.get_test_set(), + to_orig_fn=self.parser.to_original) + + print('Finished Infering') + + return + + def infer_subset(self, loader, to_orig_fn): + # switch to evaluate mode + self.model.eval() + + # empty the cache to infer in high res + if self.gpu: + torch.cuda.empty_cache() + + with torch.no_grad(): + end = time.time() + + for i, (proj_in, proj_mask, _, _, path_seq, path_name, p_x, p_y, proj_range, unproj_range, _, _, _, _, npoints) in enumerate(loader): + # first cut to rela size (batch size one allows it) + p_x = p_x[0, :npoints] + p_y = p_y[0, :npoints] + proj_range = proj_range[0, :npoints] + unproj_range = unproj_range[0, :npoints] + path_seq = path_seq[0] + path_name = path_name[0] + + if self.gpu: + proj_in = proj_in.cuda() + proj_mask = proj_mask.cuda() + p_x = p_x.cuda() + p_y = p_y.cuda() + + if self.post: + proj_range = proj_range.cuda() + unproj_range = unproj_range.cuda() + + proj_output, _, _, _, _ = self.model(proj_in, proj_mask) + proj_argmax = proj_output[0].argmax(dim=0) + + if self.post: + # knn postproc + unproj_argmax = self.post(proj_range, + unproj_range, + proj_argmax, + p_x, + p_y) + else: + # put in original pointcloud using indexes + unproj_argmax = proj_argmax[p_y, p_x] + + if torch.cuda.is_available(): + torch.cuda.synchronize() + + 
print("Infered seq", path_seq, "scan", path_name, + "in", time.time() - end, "sec") + + end = time.time() + + # save scan + # get the first scan in batch and project scan + pred_np = unproj_argmax.cpu().numpy() + pred_np = pred_np.reshape((-1)).astype(np.int32) + + # map to original label + pred_np = to_orig_fn(pred_np) + + # save scan + path = os.path.join(self.logdir, "sequences", + path_seq, "predictions", path_name) + pred_np.tofile(path) diff --git a/src/tasks/semantic/postproc/.DS_Store b/src/tasks/semantic/postproc/.DS_Store new file mode 100644 index 0000000..a292f56 Binary files /dev/null and b/src/tasks/semantic/postproc/.DS_Store differ diff --git a/src/tasks/semantic/postproc/CRF.py b/src/tasks/semantic/postproc/CRF.py new file mode 100644 index 0000000..24f52d1 --- /dev/null +++ b/src/tasks/semantic/postproc/CRF.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. + +import numpy as np +from scipy import signal +import torch +import torch.nn as nn +import torch.nn.functional as F +import __init__ as booger + + +class LocallyConnectedXYZLayer(nn.Module): + def __init__(self, h, w, sigma, nclasses): + super().__init__() + # size of window + self.h = h + self.padh = h//2 + self.w = w + self.padw = w//2 + assert(self.h % 2 == 1 and self.w % 2 == 1) # window must be odd + self.sigma = sigma + self.gauss_den = 2 * self.sigma**2 + self.nclasses = nclasses + + def forward(self, xyz, softmax, mask): + # softmax size + N, C, H, W = softmax.shape + + # make sofmax zero everywhere input is invalid + softmax = softmax * mask.unsqueeze(1).float() + + # get x,y,z for distance (shape N,1,H,W) + x = xyz[:, 0].unsqueeze(1) + y = xyz[:, 1].unsqueeze(1) + z = xyz[:, 2].unsqueeze(1) + + # im2col in size of window of input (x,y,z separately) + window_x = F.unfold(x, kernel_size=(self.h, self.w), + padding=(self.padh, self.padw)) + center_x = F.unfold(x, kernel_size=(1, 1), + padding=(0, 0)) + window_y = F.unfold(y, kernel_size=(self.h, self.w), + padding=(self.padh, self.padw)) + center_y = F.unfold(y, kernel_size=(1, 1), + padding=(0, 0)) + window_z = F.unfold(z, kernel_size=(self.h, self.w), + padding=(self.padh, self.padw)) + center_z = F.unfold(z, kernel_size=(1, 1), + padding=(0, 0)) + + # sq distance to center (center distance is zero) + unravel_dist2 = (window_x - center_x)**2 + \ + (window_y - center_y)**2 + \ + (window_z - center_z)**2 + + # weight input distance by gaussian weights + unravel_gaussian = torch.exp(- unravel_dist2 / self.gauss_den) + + # im2col in size of window of softmax to reweight by gaussian weights from input + cloned_softmax = softmax.clone() + for i in range(self.nclasses): + # get the softmax for this class + c_softmax = softmax[:, i].unsqueeze(1) + # unfold this class to weigh it by the proper gaussian weights + unravel_softmax = F.unfold(c_softmax, + kernel_size=(self.h, self.w), + padding=(self.padh, self.padw)) + unravel_w_softmax = unravel_softmax * unravel_gaussian + # add dimenssion 1 to obtain the new softmax for this class + unravel_added_softmax = unravel_w_softmax.sum(dim=1).unsqueeze(1) + # fold it and put it in new tensor + added_softmax = unravel_added_softmax.view(N, H, W) + cloned_softmax[:, i] = added_softmax + + return cloned_softmax + + +class CRF(nn.Module): + def __init__(self, params, nclasses): + super().__init__() + self.params = params + self.iter = torch.nn.Parameter(torch.tensor(params["iter"]), + requires_grad=False) + self.lcn_size = 
torch.nn.Parameter(torch.tensor([params["lcn_size"]["h"], + params["lcn_size"]["w"]]), + requires_grad=False) + self.xyz_coef = torch.nn.Parameter(torch.tensor(params["xyz_coef"]), + requires_grad=False).float() + self.xyz_sigma = torch.nn.Parameter(torch.tensor(params["xyz_sigma"]), + requires_grad=False).float() + + self.nclasses = nclasses + print("Using CRF!") + + # define layers here + # compat init + self.compat_kernel_init = np.reshape(np.ones((self.nclasses, self.nclasses)) - + np.identity(self.nclasses), + [self.nclasses, self.nclasses, 1, 1]) + + # bilateral compatibility matrixes + self.compat_conv = nn.Conv2d(self.nclasses, self.nclasses, 1) + self.compat_conv.weight = torch.nn.Parameter(torch.from_numpy( + self.compat_kernel_init).float() * self.xyz_coef, requires_grad=True) + + # locally connected layer for message passing + self.local_conn_xyz = LocallyConnectedXYZLayer(params["lcn_size"]["h"], + params["lcn_size"]["w"], + params["xyz_coef"], + self.nclasses) + + def forward(self, input, softmax, mask): + # use xyz + xyz = input[:, 1:4] + + # iteratively + for iter in range(self.iter): + # message passing as locally connected layer + locally_connected = self.local_conn_xyz(xyz, softmax, mask) + + # reweigh with the 1x1 convolution + reweight_softmax = self.compat_conv(locally_connected) + + # add the new values to the original softmax + reweight_softmax = reweight_softmax + softmax + + # lastly, renormalize + softmax = F.softmax(reweight_softmax, dim=1) + + return softmax diff --git a/src/tasks/semantic/postproc/KNN.py b/src/tasks/semantic/postproc/KNN.py new file mode 100644 index 0000000..9a765ce --- /dev/null +++ b/src/tasks/semantic/postproc/KNN.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. + +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +import __init__ as booger + + +def get_gaussian_kernel(kernel_size=3, sigma=2, channels=1): + # Create a x, y coordinate grid of shape (kernel_size, kernel_size, 2) + x_coord = torch.arange(kernel_size) + x_grid = x_coord.repeat(kernel_size).view(kernel_size, kernel_size) + y_grid = x_grid.t() + xy_grid = torch.stack([x_grid, y_grid], dim=-1).float() + + mean = (kernel_size - 1) / 2. + variance = sigma**2. + + # Calculate the 2-dimensional gaussian kernel which is + # the product of two gaussian distributions for two different + # variables (in this case called x and y) + gaussian_kernel = (1. / (2. * math.pi * variance)) *\ + torch.exp(-torch.sum((xy_grid - mean)**2., dim=-1) / (2 * variance)) + + # Make sure sum of values in gaussian kernel equals 1. + gaussian_kernel = gaussian_kernel / torch.sum(gaussian_kernel) + + # Reshape to 2d depthwise convolutional weight + gaussian_kernel = gaussian_kernel.view(kernel_size, kernel_size) + + return gaussian_kernel + + +class KNN(nn.Module): + def __init__(self, params, nclasses): + super().__init__() + print("*"*80) + print("Cleaning point-clouds with kNN post-processing") + self.knn = params["knn"] + self.search = params["search"] + self.sigma = params["sigma"] + self.cutoff = params["cutoff"] + self.nclasses = nclasses + print("kNN parameters:") + print("knn:", self.knn) + print("search:", self.search) + print("sigma:", self.sigma) + print("cutoff:", self.cutoff) + print("nclasses:", self.nclasses) + print("*"*80) + + def forward(self, proj_range, unproj_range, proj_argmax, px, py): + ''' Warning! Only works for un-batched pointclouds. 
+ If they come batched we need to iterate over the batch dimension or do + something REALLY smart to handle unaligned number of points in memory + ''' + # get device + if proj_range.is_cuda: + device = torch.device("cuda") + else: + device = torch.device("cpu") + + # sizes of projection scan + H, W = proj_range.shape + + # number of points + P = unproj_range.shape + + # check if size of kernel is odd and complain + if (self.search % 2 == 0): + raise ValueError("Nearest neighbor kernel must be odd number") + + # calculate padding + pad = int((self.search - 1) / 2) + + # unfold neighborhood to get nearest neighbors for each pixel (range image) + proj_unfold_k_rang = F.unfold(proj_range[None, None, ...], + kernel_size=(self.search, self.search), + padding=(pad, pad)) + + # index with px, py to get ALL the pcld points + idx_list = py * W + px + unproj_unfold_k_rang = proj_unfold_k_rang[:, :, idx_list] + + # WARNING, THIS IS A HACK + # Make non valid (<0) range points extremely big so that there is no screwing + # up the nn self.search + unproj_unfold_k_rang[unproj_unfold_k_rang < 0] = float("inf") + + # now the matrix is unfolded TOTALLY, replace the middle points with the actual range points + center = int(((self.search * self.search) - 1) / 2) + unproj_unfold_k_rang[:, center, :] = unproj_range + + # now compare range + k2_distances = torch.abs(unproj_unfold_k_rang - unproj_range) + + # make a kernel to weigh the ranges according to distance in (x,y) + # I make this 1 - kernel because I want distances that are close in (x,y) + # to matter more + inv_gauss_k = ( + 1 - get_gaussian_kernel(self.search, self.sigma, 1)).view(1, -1, 1) + inv_gauss_k = inv_gauss_k.to(device).type(proj_range.type()) + + # apply weighing + k2_distances = k2_distances * inv_gauss_k + + # find nearest neighbors + _, knn_idx = k2_distances.topk( + self.knn, dim=1, largest=False, sorted=False) + + # do the same unfolding with the argmax + proj_unfold_1_argmax = F.unfold(proj_argmax[None, None, ...].float(), + kernel_size=(self.search, self.search), + padding=(pad, pad)).long() + unproj_unfold_1_argmax = proj_unfold_1_argmax[:, :, idx_list] + + # get the top k predictions from the knn at each pixel + knn_argmax = torch.gather( + input=unproj_unfold_1_argmax, dim=1, index=knn_idx) + + # fake an invalid argmax of classes + 1 for all cutoff items + if self.cutoff > 0: + knn_distances = torch.gather(input=k2_distances, dim=1, index=knn_idx) + knn_invalid_idx = knn_distances > self.cutoff + knn_argmax[knn_invalid_idx] = self.nclasses + + # now vote + # argmax onehot has an extra class for objects after cutoff + knn_argmax_onehot = torch.zeros( + (1, self.nclasses + 1, P[0]), device=device).type(proj_range.type()) + ones = torch.ones_like(knn_argmax).type(proj_range.type()) + knn_argmax_onehot = knn_argmax_onehot.scatter_add_(1, knn_argmax, ones) + + # now vote (as a sum over the onehot shit) (don't let it choose unlabeled OR invalid) + knn_argmax_out = knn_argmax_onehot[:, 1:-1].argmax(dim=1) + 1 + + # reshape again + knn_argmax_out = knn_argmax_out.view(P) + + return knn_argmax_out diff --git a/src/tasks/semantic/postproc/__init__.py b/src/tasks/semantic/postproc/__init__.py new file mode 100644 index 0000000..24fc240 --- /dev/null +++ b/src/tasks/semantic/postproc/__init__.py @@ -0,0 +1,4 @@ +import sys +TRAIN_PATH = "../" +DEPLOY_PATH = "../../deploy" +sys.path.insert(0, TRAIN_PATH) diff --git a/src/tasks/semantic/postproc/__pycache__/.DS_Store b/src/tasks/semantic/postproc/__pycache__/.DS_Store new file mode 100644 index 
0000000..5008ddf Binary files /dev/null and b/src/tasks/semantic/postproc/__pycache__/.DS_Store differ diff --git a/src/tasks/semantic/postproc/__pycache__/CRF.cpython-36.pyc b/src/tasks/semantic/postproc/__pycache__/CRF.cpython-36.pyc new file mode 100644 index 0000000..c2c0e42 Binary files /dev/null and b/src/tasks/semantic/postproc/__pycache__/CRF.cpython-36.pyc differ diff --git a/src/tasks/semantic/postproc/__pycache__/KNN.cpython-36.pyc b/src/tasks/semantic/postproc/__pycache__/KNN.cpython-36.pyc new file mode 100644 index 0000000..a43dc62 Binary files /dev/null and b/src/tasks/semantic/postproc/__pycache__/KNN.cpython-36.pyc differ diff --git a/src/tasks/semantic/postproc/__pycache__/__init__.cpython-36.pyc b/src/tasks/semantic/postproc/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..1ee1858 Binary files /dev/null and b/src/tasks/semantic/postproc/__pycache__/__init__.cpython-36.pyc differ diff --git a/src/tasks/semantic/postproc/__pycache__/borderMask.cpython-36.pyc b/src/tasks/semantic/postproc/__pycache__/borderMask.cpython-36.pyc new file mode 100644 index 0000000..2ae6b17 Binary files /dev/null and b/src/tasks/semantic/postproc/__pycache__/borderMask.cpython-36.pyc differ diff --git a/src/tasks/semantic/postproc/borderMask.py b/src/tasks/semantic/postproc/borderMask.py new file mode 100644 index 0000000..3e62d4f --- /dev/null +++ b/src/tasks/semantic/postproc/borderMask.py @@ -0,0 +1,304 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. +"""Border Mask for 2D labeled range images. + +Simple module to obtain the border mask of a given range image. + +The border mask is defined as the zone where intersections between +different classes occur in the given range image. + +In this case we bend the definition slightly and augment it: we define the +border mask as the zone where intersections between different +classes occur within a determined neighborhood of the given range image. To obtain this +border mask we need to apply the binary erosion algorithm multiple times to +the same range image. 
+ +Example: + Suppose we have 3 classes and this given range image(labeled): + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + + The output of the bordermask would like: + # 1 erode iteration with a connectivity kernel of 4: + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + [1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1], + [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1], + [1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1], + [1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1], + [1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1], + [1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1], + [1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1], + [1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1], + [1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1], + [1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1], + [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1], + [1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + + # 2 erode iterations with a connectivity kernel of 8: + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1], + [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1], + [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1], + [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1], + [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1], + [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1], + [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1], + [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]] +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F +from common.onehot import oneHot +import __init__ as booger + + +class borderMask(nn.Module): + def __init__(self, nclasses, device, border_size, kern_conn=4, background_class=None): + """Get the binary border mask of a labeled 2d range image. + + Args: + nclasses(int) : The number of classes labeled in the input image + device(torch.device) : Process in host or cuda? + border_size(int) : How many erode iterations to perform for the mask + kern_conn(int) : The connectivity kernel number (4 or 8) + background_class(int) : "unlabeled" class in dataset (to avoid double borders) + + Returns: + eroded_output(tensor) : The 2d binary border mask, 1 where a intersection + between classes occurs, 0 everywhere else + + """ + super().__init__() + self.nclasses = nclasses + self.device = device + self.border_size = border_size + self.kern_conn = kern_conn + self.background_class = background_class + if self.background_class is not None: + self.include_idx = list(range(self.nclasses)) + self.exclude_idx = self.include_idx.pop(self.background_class) + + # check connectivity + # For obtaining the border mask we will be eroding the input image, for this + # reason we only support erode_kernels with connectivity 4 or 8 + assert self.kern_conn in (4, 8), ("The specified kernel connectivity(kern_conn= %r) is " + "not supported" % self.kern_conn) + + # make the onehot inferer + self.onehot = oneHot(self.device, + self.nclasses, + spatial_dim=2) # range labels + + def forward(self, range_label): + # length of shape of range_label must be 3 (N, H, W) + must_unbatch = False # remove batch dimension after operation? + if len(range_label.shape) != 3: + range_label = range_label[None, ...] + must_unbatch = True + + # The range_label comes labeled, we need to create one tensor per class, thus: + input_tensor = self.onehot(range_label) # (N, C, H, W) + + # Because we are using GT range_labels, there is a lot of pixels that end up + # unlabeled(thus, in the background). If we feed the erosion algorithm with + # this "raw" gt_labels we will detect intersection between the other classes + # and the backgorund, and we will end with the incorrect border mask. To solve + # this issue we need to pre process the input gt_label. The artifact in this + # case will be to sum the background channel(mostly the channel 0) to + # all the rest channels expect for the background channel itself. + # This will allow us to avoid detecting intersections between a class and the + # background. This also force us to change the logical AND we were doing to + # obtain the border mask when we were working with predicted labels. 
+ # With predicted labels won't see this problem because all the pixels belongs + # to at least one class + if self.background_class is not None: + input_tensor[:, self.include_idx] = input_tensor[:, self.include_idx] + \ + input_tensor[:, self.exclude_idx] + + # C denotes a number of channels, N, H and W are dismissed + C = input_tensor.shape[1] + + # Create an empty erode kernel and send it to 'device' + erode_kernel = torch.zeros((C, 1, 3, 3), device=self.device) + if self.kern_conn == 4: + erode_kernel[:] = torch.tensor([[0, 1, 0], + [1, 1, 1], + [0, 1, 0]], device=self.device) + else: + erode_kernel[:] = torch.tensor([[1, 1, 1], + [1, 1, 1], + [1, 1, 1]], device=self.device) + + # to check connectivity + kernel_sum = erode_kernel[0][0].sum() # should be kern_conn + 1 + + # erode the input image border_size times + erode_input = input_tensor + for _ in range(self.border_size): + eroded_output = F.conv2d(erode_input, erode_kernel, groups=C, padding=1) + # Pick the elements that match the kernel_sum to obtain the eroded + # output and convert to dtype=float32 + eroded_output = (eroded_output == kernel_sum).float() + erode_input = eroded_output + + # We want to sum up all the channels into 1 unique border mask + # Even when we added the background to all the rest of the channels, there + # might be "bodies" in the background channel, thus, the erosion process can + # output "false positives" were this "bodies" are present in the background. + # We need to obtain the background mask and add it to the eroded bodies to + # obtain a consisent output once we calculate the border mask + if self.background_class is not None: + background_mask = (eroded_output[:, self.exclude_idx] == 1) + + # The eroded_bodies mask will consist in all the pixels were the convolution + # returned 1 for all the channels, therefore we need to sum up all the + # channels into one unique tensor and add the background mask to avoid having + # the background in the border mask output + eroded_bodies = (eroded_output.sum(1, keepdim=True) == 1) + if self.background_class is not None: + eroded_bodies = eroded_bodies + background_mask + + # we want the opposite + borders = 1 - eroded_bodies + + # unbatch? + if must_unbatch: + borders = borders[0] + # import cv2 + # import numpy as np + # bordersprint = (borders * 255).squeeze().cpu().numpy().astype(np.uint8) + # cv2.imshow("border", bordersprint) + # cv2.waitKey(0) + + return borders + + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser("./borderMask.py") + parser.add_argument( + '--scan', '-s', + type=str, + required=True, + help='Scan to get xyz. No Default', + ) + parser.add_argument( + '--label', '-l', + type=str, + required=True, + help='Label to calculate border mask. No Default', + ) + parser.add_argument( + '--exclude_class', '-e', + type=int, + required=False, + default=None, + help='Label to ignore. No Default', + ) + parser.add_argument( + '--border', '-b', + type=int, + required=False, + default=1, + help='Border size. Defaults to %(default)s', + ) + parser.add_argument( + '--conn', '-c', + type=int, + required=False, + default=4, + help='Kernel connectivity. 
Defaults to %(default)s', + ) + FLAGS, unparsed = parser.parse_known_args() + + # print summary of what we will do + print("----------") + print("INTERFACE:") + print("Scan", FLAGS.scan) + print("Label", FLAGS.label) + print("Exclude class", FLAGS.exclude_class) + print("Border", FLAGS.border) + print("Connectivity", FLAGS.conn) + print("----------\n") + + # get device + if torch.cuda.is_available(): + device = torch.device('cuda') + else: + device = torch.device('cpu') + + # define the border mask + bm = borderMask(300, device, FLAGS.border, + FLAGS.conn, FLAGS.exclude_class) + + # imports for inference part + import cv2 + import numpy as np + from common.laserscan import LaserScan, SemLaserScan + + # open label and project + scan = SemLaserScan(project=True, max_classes=300) + scan.open_scan(FLAGS.scan) + scan.open_label(FLAGS.label) + + # get the things I need + proj_range = torch.from_numpy(scan.proj_range).to(device) + proj_sem_label = torch.from_numpy(scan.proj_sem_label).long().to(device) + proj_sem_color = torch.from_numpy(scan.proj_sem_color).to(device) + + # run the border mask + border_mask = bm(proj_sem_label) + + # bring to numpy and normalize for showing + proj_range = proj_range.cpu().numpy() + proj_sem_label = proj_sem_label.cpu().numpy() + proj_sem_color = proj_sem_color.cpu().numpy() + border_mask = border_mask.cpu().numpy().squeeze() + + # norm + proj_range = (proj_range / proj_range.max() * 255).astype(np.uint8) + border_mask = (border_mask * 255).astype(np.uint8) + + # show + cv2.imshow("range", proj_range) + cv2.imshow("label", proj_sem_color) + cv2.imshow("border", border_mask) + cv2.waitKey(0) + cv2.destroyAllWindows() diff --git a/src/tasks/semantic/train.py b/src/tasks/semantic/train.py new file mode 100755 index 0000000..d2c9403 --- /dev/null +++ b/src/tasks/semantic/train.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. + +import argparse +import subprocess +import datetime +import yaml +from shutil import copyfile +import os +import shutil +import __init__ as booger + +from tasks.semantic.modules.trainer import * + +if __name__ == '__main__': + parser = argparse.ArgumentParser("./train.py") + parser.add_argument( + '--dataset', '-d', + type=str, + required=True, + help='Dataset to train with. No Default', + ) + parser.add_argument( + '--arch_cfg', '-ac', + type=str, + required=True, + help='Architecture yaml cfg file. See /config/arch for sample. No default!', + ) + parser.add_argument( + '--data_cfg', '-dc', + type=str, + required=False, + default='config/labels/semantic-kitti.yaml', + help='Classification yaml cfg file. See /config/labels for sample. No default!', + ) + parser.add_argument( + '--log', '-l', + type=str, + default=os.path.expanduser("~") + '/logs/' + + datetime.datetime.now().strftime("%Y-%-m-%d-%H:%M") + '/', + help='Directory to put the log data. Default: ~/logs/date+time' + ) + parser.add_argument( + '--pretrained', '-p', + type=str, + required=False, + default=None, + help='Directory to get the pretrained model. If not passed, do from scratch!' 
+ ) + FLAGS, unparsed = parser.parse_known_args() + + # print summary of what we will do + print("----------") + print("INTERFACE:") + print("dataset", FLAGS.dataset) + print("arch_cfg", FLAGS.arch_cfg) + print("data_cfg", FLAGS.data_cfg) + print("log", FLAGS.log) + print("pretrained", FLAGS.pretrained) + print("----------\n") + + # open arch config file + try: + print("Opening arch config file %s" % FLAGS.arch_cfg) + ARCH = yaml.safe_load(open(FLAGS.arch_cfg, 'r')) + except Exception as e: + print(e) + print("Error opening arch yaml file.") + quit() + + # open data config file + try: + print("Opening data config file %s" % FLAGS.data_cfg) + DATA = yaml.safe_load(open(FLAGS.data_cfg, 'r')) + except Exception as e: + print(e) + print("Error opening data yaml file.") + quit() + + # create log folder + try: + if os.path.isdir(FLAGS.log): + shutil.rmtree(FLAGS.log) + os.makedirs(FLAGS.log) + except Exception as e: + print(e) + print("Error creating log directory. Check permissions!") + quit() + + # does model folder exist? + if FLAGS.pretrained is not None: + if os.path.isdir(FLAGS.pretrained): + print("model folder exists! Using model from %s" % (FLAGS.pretrained)) + else: + print("model folder doesnt exist! Start with random weights...") + else: + print("No pretrained directory found.") + + # copy all files to log folder (to remember what we did, and make inference + # easier). Also, standardize name to be able to open it later + try: + print("Copying files to %s for further reference." % FLAGS.log) + copyfile(FLAGS.arch_cfg, FLAGS.log + "/arch_cfg.yaml") + copyfile(FLAGS.data_cfg, FLAGS.log + "/data_cfg.yaml") + except Exception as e: + print(e) + print("Error copying files, check permissions. Exiting...") + quit() + + # create trainer and start the training + trainer = Trainer(ARCH, DATA, FLAGS.dataset, FLAGS.log, FLAGS.pretrained) + trainer.train() diff --git a/src/tasks/semantic/visualize.py b/src/tasks/semantic/visualize.py new file mode 100755 index 0000000..cd7db3f --- /dev/null +++ b/src/tasks/semantic/visualize.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +# This file is covered by the LICENSE file in the root of this project. + +import argparse +import os +import yaml +import __init__ as booger + +from common.laserscan import LaserScan, SemLaserScan +from common.laserscanvis import LaserScanVis + +if __name__ == '__main__': + parser = argparse.ArgumentParser("./visualize.py") + parser.add_argument( + '--dataset', '-d', + type=str, + required=True, + help='Dataset to visualize. No Default', + ) + parser.add_argument( + '--config', '-c', + type=str, + required=False, + default="config/labels/semantic-kitti.yaml", + help='Dataset config file. Defaults to %(default)s', + ) + parser.add_argument( + '--sequence', '-s', + type=str, + default="00", + required=False, + help='Sequence to visualize. Defaults to %(default)s', + ) + parser.add_argument( + '--predictions', '-p', + type=str, + default=None, + required=False, + help='Alternate location for labels, to use predictions folder. ' + 'Must point to directory containing the predictions in the proper format ' + ' (see readme)' + 'Defaults to %(default)s', + ) + parser.add_argument( + '--ignore_semantics', '-i', + dest='ignore_semantics', + default=False, + action='store_true', + help='Ignore semantics. Visualizes uncolored pointclouds.' + 'Defaults to %(default)s', + ) + parser.add_argument( + '--offset', + type=int, + default=0, + required=False, + help='Sequence to start. 
Defaults to %(default)s', + ) + parser.add_argument( + '--ignore_safety', + dest='ignore_safety', + default=False, + action='store_true', + help='Normally you want the number of labels and ptcls to be the same,' + ', but if you are not done inferring this is not the case, so this disables' + ' that safety.' + 'Defaults to %(default)s', + ) + FLAGS, unparsed = parser.parse_known_args() + + # print summary of what we will do + print("*" * 80) + print("INTERFACE:") + print("Dataset", FLAGS.dataset) + print("Config", FLAGS.config) + print("Sequence", FLAGS.sequence) + print("Predictions", FLAGS.predictions) + print("ignore_semantics", FLAGS.ignore_semantics) + print("ignore_safety", FLAGS.ignore_safety) + print("offset", FLAGS.offset) + print("*" * 80) + + # open config file + try: + print("Opening config file %s" % FLAGS.config) + CFG = yaml.safe_load(open(FLAGS.config, 'r')) + except Exception as e: + print(e) + print("Error opening yaml file.") + quit() + + # fix sequence name + FLAGS.sequence = '{0:02d}'.format(int(FLAGS.sequence)) + + # does sequence folder exist? + scan_paths = os.path.join(FLAGS.dataset, "sequences", + FLAGS.sequence, "velodyne") + if os.path.isdir(scan_paths): + print("Sequence folder exists! Using sequence from %s" % scan_paths) + else: + print("Sequence folder doesn't exist! Exiting...") + quit() + + # populate the pointclouds + scan_names = [os.path.join(dp, f) for dp, dn, fn in os.walk( + os.path.expanduser(scan_paths)) for f in fn] + scan_names.sort() + + # does sequence folder exist? + if not FLAGS.ignore_semantics: + if FLAGS.predictions is not None: + label_paths = os.path.join(FLAGS.predictions, "sequences", + FLAGS.sequence, "predictions") + else: + label_paths = os.path.join(FLAGS.dataset, "sequences", + FLAGS.sequence, "labels") + if os.path.isdir(label_paths): + print("Labels folder exists! Using labels from %s" % label_paths) + else: + print("Labels folder doesn't exist! Exiting...") + quit() + # populate the pointclouds + label_names = [os.path.join(dp, f) for dp, dn, fn in os.walk( + os.path.expanduser(label_paths)) for f in fn] + label_names.sort() + + # check that there are same amount of labels and scans + if not FLAGS.ignore_safety: + assert(len(label_names) == len(scan_names)) + + # create a scan + if FLAGS.ignore_semantics: + scan = LaserScan(project=True) # project all opened scans to spheric proj + else: + color_dict = CFG["color_map"] + scan = SemLaserScan(color_dict, project=True) + + # create a visualizer + semantics = not FLAGS.ignore_semantics + if not semantics: + label_names = None + vis = LaserScanVis(scan=scan, + scan_names=scan_names, + label_names=label_names, + offset=FLAGS.offset, + semantics=semantics, + instances=False) + + # print instructions + print("To navigate:") + print("\tb: back (previous scan)") + print("\tn: next (next scan)") + print("\tq: quit (exit program)") + + # run the visualizer + vis.run()
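
The two post-processing modules added above are the densest part of this change, so a minimal sketch of each follows. Names, default values, and shapes in these sketches are illustrative assumptions, and several details of the committed code are intentionally omitted.

First, the kNN cleaning in `src/tasks/semantic/postproc/KNN.py`: every original 3D point looks at the range-image neighborhood of the pixel it was projected to, keeps the k neighbors whose range is closest to its own, and takes a majority vote over their predicted labels. The sketch below drops the inverse-Gaussian (x, y) weighting, the center-pixel substitution, and the range cutoff that the real module applies.

```python
# Simplified, single-scan sketch of kNN label cleaning on a range image.
# All names and defaults here are illustrative assumptions.
import torch
import torch.nn.functional as F

def knn_clean(proj_range, proj_argmax, unproj_range, px, py,
              k=5, search=5, nclasses=20):
    """proj_range:   (H, W) range image, < 0 where no point was projected
       proj_argmax:  (H, W) predicted class per range-image pixel
       unproj_range: (P,)   range of every original 3D point
       px, py:       (P,)   pixel coordinates each point was projected to"""
    H, W = proj_range.shape
    pad = (search - 1) // 2
    idx = (py * W + px).long()                                     # (P,)

    # (search x search) neighborhood of every pixel, then keep only the
    # neighborhoods of the pixels that the 3D points fall into
    neigh_range = F.unfold(proj_range[None, None], search, padding=pad)[0][:, idx]
    neigh_label = F.unfold(proj_argmax[None, None].float(), search,
                           padding=pad)[0][:, idx].long()          # (S*S, P)

    # invalid pixels must never be selected as nearest neighbors
    neigh_range[neigh_range < 0] = float("inf")

    # range distance to each on-image neighbor, k nearest neighbors,
    # then a majority vote over their labels
    dist = (neigh_range - unproj_range[None, :]).abs()
    _, nn_idx = dist.topk(k, dim=0, largest=False)                 # (k, P)
    votes = torch.gather(neigh_label, 0, nn_idx)
    onehot = torch.zeros(nclasses, votes.shape[1]).scatter_add_(
        0, votes, torch.ones_like(votes, dtype=torch.float))
    return onehot.argmax(dim=0)                                    # (P,) cleaned labels
```

Second, the erosion-based border mask of `src/tasks/semantic/postproc/borderMask.py`: one-hot encode the label image, erode each class channel with a 4-connectivity kernel, and mark as border every pixel in which no class survives. This toy version omits the background-class handling of the committed module.

```python
# Toy erosion-based border mask (assumed names; omits background handling).
import torch
import torch.nn.functional as F

def border_mask(label_img, nclasses, border_size=1):
    """label_img: (H, W) integer labels -> (H, W) {0., 1.} border mask."""
    onehot = F.one_hot(label_img.long(), nclasses).permute(2, 0, 1)[None].float()
    kernel = torch.tensor([[0., 1., 0.],
                           [1., 1., 1.],
                           [0., 1., 0.]]).view(1, 1, 3, 3).repeat(nclasses, 1, 1, 1)
    hits = kernel.sum(dim=(1, 2, 3))[None, :, None, None]   # 5 for 4-connectivity
    eroded = onehot
    for _ in range(border_size):
        # a pixel survives only if its whole 4-neighborhood belongs to its class
        eroded = (F.conv2d(eroded, kernel, groups=nclasses, padding=1) == hits).float()
    # pixels that survive in no class sit on (or near) a class boundary
    return (eroded.sum(dim=1)[0] == 0).float()
```

For running the new entry points, the argparse definitions above already spell out the interface: `train.py` requires `-d/--dataset` and `-ac/--arch_cfg` (with `-dc/--data_cfg`, `-l/--log`, and `-p/--pretrained` optional), and `visualize.py` takes `-d/--dataset`, `-c/--config`, `-s/--sequence`, and, to show inferred labels, `-p/--predictions`.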