demo files

chrischoy · Jan 16, 2020 · df2796b · df2796b
1 parent 75c33f5
commit df2796b
Show file tree

Hide file tree

Showing 9 changed files with 356 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -93,6 +93,26 @@ python -m lib.datasets.preprocessing.stanford
 |:-------------:|:-------------------:|:----------:|:-----------------:|:-------------------------------------------------------------------------------:|:------:|
 | Mink16UNet34C | ScanNet train + val | 2cm        | 3                 | Test set 73.6% mIoU, no sliding window                                          | [download](https://node1.chrischoy.org/data/publications/minknet/Mink16UNet34C_ScanNet.pth) |
 | Mink16UNet34C | ScanNet train       | 2cm        | 5                 | Val 72.219% mIoU, no rotation average, no sliding window [per class performance](https://github.com/chrischoy/SpatioTemporalSegmentation/issues/13) | [download](https://node1.chrischoy.org/data/publications/minknet/MinkUNet34C-train-conv1-5.pth) |
-| Mink16UNet18  | Stanford Area5 train | 5cm       | 5                 | Area 5 test 65.828% mIoU, no rotation average, no sliding window [per class performance](https://pastebin.com/Gj3PrPFr) | [download](https://node1.chrischoy.org/data/publications/minknet/Mink16UNet18_stanford-conv1-5.pth) |
+| Mink16UNet18  | Stanford Area5 train | 5cm       | 5                 | Area 5 test 65.828% mIoU, no rotation average, no sliding window [per class performance](https://pastebin.com/Gj3PrPFr) | [download](https://node1.chrischoy.org/data/publications/minknet/Mink16UNet18-stanford-conv1-5.pth) |
 
 Note that sliding window style evaluation (cropping and stitching results) used in many related works effectively works as an ensemble (rotation averaging) which boosts the performance.
+
+
+## Demo
+
+The demo code downloads the weights of a Mink16UNet34C network (conv1 kernel size 5) trained on the ScanNet training split, runs it on a sample room, and visualizes the prediction.
+
+```
+python -m demo.scannet
+```
+
+![](imgs/scannet.png)
+
+If you want to test a network trained on the Stanford dataset, run
+
+
+```
+python -m demo.stanford
+```
+
+![](imgs/stanford.png)
diff --git a/demo/scannet.py b/demo/scannet.py
@@ -0,0 +1,159 @@
+# Copyright (c) Chris Choy ([email protected]).
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+# of the Software, and to permit persons to whom the Software is furnished to do
+# so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Please cite "4D Spatio-Temporal ConvNets: Minkowski Convolutional Neural
+# Networks", CVPR'19 (https://arxiv.org/abs/1904.08755) if you use any part
+# of the code.
+import os
+import argparse
+import numpy as np
+from urllib.request import urlretrieve
+try:
+  import open3d as o3d
+except ImportError:
+  raise ImportError('Please install open3d with `pip install open3d`.')
+
+import torch
+import MinkowskiEngine as ME
+
+from models.res16unet import Res16UNet34C
+
+parser = argparse.ArgumentParser()  # command-line options of the demo
+parser.add_argument('--weights', type=str, default='MinkUNet34C-train-conv1-5.pth')
+parser.add_argument('--file_name', type=str, default='1.ply')
+parser.add_argument('--bn_momentum', type=float, default=0.05)  # not read in this script; config is handed to the model constructor
+parser.add_argument('--voxel_size', type=float, default=0.02)
+parser.add_argument('--conv1_kernel_size', type=int, default=5)  # not read in this script; config is handed to the model constructor
+
+VALID_CLASS_IDS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39]  # predicted class index -> COLOR_MAP key
+
+COLOR_MAP = {  # label id -> RGB color on a 0-255 scale (divided by 255 before display)
+    0: (0., 0., 0.),
+    1: (174., 199., 232.),
+    2: (152., 223., 138.),
+    3: (31., 119., 180.),
+    4: (255., 187., 120.),
+    5: (188., 189., 34.),
+    6: (140., 86., 75.),
+    7: (255., 152., 150.),
+    8: (214., 39., 40.),
+    9: (197., 176., 213.),
+    10: (148., 103., 189.),
+    11: (196., 156., 148.),
+    12: (23., 190., 207.),
+    14: (247., 182., 210.),
+    15: (66., 188., 102.),
+    16: (219., 219., 141.),
+    17: (140., 57., 197.),
+    18: (202., 185., 52.),
+    19: (51., 176., 203.),
+    20: (200., 54., 131.),
+    21: (92., 193., 61.),
+    22: (78., 71., 183.),
+    23: (172., 114., 82.),
+    24: (255., 127., 14.),
+    25: (91., 163., 138.),
+    26: (153., 98., 156.),
+    27: (140., 153., 101.),
+    28: (158., 218., 229.),
+    29: (100., 125., 154.),
+    30: (178., 127., 135.),
+    32: (146., 111., 194.),
+    33: (44., 160., 44.),
+    34: (112., 128., 144.),
+    35: (96., 207., 209.),
+    36: (227., 119., 194.),
+    37: (213., 92., 176.),
+    38: (94., 106., 211.),
+    39: (82., 84., 163.),
+    40: (100., 85., 144.),
+}
+
+
+def download(config):
+  if not os.path.isfile(config.file_name):
+    print('Downloading the weights and a room ply file...')
+    urlretrieve(
+        "https://node1.chrischoy.org/data/publications/minknet/MinkUNet34C-train-conv1-5.pth",
+        'MinkUNet34C-train-conv1-5.pth')
+    urlretrieve(f"http://cvgl.stanford.edu/data2/minkowskiengine/{config.file_name}",
+                config.file_name)
+
+
+def load_file(file_name, voxel_size):
+  pcd = o3d.io.read_point_cloud(file_name)
+  coords = np.array(pcd.points)
+  feats = np.array(pcd.colors)
+
+  quantized_coords = np.floor(coords / voxel_size)
+  inds = ME.utils.sparse_quantize(quantized_coords, return_index=True)
+
+  return quantized_coords[inds], feats[inds], pcd
+
+
+def generate_input_sparse_tensor(file_name, voxel_size=0.05):
+  # Create a batch, this process is done in a data loader during training in parallel.
+  batch = [load_file(file_name, voxel_size)]
+  coordinates_, featrues_, pcds = list(zip(*batch))
+  coordinates, features = ME.utils.sparse_collate(coordinates_, featrues_)
+
+  # Normalize features and create a sparse tensor
+  return coordinates, (features - 0.5).float()
+
+
+if __name__ == '__main__':
+  config = parser.parse_args()
+  download(config)
+  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+  # Define a model and load the weights
+  model = Res16UNet34C(3, 20, config).to(device)
+  model_dict = torch.load(config.weights)
+  model.load_state_dict(model_dict['state_dict'])
+  model.eval()
+
+  # Measure time
+  with torch.no_grad():
+    coordinates, features = generate_input_sparse_tensor(
+        config.file_name, voxel_size=config.voxel_size)
+
+    # Feed-forward pass and get the prediction
+    sinput = ME.SparseTensor(features, coords=coordinates).to(device)
+    soutput = model(sinput)
+
+  # Feed-forward pass and get the prediction
+  _, pred = soutput.F.max(1)
+  pred = pred.cpu().numpy()
+
+  # Map color
+  colors = np.array([COLOR_MAP[VALID_CLASS_IDS[l]] for l in pred])
+
+  # Create a point cloud file
+  pred_pcd = o3d.geometry.PointCloud()
+  coordinates = soutput.C.numpy()[:, :3]  # last column is the batch index
+  pred_pcd.points = o3d.utility.Vector3dVector(coordinates * config.voxel_size)
+  pred_pcd.colors = o3d.utility.Vector3dVector(colors / 255)
+
+  # Move the original point cloud
+  pcd = o3d.io.read_point_cloud(config.file_name)
+  pcd.points = o3d.utility.Vector3dVector(np.array(pcd.points) + np.array([0, 5, 0]))
+
+  # Visualize the input point cloud and the prediction
+  o3d.visualization.draw_geometries([pcd, pred_pcd])
diff --git a/demo/stanford.py b/demo/stanford.py
@@ -0,0 +1,171 @@
+# Copyright (c) Chris Choy ([email protected]).
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+# of the Software, and to permit persons to whom the Software is furnished to do
+# so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Please cite "4D Spatio-Temporal ConvNets: Minkowski Convolutional Neural
+# Networks", CVPR'19 (https://arxiv.org/abs/1904.08755) if you use any part
+# of the code.
+import os
+import argparse
+import numpy as np
+from urllib.request import urlretrieve
+try:
+  import open3d as o3d
+except ImportError:
+  raise ImportError('Please install open3d with `pip install open3d`.')
+from plyfile import PlyData
+
+import torch
+import MinkowskiEngine as ME
+
+from models.res16unet import Res16UNet18
+
+parser = argparse.ArgumentParser()  # command-line options of the demo
+parser.add_argument('--weights', type=str, default='Mink16UNet18-stanford-conv1-5.pth')
+parser.add_argument('--file_name', type=str, default='conferenceRoom_1.ply')
+parser.add_argument('--bn_momentum', type=float, default=0.05)  # not read in this script; config is handed to the model constructor
+parser.add_argument('--voxel_size', type=float, default=0.05)
+parser.add_argument('--conv1_kernel_size', type=int, default=5)  # not read in this script; config is handed to the model constructor
+
+VALID_CLASS_IDS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15]  # predicted class index -> COLOR_MAP key
+
+COLOR_MAP = {  # label id -> RGB color on a 0-255 scale (divided by 255 before display)
+    0: (0., 0., 0.),
+    1: (174., 199., 232.),
+    2: (152., 223., 138.),
+    3: (31., 119., 180.),
+    4: (255., 187., 120.),
+    5: (188., 189., 34.),
+    6: (140., 86., 75.),
+    7: (255., 152., 150.),
+    8: (214., 39., 40.),
+    9: (197., 176., 213.),
+    10: (148., 103., 189.),
+    11: (196., 156., 148.),
+    12: (23., 190., 207.),
+    14: (247., 182., 210.),
+    15: (66., 188., 102.),
+    16: (219., 219., 141.),
+    17: (140., 57., 197.),
+    18: (202., 185., 52.),
+    19: (51., 176., 203.),
+    20: (200., 54., 131.),
+    21: (92., 193., 61.),
+    22: (78., 71., 183.),
+    23: (172., 114., 82.),
+    24: (255., 127., 14.),
+    25: (91., 163., 138.),
+    26: (153., 98., 156.),
+    27: (140., 153., 101.),
+    28: (158., 218., 229.),
+    29: (100., 125., 154.),
+    30: (178., 127., 135.),
+    32: (146., 111., 194.),
+    33: (44., 160., 44.),
+    34: (112., 128., 144.),
+    35: (96., 207., 209.),
+    36: (227., 119., 194.),
+    37: (213., 92., 176.),
+    38: (94., 106., 211.),
+    39: (82., 84., 163.),
+    40: (100., 85., 144.),
+}
+
+
+def download(config):
+  if not os.path.isfile(config.file_name):
+    print('Downloading the weights and a room ply file...')
+    urlretrieve(
+        "https://node1.chrischoy.org/data/publications/minknet/Mink16UNet18-stanford-conv1-5.pth",
+        'Mink16UNet18-stanford-conv1-5.pth')
+    urlretrieve(f"http://cvgl.stanford.edu/data2/minkowskiengine/{config.file_name}",
+                config.file_name)
+
+
+def load_file(file_name, voxel_size):
+  plydata = PlyData.read(file_name)
+  data = plydata.elements[0].data
+  coords = np.array([data['x'], data['y'], data['z']], dtype=np.float32).T
+  colors = np.array([data['red'], data['green'], data['blue']], dtype=np.float32).T / 255
+  labels = np.array(data['label'], dtype=np.int32)
+
+  # Generate input pointcloud
+  pcd = o3d.geometry.PointCloud()
+  pcd.points = o3d.utility.Vector3dVector(coords)
+  pcd.colors = o3d.utility.Vector3dVector(colors)
+
+  # Normalize feature
+  norm_coords = coords - coords.mean(0)
+  feats = np.concatenate((colors - 0.5, norm_coords), 1)
+
+  coords, feats, labels = ME.utils.sparse_quantize(
+      coords, feats, labels, quantization_size=voxel_size)
+
+  return coords, feats, labels, pcd
+
+
+def generate_input_sparse_tensor(file_name, voxel_size=0.05):
+  # Create a batch, this process is done in a data loader during training in parallel.
+  batch = [load_file(file_name, voxel_size)]
+  coordinates_, featrues_, labels_, pcds = list(zip(*batch))
+  coordinates, features, labels = ME.utils.sparse_collate(coordinates_, featrues_, labels_)
+
+  # Normalize features and create a sparse tensor
+  return coordinates, features.float(), labels
+
+
+if __name__ == '__main__':
+  config = parser.parse_args()
+  download(config)
+  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+  # Define a model and load the weights
+  model = Res16UNet18(6, 13, config).to(device)
+  model_dict = torch.load(config.weights)
+  model.load_state_dict(model_dict['state_dict'])
+  model.eval()
+
+  # Measure time
+  with torch.no_grad():
+    coordinates, features, labels = generate_input_sparse_tensor(
+        config.file_name, voxel_size=config.voxel_size)
+
+    # Feed-forward pass and get the prediction
+    sinput = ME.SparseTensor(features, coords=coordinates).to(device)
+    soutput = model(sinput)
+
+  # Feed-forward pass and get the prediction
+  _, pred = soutput.F.max(1)
+  pred = pred.cpu().numpy()
+
+  # Map color
+  colors = np.array([COLOR_MAP[VALID_CLASS_IDS[l]] for l in pred])
+
+  # Create a point cloud file
+  pred_pcd = o3d.geometry.PointCloud()
+  coordinates = soutput.C.numpy()[:, :3]  # last column is the batch index
+  pred_pcd.points = o3d.utility.Vector3dVector(coordinates * config.voxel_size)
+  pred_pcd.colors = o3d.utility.Vector3dVector(colors / 255)
+
+  # Move the original point cloud
+  pcd = o3d.io.read_point_cloud(config.file_name)
+  pcd.points = o3d.utility.Vector3dVector(np.array(pcd.points) + np.array([7, 0, 0]))
+
+  # Visualize the input point cloud and the prediction
+  o3d.visualization.draw_geometries([pcd, pred_pcd])
diff --git a/imgs/scannet.png b/imgs/scannet.png
diff --git a/imgs/stanford.png b/imgs/stanford.png
diff --git a/scripts/train_scannet.sh b/scripts/train_scannet.sh
@@ -20,7 +20,7 @@ mkdir -p $LOG_DIR
 
 LOG="$LOG_DIR/$TIME.txt"
 
-python main.py \
+python -m main \
     --log_dir $LOG_DIR \
     --dataset ScannetVoxelization2cmDataset \
     --model Res16UNet34C \
@@ -35,7 +35,7 @@ python main.py \
 export TIME=$(date +"%Y-%m-%d_%H-%M-%S")
 LOG="$LOG_DIR/$TIME.txt"
 
-python main.py \
+python -m main \
     --log_dir $LOG_DIR \
     --dataset ScannetVoxelization2cmDataset \
     --model Res16UNet34C \

diff --git a/scripts/train_stanford.sh b/scripts/train_stanford.sh
@@ -20,7 +20,7 @@ mkdir -p $LOG_DIR
 
 LOG="$LOG_DIR/$TIME.txt"
 
-python main.py \
+python -m main \
     --dataset StanfordArea5Dataset \
     --batch_size $BATCH_SIZE \
     --scheduler PolyLR \

diff --git a/scripts/train_synthia4d.sh b/scripts/train_synthia4d.sh
@@ -22,7 +22,7 @@ mkdir -p $LOG_DIR
 
 LOG="$LOG_DIR/$TIME.txt"
 
-python main.py \
+python -m main \
     --log_dir $LOG_DIR \
     --dataset $DATASET \
     --model $MODEL \