scannet preprocessing

chrischoy · Oct 11, 2019 · 4b1315c · 4b1315c
1 parent 4085407
commit 4b1315c
Show file tree

Hide file tree

Showing 5 changed files with 95 additions and 38 deletions.
diff --git a/config.py b/config.py
@@ -113,43 +113,23 @@ def add_argument_group(name):
 data_arg.add_argument('--train_limit_numpoints', type=int, default=0)
 
 # Point Cloud Dataset
+
 data_arg.add_argument(
     '--synthia_path',
     type=str,
-    default='/cvgl/group/Synthia/synthia-processed/raw-pc-upright',
+    default='/home/chrischoy/datasets/synthia_preprocessed',
     help='Point Cloud dataset root dir')
 
-data_arg.add_argument(
-    '--synthia_online_path',
-    type=str,
-    default='/cvgl2/u/jgwak/Datasets/synthia_subsampled',
-    help='Synthia precropped dataset root dir')
-
 data_arg.add_argument(
     '--scannet_path',
     type=str,
-    default='/cvgl2/u/jgwak/Datasets/scannet',
+    default='/home/chrischoy/datasets/scannet/scannet_preprocessed',
     help='Scannet online voxelization dataset root dir')
 
-data_arg.add_argument(
-    '--scannet_ply_path',
-    type=str,
-    default='/cvgl2/u/jgwak/Datasets/scannet_ply',
-    help='Scannet sparse voxelization dataset root dir')
-
-data_arg.add_argument(
-    '--synthia_camera_path', type=str, default='/cvgl/group/Synthia/%s/CameraParams/')
-
-data_arg.add_argument('--synthia_camera_intrinsic_file', type=str, default='intrinsics.txt')
-
-data_arg.add_argument(
-    '--synthia_camera_extrinsics_file', type=str, default='Stereo_Right/Omni_F/%s.txt')
-
-# Point Cloud Dataset
 data_arg.add_argument(
     '--stanford3d_online_path',
     type=str,
-    default='/cvgl2/u/jgwak/Datasets/stanford_subsampled',
+    default='/home/chrischoy/datasets/stanford_preprocessed',
     help='Stanford precropped dataset root dir')
 
 # Training / test parameters

diff --git a/lib/datasets/preprocessing/scannet.py b/lib/datasets/preprocessing/scannet.py
@@ -0,0 +1,84 @@
+from pathlib import Path
+from random import shuffle
+
+import numpy as np
+
+from lib.pc_utils import read_plyfile, save_point_cloud
+
+
+SCANNET_RAW_PATH = Path('/data/chrischoy/datasets/scannet_raw')
+SCANNET_OUT_PATH = Path('/data/chrischoy/datasets/scannet_processed')
+TRAIN_DEST = 'train'
+TEST_DEST = 'test'
+SUBSETS = {TRAIN_DEST: 'scans', TEST_DEST: 'scans_test'}
+POINTCLOUD_FILE = '_vh_clean_2.ply'
+BUGS = {
+    'train/scene0270_00_*.ply': 50,
+    'train/scene0270_02_*.ply': 50,
+    'train/scene0384_00_*.ply': 149,
+}
+
+
+# Preprocess data: write one labeled point cloud per raw scan under SCANNET_OUT_PATH/<phase>/.
+for out_path, in_path in SUBSETS.items():
+  phase_out_path = SCANNET_OUT_PATH / out_path
+  phase_out_path.mkdir(parents=True, exist_ok=True)
+  for f in (SCANNET_RAW_PATH / in_path).glob('*/*' + POINTCLOUD_FILE):
+    # Load pointcloud file.
+    pointcloud = read_plyfile(f)
+    # Make sure alpha value is meaningless.
+    assert np.unique(pointcloud[:, -1]).size == 1
+    # Load label file.
+    label_f = f.parent / (f.stem + '.labels' + f.suffix)
+    if label_f.is_file():
+      label = read_plyfile(label_f)
+      # Sanity check that the pointcloud and its label have the same vertices.
+      assert pointcloud.shape[0] == label.shape[0]
+      assert np.allclose(pointcloud[:, :3], label[:, :3])
+    else:  # Label may not exist in test case.
+      label = np.zeros_like(pointcloud)
+
+    # Each scan is written whole (no cropping), so there is no crop coverage to
+    # verify. The previous check compared the scan's points against an array
+    # that was never filled, which made the assertion fail for any non-empty
+    # scan; it has been removed together with its unused helpers.
+    # Saved columns: the first six pointcloud columns, then the last label column.
+    out_f = phase_out_path / (f.name[:-len(POINTCLOUD_FILE)] + f.suffix)
+    processed = np.hstack((pointcloud[:, :6], label[:, -1:]))
+    save_point_cloud(processed, out_f, with_label=True, verbose=False)
+
+# Split trainval data to train/val according to scene (text before the first '_').
+trainval_files = [f.name for f in (SCANNET_OUT_PATH / TRAIN_DEST).glob('*.ply')]
+trainval_scenes = list(set(f.split('_')[0] for f in trainval_files))
+shuffle(trainval_scenes)
+num_train = len(trainval_scenes)  # NOTE(review): every scene goes to train, so val is empty.
+train_scenes = set(trainval_scenes[:num_train])
+val_scenes = set(trainval_scenes[num_train:])
+
+# Collect file list for all phase; a file belongs to the split that owns its exact scene id.
+train_files = [f'{TRAIN_DEST}/{f}' for f in trainval_files if f.split('_')[0] in train_scenes]
+val_files = [f'{TRAIN_DEST}/{f}' for f in trainval_files if f.split('_')[0] in val_scenes]
+test_files = [f'{TEST_DEST}/{f.name}' for f in (SCANNET_OUT_PATH / TEST_DEST).glob('*.ply')]
+
+# Data sanity check: splits are disjoint and every listed file exists on disk.
+assert not set(train_files).intersection(val_files)
+assert all((SCANNET_OUT_PATH / f).is_file() for f in train_files)
+assert all((SCANNET_OUT_PATH / f).is_file() for f in val_files)
+assert all((SCANNET_OUT_PATH / f).is_file() for f in test_files)
+
+# Write file list for all phase, one path (relative to SCANNET_OUT_PATH) per line.
+with open(SCANNET_OUT_PATH / 'train.txt', 'w') as list_file:
+  list_file.writelines(name + '\n' for name in train_files)
+with open(SCANNET_OUT_PATH / 'val.txt', 'w') as list_file:
+  list_file.writelines(name + '\n' for name in val_files)
+with open(SCANNET_OUT_PATH / 'test.txt', 'w') as list_file:
+  list_file.writelines(name + '\n' for name in test_files)
+
+# Fix bug in the data (disabled; BUGS maps a file glob to the bugged label value to reset).
+# for files, bug_index in BUGS.items():
+#   for f in SCANNET_OUT_PATH.glob(files):
+#     pointcloud = read_plyfile(f)
+#     bug_mask = pointcloud[:, -1] == bug_index
+#     print(f'Fixing {f} bugged label {bug_index} x {bug_mask.sum()}')
+#     pointcloud[bug_mask, -1] = 0
+#     save_point_cloud(pointcloud, f, with_label=True, verbose=False)
diff --git a/lib/datasets/scannet.py b/lib/datasets/scannet.py
@@ -14,8 +14,6 @@
                 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator',
                 'shower curtain', 'toilet', 'sink', 'bathtub', 'otherfurniture')
 VALID_CLASS_IDS = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39)
-TEST_FULL_PLY_PATH = 'test/%s_vh_clean_2.ply'
-FULL_EVAL_PATH = 'outputs/fulleval'
 SCANNET_COLOR_MAP = {
     0: (0., 0., 0.),
     1: (174., 199., 232.),
@@ -78,11 +76,12 @@ class ScannetVoxelizationDataset(VoxelizationDataset):
   IGNORE_LABELS = tuple(set(range(41)) - set(VALID_CLASS_IDS))
   IS_FULL_POINTCLOUD_EVAL = True
 
+  # If trainval.txt does not exist, copy train.txt and append the contents of val.txt.
   DATA_PATH_FILE = {
-      DatasetPhase.Train: 'scannetv2_train.txt',
-      DatasetPhase.Val: 'scannetv2_val.txt',
-      DatasetPhase.TrainVal: 'trainval_uncropped.txt',
-      DatasetPhase.Test: 'scannetv2_test.txt'
+      DatasetPhase.Train: 'train.txt',
+      DatasetPhase.Val: 'val.txt',
+      DatasetPhase.TrainVal: 'trainval.txt',
+      DatasetPhase.Test: 'test.txt'
   }
 
   def __init__(self,

diff --git a/lib/datasets/synthia.py b/lib/datasets/synthia.py
@@ -143,7 +143,7 @@ def __init__(self,
       phase = str2datasetphase_type(phase)
     if phase not in [DatasetPhase.Train, DatasetPhase.TrainVal]:
       self.CLIP_BOUND = self.TEST_CLIP_BOUND
-    data_root = config.synthia_online_path
+    data_root = config.synthia_path
     data_paths = read_txt(osp.join(data_root, self.DATA_PATH_FILE[phase]))
     data_paths = [d.split()[0] for d in data_paths]
     logging.info('Loading {}: {}'.format(self.__class__.__name__, self.DATA_PATH_FILE[phase]))
@@ -191,7 +191,7 @@ def __init__(self,
       phase = str2datasetphase_type(phase)
     if phase not in [DatasetPhase.Train, DatasetPhase.TrainVal]:
       self.CLIP_BOUND = self.TEST_CLIP_BOUND
-    data_root = config.synthia_online_path
+    data_root = config.synthia_path
     data_paths = read_txt(osp.join(data_root, self.DATA_PATH_FILE[phase]))
     data_paths = sorted([d.split()[0] for d in data_paths])
     seq2files = defaultdict(list)

diff --git a/models/resnet.py b/models/resnet.py
@@ -77,12 +77,6 @@ def space_n_time_m(n, m):
 
   def weight_initialization(self):
     for m in self.modules():
-      if isinstance(m, ME.MinkowskiConvolution):
-        ME.utils.kaiming_normal_(m.kernel, mode='fan_out', nonlinearity='relu')
-
-      if isinstance(m, ME.MinkowskiConvolutionTranspose):
-        ME.utils.kaiming_normal_(m.kernel, mode='fan_in', nonlinearity='relu')
-
       if isinstance(m, ME.MinkowskiBatchNorm):
         nn.init.constant_(m.bn.weight, 1)
         nn.init.constant_(m.bn.bias, 0)