scannet preprocessing
chrischoy committed Oct 11, 2019
1 parent 4085407 commit 4b1315c
Showing 5 changed files with 95 additions and 38 deletions.
28 changes: 4 additions & 24 deletions config.py
@@ -113,43 +113,23 @@ def add_argument_group(name):
 data_arg.add_argument('--train_limit_numpoints', type=int, default=0)
 
 # Point Cloud Dataset
+
 data_arg.add_argument(
     '--synthia_path',
     type=str,
-    default='/cvgl/group/Synthia/synthia-processed/raw-pc-upright',
+    default='/home/chrischoy/datasets/synthia_preprocessed',
     help='Point Cloud dataset root dir')
 
-data_arg.add_argument(
-    '--synthia_online_path',
-    type=str,
-    default='/cvgl2/u/jgwak/Datasets/synthia_subsampled',
-    help='Synthia precropped dataset root dir')
-
 data_arg.add_argument(
     '--scannet_path',
     type=str,
-    default='/cvgl2/u/jgwak/Datasets/scannet',
+    default='/home/chrischoy/datasets/scannet/scannet_preprocessed',
     help='Scannet online voxelization dataset root dir')
 
-data_arg.add_argument(
-    '--scannet_ply_path',
-    type=str,
-    default='/cvgl2/u/jgwak/Datasets/scannet_ply',
-    help='Scannet sparse voxelization dataset root dir')
-
-data_arg.add_argument(
-    '--synthia_camera_path', type=str, default='/cvgl/group/Synthia/%s/CameraParams/')
-
-data_arg.add_argument('--synthia_camera_intrinsic_file', type=str, default='intrinsics.txt')
-
-data_arg.add_argument(
-    '--synthia_camera_extrinsics_file', type=str, default='Stereo_Right/Omni_F/%s.txt')
-
-# Point Cloud Dataset
 data_arg.add_argument(
     '--stanford3d_online_path',
    type=str,
-    default='/cvgl2/u/jgwak/Datasets/stanford_subsampled',
+    default='/home/chrischoy/datasets/stanford_preprocessed',
     help='Stanford precropped dataset root dir')
 
 # Training / test parameters
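The new defaults point at locally preprocessed copies of each dataset and only matter when the flags are not passed explicitly. For context, a minimal sketch of how downstream code reads these paths; the `get_config` helper name is an assumption, not confirmed by this diff:

# Illustrative sketch only (names other than --scannet_path are assumed).
from config import get_config  # assumes config.py exposes a parser helper

config = get_config()      # parses command-line flags, falling back to the defaults above
print(config.scannet_path)  # e.g. /home/chrischoy/datasets/scannet/scannet_preprocessed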
84 changes: 84 additions & 0 deletions lib/datasets/preprocessing/scannet.py
@@ -0,0 +1,84 @@
from pathlib import Path
from random import shuffle

import numpy as np

from lib.pc_utils import read_plyfile, save_point_cloud

SCANNET_RAW_PATH = Path('/data/chrischoy/datasets/scannet_raw')
SCANNET_OUT_PATH = Path('/data/chrischoy/datasets/scannet_processed')
TRAIN_DEST = 'train'
TEST_DEST = 'test'
SUBSETS = {TRAIN_DEST: 'scans', TEST_DEST: 'scans_test'}
POINTCLOUD_FILE = '_vh_clean_2.ply'
BUGS = {
    'train/scene0270_00_*.ply': 50,
    'train/scene0270_02_*.ply': 50,
    'train/scene0384_00_*.ply': 149,
}

# Preprocess data.
for out_path, in_path in SUBSETS.items():
  phase_out_path = SCANNET_OUT_PATH / out_path
  phase_out_path.mkdir(parents=True, exist_ok=True)
  for f in (SCANNET_RAW_PATH / in_path).glob('*/*' + POINTCLOUD_FILE):
    # Load pointcloud file.
    pointcloud = read_plyfile(f)
    # Make sure alpha value is meaningless.
    assert np.unique(pointcloud[:, -1]).size == 1
    # Load label file.
    label_f = f.parent / (f.stem + '.labels' + f.suffix)
    if label_f.is_file():
      label = read_plyfile(label_f)
      # Sanity check that the pointcloud and its label have the same vertices.
      assert pointcloud.shape[0] == label.shape[0]
      assert np.allclose(pointcloud[:, :3], label[:, :3])
    else:  # Label may not exist in test case.
      label = np.zeros_like(pointcloud)

    out_f = phase_out_path / (f.name[:-len(POINTCLOUD_FILE)] + f.suffix)
    processed = np.hstack((pointcloud[:, :6], np.array([label[:, -1]]).T))
    save_point_cloud(processed, out_f, with_label=True, verbose=False)
    # Sanity check that no points were dropped during processing.
    assert processed.shape[0] == pointcloud.shape[0]

# Split trainval data to train/val according to scene.
trainval_files = [f.name for f in (SCANNET_OUT_PATH / TRAIN_DEST).glob('*.ply')]
trainval_scenes = list(set(f.split('_')[0] for f in trainval_files))
shuffle(trainval_scenes)
# NOTE: the 0.8 train ratio is an assumption; without a ratio, val_scenes would be empty.
num_train = int(0.8 * len(trainval_scenes))
train_scenes = trainval_scenes[:num_train]
val_scenes = trainval_scenes[num_train:]

# Collect the file list for each phase.
train_files = [f'{TRAIN_DEST}/{f}' for f in trainval_files if any(s in f for s in train_scenes)]
val_files = [f'{TRAIN_DEST}/{f}' for f in trainval_files if any(s in f for s in val_scenes)]
test_files = [f'{TEST_DEST}/{f.name}' for f in (SCANNET_OUT_PATH / TEST_DEST).glob('*.ply')]

# Data sanity check.
assert not set(train_files).intersection(val_files)
assert all((SCANNET_OUT_PATH / f).is_file() for f in train_files)
assert all((SCANNET_OUT_PATH / f).is_file() for f in val_files)
assert all((SCANNET_OUT_PATH / f).is_file() for f in test_files)

# Write the file list for each phase.
with open(SCANNET_OUT_PATH / 'train.txt', 'w') as f:
  f.writelines([f + '\n' for f in train_files])
with open(SCANNET_OUT_PATH / 'val.txt', 'w') as f:
  f.writelines([f + '\n' for f in val_files])
with open(SCANNET_OUT_PATH / 'test.txt', 'w') as f:
  f.writelines([f + '\n' for f in test_files])

# Fix bug in the data.
# for files, bug_index in BUGS.items():
#   for f in SCANNET_OUT_PATH.glob(files):
#     pointcloud = read_plyfile(f)
#     bug_mask = pointcloud[:, -1] == bug_index
#     print(f'Fixing {f} bugged label {bug_index} x {bug_mask.sum()}')
#     pointcloud[bug_mask, -1] = 0
#     save_point_cloud(pointcloud, f, with_label=True, verbose=False)
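The script depends on read_plyfile and save_point_cloud from lib/pc_utils. A minimal sketch of the assumed read_plyfile contract, written against the plyfile package; the real implementation may differ:

import numpy as np
from plyfile import PlyData

def read_plyfile_sketch(path):
  # Assumed contract: return every vertex property as one (N, C) float array,
  # e.g. [x, y, z, red, green, blue, alpha] for ScanNet *_vh_clean_2.ply scans.
  vertices = PlyData.read(str(path))['vertex'].data
  return np.stack([vertices[name] for name in vertices.dtype.names], axis=-1)

Running the module (e.g. python -m lib.datasets.preprocessing.scannet) then populates train/ and test/ plus train.txt, val.txt, and test.txt under SCANNET_OUT_PATH.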
11 changes: 5 additions & 6 deletions lib/datasets/scannet.py
@@ -14,8 +14,6 @@
     'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator',
     'shower curtain', 'toilet', 'sink', 'bathtub', 'otherfurniture')
 VALID_CLASS_IDS = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39)
-TEST_FULL_PLY_PATH = 'test/%s_vh_clean_2.ply'
-FULL_EVAL_PATH = 'outputs/fulleval'
 SCANNET_COLOR_MAP = {
     0: (0., 0., 0.),
     1: (174., 199., 232.),
@@ -78,11 +76,12 @@ class ScannetVoxelizationDataset(VoxelizationDataset):
   IGNORE_LABELS = tuple(set(range(41)) - set(VALID_CLASS_IDS))
   IS_FULL_POINTCLOUD_EVAL = True
 
+  # If trainval.txt does not exist, copy train.txt and append the contents of val.txt.
   DATA_PATH_FILE = {
-      DatasetPhase.Train: 'scannetv2_train.txt',
-      DatasetPhase.Val: 'scannetv2_val.txt',
-      DatasetPhase.TrainVal: 'trainval_uncropped.txt',
-      DatasetPhase.Test: 'scannetv2_test.txt'
+      DatasetPhase.Train: 'train.txt',
+      DatasetPhase.Val: 'val.txt',
+      DatasetPhase.TrainVal: 'trainval.txt',
+      DatasetPhase.Test: 'test.txt'
   }
 
   def __init__(self,
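Per the new comment, trainval.txt is not generated by the preprocessing script. A one-off sketch of producing it, assuming the dataset root that --scannet_path points to:

from pathlib import Path

root = Path('/home/chrischoy/datasets/scannet/scannet_preprocessed')  # assumed root
# train.txt and val.txt each end with a newline, so plain concatenation is clean.
(root / 'trainval.txt').write_text(
    (root / 'train.txt').read_text() + (root / 'val.txt').read_text())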
4 changes: 2 additions & 2 deletions lib/datasets/synthia.py
@@ -143,7 +143,7 @@ def __init__(self,
     phase = str2datasetphase_type(phase)
     if phase not in [DatasetPhase.Train, DatasetPhase.TrainVal]:
       self.CLIP_BOUND = self.TEST_CLIP_BOUND
-    data_root = config.synthia_online_path
+    data_root = config.synthia_path
     data_paths = read_txt(osp.join(data_root, self.DATA_PATH_FILE[phase]))
     data_paths = [d.split()[0] for d in data_paths]
     logging.info('Loading {}: {}'.format(self.__class__.__name__, self.DATA_PATH_FILE[phase]))
@@ -191,7 +191,7 @@ def __init__(self,
     phase = str2datasetphase_type(phase)
     if phase not in [DatasetPhase.Train, DatasetPhase.TrainVal]:
       self.CLIP_BOUND = self.TEST_CLIP_BOUND
-    data_root = config.synthia_online_path
+    data_root = config.synthia_path
     data_paths = read_txt(osp.join(data_root, self.DATA_PATH_FILE[phase]))
     data_paths = sorted([d.split()[0] for d in data_paths])
     seq2files = defaultdict(list)
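Both Synthia constructors resolve their split files the same way: read_txt loads the split list, and the first whitespace-separated token of each line is the file path. For reference, a hedged sketch of what the read_txt helper is assumed to do (the actual lib implementation may differ):

def read_txt(path):
  # Assumed helper: return the stripped, non-empty lines of a text file.
  with open(path) as f:
    return [line.strip() for line in f if line.strip()]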
6 changes: 0 additions & 6 deletions models/resnet.py
@@ -77,12 +77,6 @@ def space_n_time_m(n, m):
 
   def weight_initialization(self):
     for m in self.modules():
-      if isinstance(m, ME.MinkowskiConvolution):
-        ME.utils.kaiming_normal_(m.kernel, mode='fan_out', nonlinearity='relu')
-
-      if isinstance(m, ME.MinkowskiConvolutionTranspose):
-        ME.utils.kaiming_normal_(m.kernel, mode='fan_in', nonlinearity='relu')
-
       if isinstance(m, ME.MinkowskiBatchNorm):
         nn.init.constant_(m.bn.weight, 1)
         nn.init.constant_(m.bn.bias, 0)
