SDDPreprocess.py
import os
import pathlib
import numpy as np
import pandas as pd
import math
from tqdm import tqdm
import json

# =================================================================================================
# This script converts the SDD data used by DAG-net into the ETH/UCY data format.
# We already provide the converted data in ./datasets/sdd
# If you want to convert the data yourself, first get the DAG-net version of the SDD data from
# https://github.com/alexmonti19/dagnet/blob/master/datasets/README.md.
# Then set the variable DATASET_DIR to the SDD data location and run this script.
# =================================================================================================
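# Each raw line in all_data/ is space-separated and is parsed below as four fields:
#   frame_id  pedestrian_id  x  y
# e.g. an illustrative (made-up) line "10 42 123.45 67.89" becomes [10, 42, 123.45, 67.89].
# The converted files keep the same four columns, tab-separated, as in ETH/UCY.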
DATASET_DIR = pathlib.Path('/path/to/SDD-DAG-Version/')
SDD_RAW_DIR = DATASET_DIR / 'all_data'
SDD_NPY_DIR = DATASET_DIR / 'sdd_npy'
save_root = 'datasets/sdd/'
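# Assumed directory layout, inferred from how the paths above are used (adjust to your local
# copy of the DAG-net SDD release):
#   /path/to/SDD-DAG-Version/
#       all_data/    <- raw per-scene .txt files read by this script
#       sdd_npy/     <- .npy splits written by this script
#       stats.txt    <- per-scene coordinate stats written by this script
#       limits.json  <- per-scene coordinate limits written by this script
# The ETH/UCY-style splits themselves are written under ./datasets/sdd/{train,val,test}.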


def split():
    """Read the raw per-scene files, split each scene 70/10/20 over frames, and write
    the ETH/UCY-style .txt splits plus per-scene stats and coordinate limits."""
    os.makedirs(os.path.join(save_root, 'train'), exist_ok=True)
    os.makedirs(os.path.join(save_root, 'val'), exist_ok=True)
    os.makedirs(os.path.join(save_root, 'test'), exist_ok=True)

    stats_file_full_path = DATASET_DIR.absolute() / 'stats.txt'
    stats_file = open(stats_file_full_path, 'w+')
    scenes_limits = {}

    all_files = [file_path for file_path in SDD_RAW_DIR.iterdir()]
    file_data = []
    for file in tqdm(all_files, desc='Reading files'):
        dname = file.name.split(".")[0]
        with open(file, 'r') as f:
            lines = []
            for line in f:
                line = line.strip().split(" ")
                line = [int(line[0]), int(line[1]), float(line[2]), float(line[3])]
                lines.append(np.asarray(line))
        file_data.append((dname, np.stack(lines)))

    training_split, test_split, validation_split, all_data = [], [], [], []
    for file in tqdm(file_data, desc='Preprocessing files'):
        scene_name = file[0]
        data = file[1]
        data_per_file = []

        # Frame IDs of the frames in the current scene
        frame_list = np.unique(data[:, 0]).tolist()

        min_x, max_x = data[:, 2].min().item(), data[:, 2].max().item()
        min_y, max_y = data[:, 3].min().item(), data[:, 3].max().item()
        range_x = np.round(np.abs(max_x - min_x), decimals=3)
        range_y = np.round(np.abs(max_y - min_y), decimals=3)
        stats_file.write('SCENE {}:\nmin_x: {}, max_x: {}\nmin_y: {}, max_y: {}\nrange_x: {}, range_y: {}\n\n\n'
                         .format(scene_name, min_x, max_x, min_y, max_y, range_x, range_y))

        # Round min(s) down and max(s) up to 1 decimal place,
        # e.g. min_x = 12.345 -> 12.3 and max_x = 56.781 -> 56.8.
        min_x = math.floor(min_x * 10) / 10.0
        min_y = math.floor(min_y * 10) / 10.0
        max_x = math.ceil(max_x * 10) / 10.0
        max_y = math.ceil(max_y * 10) / 10.0
        scenes_limits[scene_name] = {'x_min': min_x, 'x_max': max_x, 'y_min': min_y, 'y_max': max_y}

        # Group rows by frame: all pedestrians present in the same frame stay together
        for frame in frame_list:
            ped_in_frame = data[data[:, 0] == frame, :]
            data_per_file.append(ped_in_frame)
        # all_data.append((scene_name, np.concatenate(data_per_file)))
        # NOTE: data_per_file holds one array per frame, so the 70/10/20 split below
        # is over frames, not over individual trajectories.
        n_trjs = len(data_per_file)
        # 70% --> training
        # training_split.append((scene_name, np.concatenate(data_per_file[: math.ceil(n_trjs * 0.7)])))
        train_df = pd.DataFrame(np.concatenate(data_per_file[:math.ceil(n_trjs * 0.7)]),
                                columns=['frame_id', 'pedestrian_id', 'x', 'y'])
        # 10% --> validation
        # 20% --> test
        validation_test_split = data_per_file[math.ceil(n_trjs * 0.7):]
        # validation_split.append((scene_name, np.concatenate(validation_test_split[:math.ceil(n_trjs * 0.1)])))
        val_df = pd.DataFrame(np.concatenate(validation_test_split[:math.ceil(n_trjs * 0.1)]),
                              columns=['frame_id', 'pedestrian_id', 'x', 'y'])
        # test_split.append((scene_name, np.concatenate(validation_test_split[math.ceil(n_trjs*0.1):])))
        test_df = pd.DataFrame(np.concatenate(validation_test_split[math.ceil(n_trjs * 0.1):]),
                               columns=['frame_id', 'pedestrian_id', 'x', 'y'])

        train_df.to_csv(os.path.join(save_root, 'train', '{}.txt'.format(scene_name)),
                        sep='\t', header=False, index=False)
        val_df.to_csv(os.path.join(save_root, 'val', '{}.txt'.format(scene_name)),
                      sep='\t', header=False, index=False)
        test_df.to_csv(os.path.join(save_root, 'test', '{}.txt'.format(scene_name)),
                       sep='\t', header=False, index=False)
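        # Worked example (illustrative numbers, not from the dataset): with n_trjs = 100 frames,
        # train gets data_per_file[0:70], val gets frames 70:80, and test gets frames 80:100.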
    print('Splitting...')
    SDD_NPY_DIR.mkdir(exist_ok=True, parents=True)

    print('Saving splits...')
    # NOTE: the per-split appends above are commented out, so these .npy files are
    # saved empty unless those lines are re-enabled.
    np.save('{}/{}'.format(SDD_NPY_DIR, 'all_data.npy'), np.asarray(all_data))
    np.save('{}/{}'.format(SDD_NPY_DIR, 'train.npy'), np.asarray(training_split))
    np.save('{}/{}'.format(SDD_NPY_DIR, 'test.npy'), np.asarray(test_split))
    np.save('{}/{}'.format(SDD_NPY_DIR, 'validation.npy'), np.asarray(validation_split))

    print('Saving dataset stats...')
    stats_file.close()
    limits_json_full_path = DATASET_DIR.absolute() / 'limits.json'
    with open(limits_json_full_path, 'w+') as outfile:
        json.dump(scenes_limits, outfile, indent=2)


def main():
    split()
    print('All done.')


if __name__ == '__main__':
    main()
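
# =================================================================================================
# Hypothetical usage sketch (commented out so it never runs with the script): one way to
# sanity-check a converted split. 'bookstore_0' is a placeholder scene name; use any file that
# actually exists under datasets/sdd/train/ after running this script.
#
#   import pandas as pd
#   df = pd.read_csv('datasets/sdd/train/bookstore_0.txt', sep='\t', header=None,
#                    names=['frame_id', 'pedestrian_id', 'x', 'y'])
#   print(df.head())
#   # Frames were grouped in sorted order above, so frame_id should be non-decreasing:
#   assert df['frame_id'].is_monotonic_increasing
# =================================================================================================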