-
Notifications
You must be signed in to change notification settings - Fork 6
/
1-loader.py
274 lines (214 loc) · 10.2 KB
/
1-loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
# -*- coding: utf-8 -*-
"""
@author: Joao
Objective:
Gathers the data of each scenario in a numpy file.
Each scenario has a CSV file indicating which files are associated with
each unit and each modality (mmWave power, GPS data, ...).
For each scenario, unit and modality, this script accesses every path
specified in the CSV, reads the files, and concatenates the information
of each sample in a convenient numpy array.
Note: this script only gathers all the existing data. It does not change
the data in any way.
Script inputs:
data_folder: folder where the data for scenario is.
Use a path relative to the current working directory (cwd)
E.g. 'ViWiReal Scenarios' if *cwd*/ViWiReal Scenarios
scenario_idx: index of scenario. E.g. 5 corresponds to 'Scenario5_Tyler'
max_samples: read at most X samples from the scenario.
n_pos_vals: number of values per position sample
n_pwr_vals: number of values per mmWave power measurement sample
output_folder: folder where the gathered numpy (.npy) files are written.
Script outputs:
- one .npy file per selected modality and per selected label of the
given scenario
"""
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.image as mplimage
# Inputs:
# Input and output data folders (absolute paths)
data_folder = r'E:\DeepSense-scenarios' # os.path.join(os.getcwd(), 'DeepSense')
output_folder = os.path.join(os.getcwd(), 'Gathered_data3')
# 1-based scenario number; it indexes the per-scenario tables below.
scenario_idx = 1
# Unit (e.g. 1 or 2) whose files are read; it selects the 'unit<N>_' CSV columns.
unit = 2
first_sample = 1  # 1 to start on the first
# Upper bound on the number of CSV rows considered.
max_samples = int(1e5)
# Modalities to gather: a list of names, or the string 'all' to take every
# modality supported by the selected unit in the selected scenario.
# The calibrated location is spelled 'loc_cal' (underscore) in every table
# of this script; 'loc-cal' would never match and would always be dropped.
modalities = ['loc', 'loc_cal', 'pwr_60ghz']  # Select multiple modalities!
labels = ['seq_index']
# Available modalities:
# - 'loc': Localization
# - 'loc_cal': Calibrated localization (only listed for some scenarios)
# - 'pwr_60ghz': Power
# - 'rgb': RGB Images
# - 'lidar': Lidar data
# - 'radar': Radar data
#%% Set constants
# Array shape of a single RGB image for each camera type
# (presumably (height, width, channels) -- TODO confirm).
CAM_DIMENSIONS = dict(cam1=(540, 960, 3), cam2=(720, 1280, 3))

# One entry per scenario (list position = scenario number - 1).
# Scenario 16 is the only one using 'cam2' and a (1, 1) power sample.
CAMS_PER_SCENARIO = ['cam2' if scenario == 16 else 'cam1'
                     for scenario in range(1, 24)]
PWRS_PER_SCENARIO = [(1, 1) if scenario == 16 else (64, 1)
                     for scenario in range(1, 24)]

# Per-sample array shape of each modality / label for the selected scenario.
# NOTE(review): the modality 'blockage' and the label 'unit1_blockage' that
# appear in the "supported" tables have no entry here ('blockage_label' is
# the closest key), so selecting them raises a KeyError -- confirm naming.
DATA_DIMENSIONS = {
    'loc': (2, 1),
    'loc_cal': (2, 1),
    'pwr_60ghz': PWRS_PER_SCENARIO[scenario_idx - 1],
    'rgb': CAM_DIMENSIONS[CAMS_PER_SCENARIO[scenario_idx - 1]],
    'lidar': (460, 2),
    'radar': (4, 256, 128),
    'seq_index': (1, 1),
    'blockage_label': (1, 1),
    'bbox': (2, 4),
}
# Modalities available per unit, for every scenario.
# The table is written with the scenario number as an explicit key and then
# flattened into a list, so that entry [scenario_idx - 1] is the
# {unit: [modality, ...]} mapping of that scenario.
SUPPORTED_MODALITIES_PER_UNIT_PER_SCENARIO = [
    units for _, units in sorted({
        1: {1: ['loc', 'pwr_60ghz', 'rgb'], 2: ['loc']},
        2: {1: ['loc', 'pwr_60ghz', 'rgb'], 2: ['loc']},
        3: {1: ['loc', 'pwr_60ghz', 'rgb'], 2: ['loc', 'loc_cal']},
        4: {1: ['loc', 'pwr_60ghz', 'rgb'], 2: ['loc', 'loc_cal']},
        5: {1: ['loc', 'pwr_60ghz', 'rgb'], 2: ['loc']},
        6: {1: ['loc', 'pwr_60ghz', 'rgb'], 2: ['loc']},
        7: {1: ['loc', 'pwr_60ghz', 'rgb'], 2: ['loc']},
        8: {1: ['loc', 'pwr_60ghz', 'rgb', 'lidar'],
            2: ['loc', 'loc_cal']},
        9: {1: ['loc', 'pwr_60ghz', 'rgb', 'lidar', 'radar'],
            2: ['loc', 'loc_cal']},
        10: {1: ['pwr_60ghz', 'rgb']},
        11: {1: ['pwr_60ghz', 'rgb']},
        12: {1: ['pwr_60ghz', 'rgb']},
        13: {1: ['loc', 'pwr_60ghz', 'rgb']},
        14: {1: ['loc', 'pwr_60ghz', 'rgb'], 2: ['loc']},
        15: {1: ['loc', 'pwr_60ghz', 'rgb']},
        16: {1: ['pwr_60ghz', 'rgb', 'blockage']},
        17: {1: ['loc', 'pwr_60ghz', 'rgb']},
        18: {1: ['loc', 'pwr_60ghz', 'rgb']},
        19: {1: ['loc', 'pwr_60ghz', 'rgb']},
        20: {1: ['pwr_60ghz', 'rgb']},
        21: {1: ['loc', 'pwr_60ghz', 'rgb']},
        22: {1: ['loc', 'pwr_60ghz', 'rgb']},
        23: {1: ['loc', 'pwr_60ghz', 'rgb'],
             2: ['loc', 'speed', 'altitude', 'distance', 'height',
                 'x-speed', 'y-speed', 'z-speed', 'pitch', 'roll']},
    }.items())
]
# File extensions; not referenced anywhere else in the visible script.
SUPPORTED_FORMATS = ['.npy', '.mat']

# Units that exist in the selected scenario (keys of its modality mapping).
SUPPORTED_UNITS = SUPPORTED_MODALITIES_PER_UNIT_PER_SCENARIO[scenario_idx-1].keys()
if unit not in SUPPORTED_UNITS:
    raise Exception(f"Unit '{unit}' not supported for scenario {scenario_idx}. "
                    f"Supported units are: {SUPPORTED_UNITS}.")

# Modalities the selected unit provides in the selected scenario.
SUPPORTED_MODALITIES = \
    SUPPORTED_MODALITIES_PER_UNIT_PER_SCENARIO[scenario_idx-1][unit]

# The string 'all' is shorthand for "everything this unit supports".
if modalities == 'all':
    modalities = SUPPORTED_MODALITIES

# Keep the requested modalities that are supported (in the requested order)
# and warn about every one that is dropped.
allowed_modalities = []
for mod in modalities:
    if mod not in SUPPORTED_MODALITIES:
        print(f"WARNING: Modality '{mod}' not supported for unit {unit} in "
              f"scenario {scenario_idx}. Removing from the modalities list...")
        continue
    allowed_modalities.append(mod)
# Labels available in each scenario (list position = scenario number - 1).
# Consecutive scenarios that share the same labels are generated as a run;
# every scenario still gets its own list object.
SUPPORTED_LABELS_PER_SCENARIO = (
    [['seq_index'] for _ in range(9)]                        # Scenarios 1-9
    + [['bbox'] for _ in range(3)]                           # Scenarios 10-12
    + [['seq_index', 'bbox'] for _ in range(3)]              # Scenarios 13-15
    + [['seq_index', 'unit1_blockage'] for _ in range(7)]    # Scenarios 16-22
    + [['seq_index']]                                        # Scenario 23
)
# Prefix shared by the CSV columns and output files of the selected unit,
# e.g. 'unit1_' or 'unit2_'.
UNIT_STR = f'unit{unit}_'
#%% Task A - Get the path to the csv of the target scenario
# Task A.1 - list everything in the data folder
all_files_in_data_folder = os.listdir(data_folder)

# Task A.2 - keep the entries that look like scenario folders
# (They start with "Scenario", as in "Scenario5_Tyler")
all_scenario_dir = [entry for entry in all_files_in_data_folder
                    if entry[:8].lower() == 'scenario']

# Task A.3 - select the scenario we want to read: the part of the name
# before the first '_' must be "scenario<idx>" (case-insensitive), and the
# entry must be a directory so that a stray file with a matching name is
# not mistaken for the scenario folder.
scenario_name = 'scenario' + str(scenario_idx)
matching_dirs = [scen_dir for scen_dir in all_scenario_dir
                 if scen_dir.split('_')[0].lower() == scenario_name
                 and os.path.isdir(os.path.join(data_folder, scen_dir))]
if not matching_dirs:
    # Previously this surfaced as a bare IndexError from "[...][0]".
    raise FileNotFoundError(
        f"No folder for scenario {scenario_idx} found in '{data_folder}'. "
        f"Scenario-like entries found: {all_scenario_dir}.")
scenario_dir = matching_dirs[0]
print(f'Opening folder: {scenario_dir}')
scen_path = os.path.join(data_folder, scenario_dir)

# Task A.4 - select the csv inside that directory: it must be named exactly
# "scenarioX.csv", where X is the scenario index. The directory listing is
# used (rather than an existence check) so the match stays case-sensitive.
csv_file = scenario_name + '.csv'
if csv_file not in os.listdir(scen_path):
    raise FileNotFoundError(
        f"'{csv_file}' not found in scenario folder '{scen_path}'.")
print(f'Reading: {csv_file}')
csv_path = os.path.join(scen_path, csv_file)
#%% Task B: Open CSV with the files of each sample and load relevant data
# Task B.1 - Load csv containing the files for each sample.
# Each row of the CSV describes one sample; the column names and the row
# count are printed as a quick sanity check.
dataframe = pd.read_csv(csv_path)
print(f'Columns: {dataframe.columns.values}')
n_rows = len(dataframe.index)
print(f'Number of Rows: {n_rows}')
# Task B.2 - Load data from each file
n_samples = min(max_samples, n_rows)
last_sample = n_samples

# 0-based CSV row indices of the samples to read: rows
# first_sample-1 ... last_sample-1 (i.e. samples first_sample..last_sample).
sample_range = np.arange(first_sample-1, last_sample)
print(f'Reading {len(sample_range)} samples... ', end='')

problematic_samples = []
data = {}
for mod in allowed_modalities:
    # Per-sample shape without singleton axes, e.g. (2, 1) -> (2,).
    # Only the per-sample axes are reduced, so the leading sample axis
    # survives even when a single sample is read.
    sample_shape = tuple(dim for dim in DATA_DIMENSIONS[mod] if dim != 1)
    # Radar samples are stored as complex values, everything else as floats.
    dtype = np.complex64 if mod == 'radar' else np.float64
    # One row per selected sample (not per CSV row), so no all-zero padding
    # rows are left over when first_sample > 1.
    data[mod] = np.zeros((len(sample_range), *sample_shape), dtype=dtype)

    # Column of the CSV holding the file path of this modality per sample.
    files = dataframe[UNIT_STR + mod].values
    print(f'Reading modality: {mod}')

    # 'row' is the position in the output array, 'i' the CSV row index.
    for row, i in enumerate(tqdm(sample_range)):
        try:
            # Drop the first path component of the CSV entry (presumably a
            # leading './' -- TODO confirm) and re-root it at scen_path.
            file_parts = files[i].split('/')[1:]
        except AttributeError:
            # The cell is not a string (e.g. empty cell read as NaN).
            print('Path is improperly formated. Rectify CSV.')
            problematic_samples.append(int(i))
            # Skip the sample: there is no valid path to load from.
            continue

        file_path = os.path.join(scen_path, *file_parts)
        if mod in ['loc', 'loc_cal', 'pwr_60ghz']:
            data[mod][row] = np.loadtxt(file_path)
        elif mod == 'rgb':
            data[mod][row] = mplimage.imread(file_path)
        elif mod in ['lidar', 'radar']:
            data[mod][row] = np.load(file_path)

    # Abort as soon as a modality has unreadable samples, listing the
    # offending CSV row indices.
    if problematic_samples:
        print(problematic_samples)
        raise Exception('Cannot read some samples.')
# Gather the requested labels. Different labels have different ways of
# being read.
label_data = {}
for label in labels:
    if label in ['seq_index']:
        # Stored directly in the CSV column of the same name. Only the rows
        # of the selected sample range (first_sample..last_sample) are kept,
        # so the labels line up with the gathered data and with the range
        # encoded in the output file name.
        label_data[label] = dataframe[label].values[first_sample-1:last_sample]
    else:
        # NOTE: reading of other labels (e.g. 'blockage_label', 'bbox') is
        # not implemented; they are saved as zero-filled placeholders of the
        # shape given in DATA_DIMENSIONS (a KeyError is raised if the label
        # has no entry there).
        label_data[label] = np.squeeze(
            np.zeros((n_samples, *DATA_DIMENSIONS[label])))
#%% Task C: Create output folder and get an output path
os.makedirs(output_folder, exist_ok=True)

# One file per modality:
#   scenario<idx>_unit<N>_<modality>_<first>-<last>(.npy added by np.save)
for mod in allowed_modalities:
    output_file = (f'scenario{scenario_idx}_{UNIT_STR}{mod}_'
                   f'{first_sample}-{last_sample}')
    output_path = os.path.join(output_folder, output_file)
    print(f'Writing to {output_path}... ', end='')
    np.save(output_path, data[mod])

# One file per label (no unit prefix in the name):
#   scenario<idx>_<label>_<first>-<last>(.npy added by np.save)
for label in labels:
    output_file = (f'scenario{scenario_idx}_{label}_'
                   f'{first_sample}-{last_sample}')
    output_path = os.path.join(output_folder, output_file)
    print(f'Writing to {output_path}... ', end='')
    np.save(output_path, label_data[label])

print('Done.')