-
Notifications
You must be signed in to change notification settings - Fork 0
/
01_trafficDataRaw.py
51 lines (41 loc) · 1.28 KB
/
01_trafficDataRaw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import _pickle as pickle
import numpy as np
from collections import defaultdict
# --- File locations -------------------------------------------------------
# Inputs: the GPS file is whitespace-separated (id, two numeric columns);
# the speed file is comma-separated (id, integer index, speed).
gps_data_file = "./date/link_gps"
raw_speed_file = "./date/traffic_speed_sub-dataset"
# Output: pickle of the merged per-id records.
raw_data_file = "./clean/raw_speed_gps.pkl"

# --- In-memory tables, filled by the loading steps below -------------------
# id -> {'speed': 1-D float array, 'gps': 2-element array (only if known)}
raw_data = dict()
# id -> 2-element float array read from the GPS file
gps_data = dict()
# Fill gps_data: map the id in the first column of every GPS-file line to a
# length-2 float array built from the two numeric columns that follow it.
with open(gps_data_file, 'r') as gps_file:
    for line in gps_file:
        # split() with no argument splits on any whitespace and discards
        # leading/trailing whitespace, so no per-field strip() is needed.
        fields = line.split()
        if not fields:
            # Blank line: skip it rather than fail with IndexError below.
            continue
        # Named link_id (not `id`) so the builtin id() is not shadowed.
        link_id = fields[0]
        # NOTE(review): the two values are presumably a coordinate pair; the
        # file does not show which is longitude/latitude — confirm.
        gps_data[link_id] = np.array([float(fields[1]), float(fields[2])])
# Length of every per-id speed vector. The integer in the second column of a
# speed row is used directly as a position in this vector, so it must be
# smaller than this value (numpy raises IndexError otherwise).
# NOTE(review): presumably one entry per time interval — confirm against the
# dataset description.
SPEED_VECTOR_LEN = 5856

# Ids that occur in the speed file but have no entry in gps_data.
# Each id is recorded once, so len(not_have_gps) is a count of ids, not of
# speed rows.
not_have_gps = []

# Fill raw_data from the comma-separated speed file: one row is
# "<id>,<position>,<speed>" and stores <speed> at <position> of that id's
# speed vector.
with open(raw_speed_file, 'r') as speed_file:
    for line in speed_file:
        if not line.strip():
            # Blank line: skip it rather than fail while parsing columns.
            continue
        fields = line.split(',')
        # Named link_id (not `id`) so the builtin id() is not shadowed.
        link_id = fields[0].strip()
        # int()/float() ignore surrounding whitespace themselves.
        position = int(fields[1])
        speed = float(fields[2])

        record = raw_data.get(link_id)
        if record is None:
            # First row for this id: create its record and resolve its GPS
            # entry once, since gps_data does not change during this loop.
            record = {'speed': np.zeros(SPEED_VECTOR_LEN)}
            raw_data[link_id] = record
            if link_id in gps_data:
                record['gps'] = gps_data[link_id]
            else:
                not_have_gps.append(link_id)

        record['speed'][position] = speed
# Print the number of ids collected in raw_data, then the number of entries
# recorded in not_have_gps (one value per line).
for collection in (raw_data, not_have_gps):
    print(len(collection))

# Persist the merged records as a binary pickle.
with open(raw_data_file, 'wb') as out_file:
    pickle.dump(raw_data, out_file)