-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrajGenerate.py
439 lines (381 loc) · 20 KB
/
trajGenerate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
import random
import pickle
import pandas as pd
# Number of rows and columns of the artificial grid map.
GRIDSIZE_ROW, GRIDSIZE_COL = 100, 100
class GridMap:
    """
    Grid-map of a given area, built from a list of route records.

    Each record in ``infos`` is a mapping with the keys ``'id'``, ``'name'``
    and ``'route'`` (an iterable of cells indexed as ``cell[0]``, ``cell[1]``).

    Attributes built by the constructor:
      * ``route_infomap[r][c]`` - set of route ids whose route visits (r, c).
      * ``mode_infomap[r][c]``  - set of mode codes of the routes visiting
        (r, c): 1 when the route name starts with 'H', 2 when it starts with
        'O', 3 otherwise (original legend: 0 unknown, 1 highway, 2 GSD, 3 TG).
    """

    def __init__(self, row: int, col: int, infos: list):
        self.row = row
        self.col = col

        # First pass: which routes pass through each cell.
        self.route_infomap = [[set() for _ in range(self.col)] for _ in range(self.row)]
        for record in infos:
            for cell in record['route']:
                # set.add is idempotent, so no membership check is needed.
                self.route_infomap[cell[0]][cell[1]].add(record['id'])

        # Second pass: which travel-mode codes are present in each cell.
        self.mode_infomap = [[set() for _ in range(self.col)] for _ in range(self.row)]
        for record in infos:
            for cell in record['route']:
                route_name = record['name']
                if route_name.startswith('H'):
                    mode_code = 1
                elif route_name.startswith('O'):
                    mode_code = 2
                else:
                    mode_code = 3
                self.mode_infomap[cell[0]][cell[1]].add(mode_code)

    def is_close(self, x: int, y: int, mode: int) -> int:
        """
        Tell whether any of the up-to-8 neighbours of cell (x, y) carries ``mode``.

        Only the surrounding cells are inspected (not (x, y) itself); neighbours
        that fall outside the map are ignored.

        :param x: row index of the cell
        :param y: column index of the cell
        :param mode: mode code to look for in ``mode_infomap``
        :return: 1 if at least one in-bounds neighbour contains ``mode``, else 0
        """
        for dx in (-1, 0, 1):
            for dy in (-1, 0, 1):
                if dx == 0 and dy == 0:
                    continue  # skip the cell itself
                nx, ny = x + dx, y + dy
                inside = 0 <= nx < self.row and 0 <= ny < self.col
                if inside and mode in self.mode_infomap[nx][ny]:
                    return 1
        return 0
def _bias(method: str, para1,para2):
"""
bias function
:param method: 'uniform' or 'normal'
:param para1: if method is 'uniform', para1 is the lower bound; if method is 'normal', para1 is the mean
:param para2: if method is 'uniform', para2 is the upper bound; if method is 'normal', para2 is the standard deviation
:return: return bias value
"""
if method == 'uniform':
return random.uniform(para1, para2)
if method == 'normal':
return random.normalvariate(para1, para2)
def _change_route(prob:float,current_grid_xy:tuple, current_route): # TODO UNFINISHED
"""
change the route according to the probability
:param prob: the probability of changing route
:param current_grid: the current grid
:param current_route: the current route
:return: the new route
"""
if random.random() < prob:
return current_route
else:
return current_route
def _allow(neighbor: int, mode: str) -> bool:
"""
check if the neighbor is allowed to travel of the given mode: static, TG, GG, GSD, TS
:param neighbor: int, element of a list of 9 elements, the 4-th element is the grid itself, the other 8 elements are the neighbors
:param mode: str, 'TG', 'GG', 'GSD', 'static'
:return: bool, True if the neighbor is allowed to travel of the given mode, False otherwise
"""
if mode == 'TG' or mode == 'static':
return neighbor>>1 & 1 == 1
elif mode == 'GG':
return neighbor>>3 & 1 == 1
elif mode =='GSD':
return neighbor>>2 & 1 == 1 or neighbor>>5 & 1 == 1
elif mode == 'TS':
return neighbor>>6 & 1 == 1 or neighbor>>1 & 1 == 1
elif mode == 'static':
return True
def _allow_change(current_gird: int, mode: str) -> bool:
"""
check if the mode can be changed at the current grid
:param current_grid: bit-int, the current grid information
:param mode: str, 'TG-GSD', 'GSD-GG', 'TS-TG'
:return: bool, True if the mode can be changed at the current grid, False otherwise
"""
if mode == 'TG-GSD' or mode == 'GSD-TG':
return current_gird & 1 == 1 and (current_gird>>2 & 1 == 1 or current_gird>>5 & 1 == 1)
elif mode == 'GSD-GG' or mode == 'GG-GSD':
return current_gird >> 4 & 1 == 1 and (current_gird>>2 & 1 == 1 or current_gird>>5 & 1 == 1)
elif mode == 'TS-TG' or mode == 'TG-TS':
return current_gird & 1 == 1 and (current_gird>>1 & 1 == 1 and current_gird>>6 & 1 == 1)
else :
return False
def _get_new_coordinates(x, y, position)->tuple:
offsets = [
(-1, 1), (0, 1), (1, 1),
(-1, 0), (0, 0), (1, 0),
(-1, -1), (0, -1), (1, -1)
]
if 0 <= position < len(offsets):
dx, dy = offsets[position]
return (x + dx, y + dy)
else:
raise ValueError("Invalid position")
def generate_traj(gridmap: "GridMap", from_map_records: list, size: int, time_interval=10) -> pd.DataFrame:
    """
    Generate noisy cell-trajectories that follow routes of a given map.

    For each trajectory a route record is picked at random and 5-15 points are
    emitted. The first point is the first cell of the route; every later point
    advances along the route by a travel budget of
    ``int(speed * time_interval / 60)`` measured as Euclidean distance between
    consecutive route cells, stopping at the last cell of the route.

    :param gridmap: grid map of the area (currently unused)
    :param from_map_records: list of route records, each a mapping with the
        keys 'route' (sequence of (x, y) cells), 'id' and 'speed'
    :param size: number of trajectories to generate
    :param time_interval: time between two consecutive points
    :return: DataFrame with columns ["ID", "time", "locx", "locy", "route_id"]
        and a fresh 0..n-1 index
    :raises IndexError: if ``from_map_records`` is empty (from ``random.choice``)
    """
    columns = ["ID", "time", "locx", "locy", "route_id"]
    # Rows are collected in a plain list and turned into a DataFrame once;
    # appending to a DataFrame row by row copies the whole frame every time
    # and relied on the private ``DataFrame._append``.
    rows = []
    for i in range(size):
        traj_len = random.randint(5, 15)
        traj_id = str(i + 1)
        start_route = random.choice(from_map_records)
        route_cells = start_route['route']
        # The route id can indicate the travel mode downstream.
        route_id = start_route['id']
        loc_idx = 0  # index of the current cell along the route
        for j in range(traj_len):
            if j > 0:
                # Spend the per-step travel budget walking along the route.
                budget = int(start_route['speed'] * time_interval / 60)
                while budget > 0 and loc_idx < len(route_cells) - 1:
                    here = route_cells[loc_idx]
                    there = route_cells[loc_idx + 1]
                    budget -= ((here[0] - there[0]) ** 2 + (here[1] - there[1]) ** 2) ** 0.5
                    loc_idx += 1
            locx, locy = route_cells[loc_idx]
            # NOTE(review): for 'normal' the arguments are (mean, std), so this
            # noise is centred on -1.5 rather than 0 - confirm this is intended.
            rows.append([
                traj_id,
                j * time_interval,
                locx + int(_bias('normal', -1.5, 1.5)),
                locy + int(_bias('normal', -1.5, 1.5)),
                route_id,
            ])
    return pd.DataFrame(rows, columns=columns)
def generate_traj_single(real_map, size: int, mode: str, time_interval=6) -> tuple:
    """
    Generate single-mode cell-trajectories from a real-world grid map.

    Steps for each trajectory:
      1. pick a random start cell suitable for the travel mode;
      2. random-walk over not-yet-visited neighbours allowed for the mode to
         build the full route (at most 300 cells for 'TG', 200 otherwise);
      3. sample the route at roughly ``time_interval`` steps, with random
         speed and position noise;
      4. keep the trajectory only if it has enough samples (3 for 'TG',
         4 for 'GG'/'TS', 5 for 'GSD'), otherwise retry with the same ID.

    The function loops until ``size`` trajectories were accepted, so it does
    not terminate if the map cannot produce long enough routes.
    NOTE(review): the sampling loop only advances when the per-step distance
    is at least one cell; a very small ``time_interval`` can stall it.

    :param real_map: mapping from grid cell (x, y) to a 9-element sequence of
        bit-int flags; element ``idx`` describes the neighbour at
        ``_get_new_coordinates(x, y, idx)`` and element 4 the cell itself
    :param size: number of trajectories to generate
    :param mode: the travel mode, 'TG', 'GG', 'GSD' or 'TS'
    :param time_interval: mean sampling interval (default 6; presumably minutes)
    :return: tuple ``(traj_df, route_df)``; ``traj_df`` has the columns
        ["ID", "time", "locx", "locy", "mode"], ``route_df`` holds the
        underlying full routes with the columns ["ID", "locx", "locy"]
    :raises ValueError: if ``mode`` is unknown, or if ``size > 0`` and the map
        has no valid start cell for ``mode``
    """
    traj_columns = ["ID", "time", "locx", "locy", "mode"]
    route_columns = ["ID", "locx", "locy"]

    # Start-cell predicates on the flag field of the cell itself (index 4).
    start_filters = {
        # high-speed rail, must start from a station
        'TG': lambda flags: flags & 1 == 1 and flags >> 1 & 1 == 1,
        # highway, must start from a highway toll gate
        'GG': lambda flags: flags >> 4 & 1 == 1,
        # normal road
        'GSD': lambda flags: flags >> 2 & 1 == 1 or flags >> 5 & 1 == 1,
        # normal railway, must start from a station
        'TS': lambda flags: flags & 1 == 1,
    }
    if mode not in start_filters:
        # Previously this surfaced as an UnboundLocalError on `start`.
        raise ValueError(f"Unknown travel mode: {mode!r} (expected one of {sorted(start_filters)})")

    # The candidate list does not depend on the loop, so scan the map once.
    is_start = start_filters[mode]
    start_cells = [cell for cell, flags in real_map.items() if is_start(flags[4])]
    if size > 0 and not start_cells:
        raise ValueError(f"No start cell available in the map for mode {mode!r}")

    mode_rtslen_dict = {'TG': 300, 'GG': 200, 'GSD': 200, 'TS': 200}
    mode_v_dict = {'TG': 300, 'GG': 120, 'GSD': 60, 'TS': 150}
    mode_len_dict = {'TG': 3, 'GG': 4, 'GSD': 5, 'TS': 4}
    mode_vbias_dict = {'TG': 50, 'GG': 20, 'GSD': 20, 'TS': 30}
    expected_sample_len = mode_rtslen_dict[mode]  # maximum walk length, can be tuned
    v = mode_v_dict[mode]
    v_bias = mode_vbias_dict[mode]

    # Rows are accumulated in lists and converted once at the end instead of
    # growing DataFrames with the private, quadratic ``_append``.
    traj_rows = []
    route_rows = []
    i = 0
    while i < size:  # one accepted trajectory per increment of i
        traj_id = mode + str(i + 1)
        start = random.choice(start_cells)

        # Build the full route cell by cell, never revisiting a cell.
        visited = set()
        route_list = []  # list of (x, y) in travel order
        current_pos = start
        while len(route_list) < expected_sample_len:
            current_x, current_y = current_pos[0], current_pos[1]
            route_list.append(current_pos)
            visited.add(current_pos)
            candidates = [
                idx
                for idx, neighbor in enumerate(real_map[current_pos])
                if _get_new_coordinates(current_x, current_y, idx) not in visited
                and _allow(neighbor, mode)
            ]
            if not candidates:  # dead end, stop the walk here
                break
            current_pos = _get_new_coordinates(current_x, current_y, random.choice(candidates))

        # Sample the simulated trajectory from the full route, under the
        # original author's hypothesis that the route length inside each
        # grid cell is 1 km.
        j = 0
        timestamp = 0
        traj_len = 0
        last_idx = len(route_list) - 1
        sub_rows = [[traj_id, timestamp, route_list[0][0], route_list[0][1], mode]]
        while j < len(route_list):
            timestep = time_interval + _bias('normal', 0, 1)
            timestamp += timestep
            spacestep = int(v + _bias('uniform', -v_bias, v_bias)) * timestep / 60
            traj_len += 1
            j = int(j + spacestep)
            locx, locy = route_list[min(j, last_idx)][0], route_list[min(j, last_idx)][1]
            sub_rows.append([
                traj_id,
                timestamp,
                locx + int(_bias('uniform', -1.5, 1.5)),
                locy + int(_bias('uniform', -1.5, 1.5)),
                mode,
            ])

        # Only trajectories with enough samples are kept; otherwise the
        # attempt is discarded and the same ID is retried.
        if traj_len >= mode_len_dict[mode]:
            traj_rows.extend(sub_rows)
            route_rows.extend([traj_id, pos[0], pos[1]] for pos in route_list)
            i += 1

    df = pd.DataFrame(traj_rows, columns=traj_columns)
    route_df = pd.DataFrame(route_rows, columns=route_columns)
    return df, route_df
def generate_traj_mixed(real_map, size: int, mode: str, time_interval=6) -> tuple:
    """
    Generate mixed-mode trajectories (exactly one mode change) from a real-world grid map.

    Supported transitions (``former-next``):
      * 'TG-GSD' / 'GSD-TG'
      * 'GSD-GG' / 'GG-GSD'
      * 'TS-TG'  / 'TG-TS'

    Each trajectory starts in the former mode, random-walks over unvisited
    allowed neighbours for up to 200 cells, and may switch to the next mode
    once more than 80 cells were walked at a cell where ``_allow_change``
    permits the transition (taken with probability 0.8 per eligible cell).
    Walks without a mode change are discarded and retried. The route is then
    sampled: the part before the change with the former mode's speed, a stop
    record at the change cell (mode value 0), and the rest with the next
    mode's speed. Trajectories with fewer than 6 moving samples are discarded.

    The function loops until ``size`` trajectories were accepted, so it does
    not terminate if the map cannot produce such trajectories.

    :param real_map: mapping from grid cell (x, y) to a 9-element sequence of
        bit-int flags; element ``idx`` describes the neighbour at
        ``_get_new_coordinates(x, y, idx)`` and element 4 the cell itself
    :param size: number of trajectories to generate
    :param mode: transition name, one of the six listed above
    :param time_interval: mean sampling interval (default 6; presumably minutes)
    :return: tuple ``(traj_df, route_df)``; ``traj_df`` has the columns
        ["ID", "time", "locx", "locy", "mode"], ``route_df`` holds the full
        routes of the accepted trajectories with columns ["ID", "locx", "locy"]
    :raises ValueError: if ``mode`` is not a supported transition, or if
        ``size > 0`` and the map has no valid start cell for the former mode
    """
    supported_modes = ('TG-GSD', 'GSD-TG', 'GSD-GG', 'GG-GSD', 'TS-TG', 'TG-TS')
    if mode not in supported_modes:
        # Any other value can never pass _allow_change, so the retry loop
        # below would spin forever (or fail on an unbound start cell).
        raise ValueError(f"Unsupported mixed mode: {mode!r} (expected one of {supported_modes})")
    former_mode, next_mode = mode.split('-')

    traj_columns = ["ID", "time", "locx", "locy", "mode"]
    route_columns = ["ID", "locx", "locy"]

    # Start-cell predicates on the flag field of the cell itself (index 4).
    start_filters = {
        # start from a TG station
        'TG': lambda flags: flags & 1 == 1 and flags >> 1 & 1 == 1,
        # start from GSD
        'GSD': lambda flags: flags >> 2 & 1 == 1 or flags >> 5 & 1 == 1,
        # start from a GG toll gate
        'GG': lambda flags: flags >> 3 & 1 == 1 and flags >> 4 & 1 == 1,
        # start from a TS station
        'TS': lambda flags: flags & 1 == 1 and flags >> 6 & 1 == 1,
    }
    # The candidate list does not depend on the loop, so scan the map once.
    is_start = start_filters[former_mode]
    start_cells = [cell for cell, flags in real_map.items() if is_start(flags[4])]
    if size > 0 and not start_cells:
        raise ValueError(f"No start cell available in the map for mode {former_mode!r}")

    mode_v_dict = {'TG': 300, 'GG': 120, 'GSD': 60, 'TS': 150}
    mode_vbias_dict = {'TG': 50, 'GG': 20, 'GSD': 20, 'TS': 30}
    # The original read the "former" speed from current_mode *after* the walk,
    # when it had already been switched to next_mode, so the first segment was
    # sampled at the wrong speed. Look both speeds up by their own mode.
    former_v = mode_v_dict[former_mode]
    next_v = mode_v_dict[next_mode]
    former_vbias = mode_vbias_dict[former_mode]
    next_vbias = mode_vbias_dict[next_mode]

    expected_sample_len = 200  # maximum walk length
    min_cells_before_change = 80
    change_probability = 0.8

    # Rows are accumulated in lists and converted once at the end instead of
    # growing DataFrames with the private, quadratic ``_append``.
    traj_rows = []
    route_rows = []
    i = 0
    while i < size:  # one accepted trajectory per increment of i
        traj_id = mode + str(i + 1)
        start = random.choice(start_cells)

        # Build a full route with (at most) one mode change.
        visited = set()
        route_list = []  # list of (x, y) in travel order
        current_pos = start
        current_mode = former_mode
        change_idx = None  # index in route_list where the mode changes
        while len(route_list) < expected_sample_len:
            current_x, current_y = current_pos[0], current_pos[1]
            route_list.append(current_pos)
            visited.add(current_pos)
            cnt = len(route_list)
            if (change_idx is None
                    and _allow_change(real_map[current_pos][4], mode)
                    and cnt > min_cells_before_change
                    and random.random() < change_probability):
                current_mode = next_mode
                change_idx = cnt - 1
            # Unvisited neighbours that the current mode may travel to.
            candidates = [
                idx
                for idx, neighbor in enumerate(real_map[current_pos])
                if _get_new_coordinates(current_x, current_y, idx) not in visited
                and _allow(neighbor, current_mode)
            ]
            if not candidates:
                break
            current_pos = _get_new_coordinates(current_x, current_y, random.choice(candidates))

        if change_idx is None:  # no mode change happened, retry
            continue

        j = 0
        timestamp = 0
        traj_len = 0
        sub_rows = []

        # Former part: sample up to the change cell with the former mode.
        while j < change_idx:
            timestep = time_interval + _bias('normal', 0, 1)
            timestamp += timestep
            spacestep = int(former_v + _bias('uniform', -former_vbias, former_vbias)) * timestep / 60
            traj_len += 1
            j = int(min(j + spacestep, change_idx))
            locx, locy = route_list[j][0], route_list[j][1]
            sub_rows.append([
                traj_id,
                timestamp,
                locx + int(_bias('uniform', -1.5, 1.5)),
                locy + int(_bias('uniform', -1.5, 1.5)),
                former_mode,
            ])

        # Stop record at the change cell: 4 time units when leaving TG,
        # 7 otherwise; its mode value is 0.
        timestamp += 4 if former_mode == 'TG' else 7
        sub_rows.append([
            traj_id,
            timestamp,
            route_list[change_idx][0] + int(_bias('uniform', -1.5, 1.5)),
            route_list[change_idx][1] + int(_bias('uniform', -1.5, 1.5)),
            0,
        ])

        # Latter part: sample the rest of the route with the next mode.
        last_idx = len(route_list) - 1
        while j < last_idx:
            # NOTE(review): the former part uses normal(0, 1) here; with
            # (mean=-1, std=1) this interval is one unit shorter on average.
            # Kept as-is - confirm whether that is intended.
            timestep = time_interval + _bias('normal', -1, 1)
            timestamp += timestep
            spacestep = int(next_v + _bias('uniform', -next_vbias, next_vbias)) * timestep / 60
            traj_len += 1
            j = int(min(j + spacestep, last_idx))
            locx, locy = route_list[j][0], route_list[j][1]
            sub_rows.append([
                traj_id,
                timestamp,
                locx + int(_bias('uniform', -1.5, 1.5)),
                locy + int(_bias('uniform', -1.5, 1.5)),
                next_mode,
            ])

        if traj_len >= 6:
            traj_rows.extend(sub_rows)
            # Route cells are recorded only for accepted trajectories; the
            # original also kept the cells of every discarded attempt under
            # the same ID.
            route_rows.extend([traj_id, pos[0], pos[1]] for pos in route_list)
            i += 1

    df = pd.DataFrame(traj_rows, columns=traj_columns)
    route_df = pd.DataFrame(route_rows, columns=route_columns)
    return df, route_df
# --- Route-based generation (disabled) ---------------------------------------
# with open('data/artificial_network.pkl', 'rb') as f:
#     artificial_net = pickle.load(f)
# artificial_map = GridMap(GRIDSIZE_ROW, GRIDSIZE_COL, artificial_net)
# traj = generate_traj(artificial_map, artificial_net, size=400)
# traj.to_csv('data/artificial_traj.csv', index=False)

# --- Grid-based generation ---------------------------------------------------
# NOTE: pickle.load can execute arbitrary code; only load trusted data files.
with open('data/GridModesAdjacentRes.pkl', 'rb') as f:
    real_map_data = pickle.load(f)

# Single-mode trajectories, 200 per travel mode.
traj_GG, rts_GG = generate_traj_single(real_map_data, mode='GG', size=200)
traj_GSD, rts_GSD = generate_traj_single(real_map_data, mode='GSD', size=200)
traj_TS, rts_TS = generate_traj_single(real_map_data, mode='TS', size=200)
traj_TG, rts_TG = generate_traj_single(real_map_data, mode='TG', size=200)

single_route_frames = [rts_GG, rts_GSD, rts_TS, rts_TG]
single_traj_frames = [traj_GG, traj_GSD, traj_TS, traj_TG]
rts_single = pd.concat(single_route_frames)
traj_single = pd.concat(single_traj_frames)
traj_single.to_csv('data/artificial_traj_mixed_single.csv', index=False)
rts_single.to_csv('data/artificial_rts_mixed_single.csv', index=False)

# Mixed-mode trajectories, 50 per transition.
traj_TG_GSD, rts_TG_GSD = generate_traj_mixed(real_map_data, mode='TG-GSD', size=50)
traj_GSD_TG, rts_GSD_TG = generate_traj_mixed(real_map_data, mode='GSD-TG', size=50)
traj_TS_TG, rts_TS_TG = generate_traj_mixed(real_map_data, mode='TS-TG', size=50)
traj_TG_TS, rts_TG_TS = generate_traj_mixed(real_map_data, mode='TG-TS', size=50)
traj_GSD_GG, rts_GSD_GG = generate_traj_mixed(real_map_data, mode='GSD-GG', size=50)
traj_GG_GSD, rts_GG_GSD = generate_traj_mixed(real_map_data, mode='GG-GSD', size=50)

mixed_route_frames = [rts_TG_GSD, rts_GSD_TG, rts_TS_TG, rts_TG_TS, rts_GSD_GG, rts_GG_GSD]
mixed_traj_frames = [traj_TG_GSD, traj_GSD_TG, traj_TS_TG, traj_TG_TS, traj_GSD_GG, traj_GG_GSD]
rts_mult = pd.concat(mixed_route_frames)
traj_mult = pd.concat(mixed_traj_frames)
traj_mult.to_csv('data/artificial_traj_mixed_mult.csv', index=False)
rts_mult.to_csv('data/artificial_rts_mixed_mult.csv', index=False)