-
Notifications
You must be signed in to change notification settings - Fork 1
/
project.py
217 lines (189 loc) · 7.83 KB
/
project.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
from copy import deepcopy
import geojson
import geopy.distance
import shapely.geometry
from geoql import geoql
import geoleaflet
import xlsxwriter
import rtree
import pickle
import numpy as np
import networkx
from sklearn.cluster import KMeans
from tqdm import tqdm
# Want a function that takes as input
# set of geojson points, set of geojson linestrings
# points is a list of (x, y) coordinates
# linestrings is a list of geojson linestrings representing streets
def project(x, v, w):
'''
Compute projection of x onto line between v and w.
'''
(x, v, w) = (np.array(x), np.array(v), np.array(w))
d = w - v
u = x - v
return v + (u.dot(d) / d.dot(d)) * d
def project_point_onto_segment(x, v, w):
(x, v, w) = (np.array(x), np.array(v), np.array(w))
p = project(x, v, w)
pv = p - v
pw = p - w
# The vectors pv and pw face in opposite directions if the dot product is negative.
if pv.dot(pw) <= 0:
return p
elif np.dot(pv, pv) <= np.dot(pw, pw): # The point p is not on the segment.
return v
else: # Distance from the point to w is smaller.
return w
def normal(x, v, w):
(x, v, w) = (np.array(x), np.array(v), np.array(w))
return project_point_onto_segment(x, v, w) - x
def features_to_rtree(obj):
'''
Takes GeoJSON FeatureCollection of LineString entries and constructs
a corresponding R-tree.
'''
tree = rtree.index.Index()
tree_keys = {}
i = 0
for (j, lstr) in enumerate(obj.features):
for p in lstr.geometry.coordinates:
tree_keys[str(i)] = j
(x, y) = (p[0], p[1])
tree.insert(i, (x, y, x, y))
i += 1
return (tree, tree_keys)
def find_intersection(obj, tree, tree_keys, p, r):
''' Finds all points in the rtree tree in the bounding box centered on p with
radius r '''
lat, lon = p
result = set()
for i in tqdm(list(tree.intersection((lat-r, lon-r, lat+r, lon+r)))):
result.add(tree_keys[str(i)])
result = list(result)
result.sort()
obj.features = [obj.features[j] for j in result]
return obj
# UNFINISHED
def project_points_to_linestrings(points, linestrings):
# Todo: Implement rtrees to find line points within certain distance.
projections = []
(tree, tree_keys) = features_to_rtree(linestrings)
for (lat, lon) in tqdm(points):
p = np.array([lat, lon])
lstr_copy = deepcopy(linestrings)
lstr_copy = find_intersection(lstr_copy, tree, tree_keys, p, 0.01)
lstr_copy = geoql.features_keep_within_radius(lstr_copy, [lon,lat], 0.5, 'miles')
min_proj = (10000, [0,0])
for lstr in lstr_copy.features:
segments = lstr.geometry.coordinates
for i in range(len(segments)-1):
if np.linalg.norm(np.array(segments[i+1]) - np.array(segments[i])) == 0:
continue
norm = normal(p, segments[i], segments[i+1])
dist = norm.dot(norm)
if dist < min_proj[0]:
proj = project_point_onto_segment(p, segments[i], segments[i+1])
min_proj = [dist, proj, np.array(segments[i]), np.array(segments[i+1])]
projections.append(min_proj)
return [p[1:] for p in projections]
def load_road_segments(fname):
linestrings = geojson.loads(open(fname, 'r').read())
linestrings.features = [seg for seg in linestrings.features if seg.type=='Feature']
return linestrings
def to_networkx(graph_ql):
'''
Creates and returns a GeoJSON graph generated by the geoql function
node_edge_graph into a networkx representation.
'''
G = networkx.Graph()
for j, feature in enumerate(graph_ql.features):
if feature.type == "Point":
G.add_node(tuple(feature.coordinates))
elif feature.type == "Feature":
coords = [tuple(c) for c in feature.geometry.coordinates]
for i in range(len(coords)-1):
G.add_edge(coords[i], coords[i+1], index=j)
return G
def find_connected_segment_indices(obj):
G = to_networkx(obj)
G = nx.subgraph(G, max(nx.connected_components(G), key=lambda x:len(x)))
indices = set()
for v0, v1 in G.edges():
indices.add(G[v0][v1]['index'])
obj.features = [obj.features[i] for i in indices]
return obj
def generate_student_stops(student_features, numStops=5000, loadFrom=None):
# We assume that the order of stops will not change at any point.
# We don't want to do anything to d2d stops.
#d2d_stops = [f['geometry']['coordinates'][0]
# for f in student_features['features']
# if f['properties']['pickup'] == 'd2d']
d2d_stops = [f for f in student_features['features'] if f['properties']['pickup'] == 'd2d']
# load student coordinates from students datafile to list of coordinates
corner_students = [feature['geometry']['coordinates'][0]
for feature in student_features['features']
if feature['properties']['pickup'] == 'corner']
# Get means from pickle or generate.
if loadFrom:
k_fit = pickle.load(open(loadFrom, 'rb'))
corner_stops = k_fit['corner_stops']
labels = k_fit['labels']
else:
# Generate means for corner students.
kmeans = KMeans(n_clusters=numStops-len(d2d_stops), random_state=0)
k_fit = kmeans.fit(corner_students)
corner_stops = k_fit.cluster_centers_
labels = k_fit.labels_
# Write kmeans results to a file.
with open('output/kmeans', 'wb') as f:
f.write(pickle.dumps({'corner_stops': corner_stops, 'labels': labels}))
print('K-means output written.', flush=True)
# Get LineString entries from road segment data.
linestrings = load_road_segments('input/road-network-extract-missing.geojson')
linestrings = find_connected_segment_indices(linestrings)
projected_corner_stops = project_points_to_linestrings(corner_stops, linestrings)
return list(d2d_stops), list(projected_corner_stops)
def seperate_stops_by_school(stops, students, labels) :
# Create a duplicate stop for every student going to the same school
schools_per_stop = {stop:set() for stop in range(len(stops['features']))}
new_features = []
for i in range(len(labels)):
school = students[i]['properties']['school']
school_coords = students[i]['geometry']['coordinates'][1]
schools_per_stop[labels[i]].add((school, tuple(school_coords)))
for stop_idx in schools_per_stop:
for school, school_coords in schools_per_stop[stop_idx]:
feature = deepcopy(stops['features'][stop_idx])
feature['geometry']['coordinates'][1] = school_coords
feature['properties']['school'] = school
new_features.append(feature)
stops.features = new_features
return stops
corner = pickle.loads(open('corner', 'rb').read())
students = geojson.loads(open('output/students.geojson', 'rb').read())
students = [f for f in students['features'] if f['properties']['pickup'] == 'corner']
labels = pickle.loads(open('output/kmeans', 'rb').read())['labels']
features = []
for stop in corner:
p, l1, l2 = stop
# -1 is a placeholder value
feature = geojson.Feature(geometry=geojson.LineString([p, (-1,-1)]), properties={'projection':[l1, l2]})
features.append(feature)
stops = geojson.feature.FeatureCollection(features)
stops = seperate_stops_by_school(stops, students, labels)
with open('corner', 'wb') as f:
f.write(pickle.dumps(stops))
'''
import time
student_features = geojson.loads(open('output/students.geojson', 'r').read())
start = time.time()
d2d, corner = generate_student_stops(student_features, loadFrom='output/kmeans')
end = time.time()
all_stops = seperate_stops_by_school(stops, students, labels)
with open('output/timelog', 'w') as f:
f.write(str(end-start))
'''
#with open('output/stops', 'wb') as f:
# f.write(pickle.dumps(stops))
#run()