-
Notifications
You must be signed in to change notification settings - Fork 8
/
function-clean_bylaws_text.sql
340 lines (317 loc) · 13.2 KB
/
function-clean_bylaws_text.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
--First create a table
CREATE TABLE IF NOT EXISTS gis.cleaned_bylaws_text (
bylaw_id int,
highway2 text,
btwn1 text,
direction_btwn1 text,
metres_btwn1 float,
btwn2 text,
direction_btwn2 text,
metres_btwn2 float,
btwn2_orig text,
btwn2_check text
);
--Then, create a function
DROP FUNCTION IF EXISTS gis._clean_bylaws_text (int, text, text, text);
CREATE OR REPLACE FUNCTION gis._clean_bylaws_text(
_bylaw_id int, highway text, frm text, t text
)
RETURNS gis.cleaned_bylaws_text
LANGUAGE 'plpgsql'
AS $$
DECLARE
highway text := gis.custom_case(_clean_bylaws_text.highway);
frm text := gis.custom_case(_clean_bylaws_text.frm);
t text := gis.custom_case(_clean_bylaws_text.t);
direction_or text := 'north|south|east|west|northeast|northwest|southwest|southeast';
--STEP 1
-- clean data
-- when the input was btwn instead of from and to
btwn1_cleaned text := regexp_replace(
regexp_replace(
regexp_replace(frm,
'[0123456789.,]* m (' || direction_or || ') of ', '', 'gi'),
'\(.*?\)', '', 'gi'),
'A point', '', 'gi');
btwn1_v1 text := CASE
WHEN t IS NULL THEN gis.abbr_street(
regexp_replace(
regexp_replace(
split_part(
split_part(btwn1_cleaned,
' to ', 1),
' and ', 1),
'\(.*\)', '', 'gi'),
'between ', '', 'gi')
)
ELSE gis.abbr_street(btwn1_cleaned)
END;
-- cases when three roads intersect at once (i.e. btwn1_v1 = Terry Dr/Woolner Ave) ... just take first street
btwn1 text := CASE WHEN btwn1_v1 LIKE '%/%'
THEN split_part(btwn1_v1, '/', 1)
WHEN btwn1_v1 LIKE '% of %' THEN split_part(btwn1_v1, ' of ', 2)
ELSE btwn1_v1
END;
btwn2_cleaned text := regexp_replace(
regexp_replace(
regexp_replace(
COALESCE(t, frm),
'\(.*?\)', '', 'gi'),
'[0123456789.,]* m (' || direction_or || ') of ', '', 'gi'),
'the (' || direction_or || ') end of', '', 'gi');
btwn2_orig_v1 text := CASE
WHEN t IS NULL THEN (
CASE WHEN split_part(
regexp_replace(frm, '\(.*?\)', '', 'gi'), ' and ', 2) <> ''
THEN gis.abbr_street(
regexp_replace(
split_part(btwn2_cleaned, ' and ', 2),
--Delete 'thereof' and some other words
'between |(A point)|(thereof)|(the northeast of)', '', 'gi')
)
WHEN split_part(frm, ' to ', 2) <> ''
THEN gis.abbr_street(
regexp_replace(
regexp_replace(
split_part(
btwn2_cleaned, ' to ', 2),
'between ', '', 'gi'),
'A point', '', 'g')
)
END
)
ELSE gis.abbr_street(
regexp_replace(btwn2_cleaned, 'the northeast of', '', 'gi')
)
END;
-- cases when three roads intersect at once (i.e. btwn2_orig_v1 = Terry Dr/Woolner Ave)
-- if there is still a '/' after cresting btwn2_orig_v1 then we know there are 3 intersections in one
btwn2_orig text := CASE
WHEN btwn2_orig_v1 LIKE '%/%'
THEN split_part(btwn2_orig_v1, '/', 1)
ELSE btwn2_orig_v1
END;
highway2 text := gis.abbr_street(highway);
direction_btwn1 text := CASE WHEN t IS NULL THEN
(
CASE
WHEN btwn1 LIKE '% m %'
OR gis.abbr_street(
regexp_replace(
split_part(
split_part(frm, ' to ', 1),
' and ', 1),
'between ', '', 'gi')
) LIKE '% m %'
THEN split_part(
split_part(
gis.abbr_street(
regexp_replace(
split_part(
split_part(frm, ' to ', 1),
' and ', 1),
'(between)|(further) ', '', 'gi')
),
' m ', 2),
' of ', 1)
ELSE NULL
END
)
ELSE (
CASE
WHEN btwn1 LIKE '% m %'
OR gis.abbr_street(frm) LIKE '% m %'
THEN regexp_replace(
split_part(
split_part(
gis.abbr_street(frm),
' m ', 2),
' of ', 1),
'further ', '', 'gi')
ELSE NULL
END
)
END;
frm_part_1 text := substring(frm, '(?<= (from)|(and) )[\S\s]+');
frm_part_2 text := substring(frm, '(?<= (and)|(to) )[\S\s]+');
direction_btwn2 text := CASE
WHEN t IS NULL THEN (
CASE WHEN btwn2_orig LIKE '% m %'
OR (
CASE WHEN frm_part_1 IS NOT NULL
THEN gis.abbr_street(
regexp_replace(
frm_part_1,
'between ', '', 'gi')
)
END
) LIKE '% m %'
THEN
(
CASE
WHEN frm_part_1 IS NOT NULL
THEN regexp_replace(
split_part(
split_part(
gis.abbr_street(
regexp_replace(
frm_part_1,
'between ', '', 'gi')
),
' m ', 2),
' of ', 1),
'further | thereof', '', 'gi')
END
)
ELSE NULL
END)
ELSE (
CASE
WHEN btwn2_orig LIKE '% m %'
OR gis.abbr_street(t) LIKE '% m %'
THEN
regexp_replace(
split_part(
split_part(
gis.abbr_street(t),
' m ', 2),
' of ', 1),
'further ', '', 'gi')
ELSE NULL
END
)
END;
metres_btwn1 float := (CASE WHEN t IS NULL THEN (
CASE WHEN
btwn1 LIKE '% m %'
OR gis.abbr_street(
regexp_replace(
split_part(
split_part(frm, ' to ', 1),
' and ', 1),
'Between ', '', 'gi')
) LIKE '% m %'
THEN regexp_replace(
regexp_replace(
split_part(
gis.abbr_street(
regexp_replace(
split_part(
split_part(frm, ' to ', 1), ' and ', 1),
'between ', '', 'gi')),
' m ' ,1),
'a point\s{0,1}', '', 'gi'),
',', '', 'gi')::float
ELSE NULL
END
)
ELSE (
CASE WHEN btwn1 LIKE '% m %'
OR gis.abbr_street(frm) LIKE '% m %'
THEN regexp_replace(
regexp_replace(
split_part(gis.abbr_street(frm), ' m ' ,1),
'a point\s{0,1}', '', 'gi'),
',', 'gi')::float
ELSE NULL
END
)
END)::float;
metres_btwn2 float := (
CASE WHEN t IS NULL THEN
( CASE WHEN btwn2_orig LIKE '% m %' OR
(
CASE WHEN frm_part_2 IS NOT NULL
THEN gis.abbr_street(
regexp_replace(
regexp_replace(
frm_part_2,
'\(.*?\)', '', 'gi'),
'between ', '', 'gi')
)
END
)
LIKE '% m %'
THEN
(
CASE
WHEN frm_part_2 IS NOT NULL
THEN regexp_replace(
regexp_replace(
split_part(
gis.abbr_street(
regexp_replace(
regexp_replace(
frm_part_2,
'\(.*\)', '', 'gi'),
'between ', '', 'gi')
),
' m ', 1),
'a point\s{0,1}', '', 'gi'),
',', '', 'gi')::float
END
)
ELSE NULL
END )
ELSE (
CASE WHEN btwn2_orig LIKE '% m %'
OR gis.abbr_street(t) LIKE '% m %'
THEN
regexp_replace(
regexp_replace(
split_part(
gis.abbr_street(t),
' m ', 1),
'a point\s{0,1}', '', 'gi'),
',', '', 'gi')::float
ELSE NULL
END
)
END)::float;
-- to help figure out if the row is case 1
-- i.e. Watson road from St. Mark's Road to a point 100 metres north
-- we want the btwn2 to be St. Mark's Road (which is also btwn1)
-- there are also cases like: street= Arundel Avenue and btwn= Danforth Avenue and a point 44.9 metres north of Fulton Avenue
-- we need to be able to differentiate the two cases
-- the difference between the two is that one of the cases has a 'of' to describe the second road that intersects with "street"/"highway2"
btwn2_check text := CASE
WHEN t IS NULL
THEN (
CASE
WHEN frm_part_2 IS NOT NULL
THEN gis.abbr_street(
regexp_replace(
regexp_replace(
frm_part_2,
'between ', '', 'gi'),
'A point', '', 'gi')
)
END)
ELSE gis.abbr_street(t)
END;
btwn2 text := CASE
WHEN btwn2_orig LIKE '%point%'
AND (btwn2_check NOT LIKE '% of %' OR btwn2_check LIKE ('% of ' || TRIM(btwn1)))
-- for case one
-- i.e. Watson road from St. Mark's Road to a point 100 metres north
-- we want the btwn2 to be St. Mark's Road (which is also btwn1)
THEN TRIM(gis.abbr_street(btwn1))
ELSE TRIM(gis.abbr_street(
regexp_replace(btwn2_orig , 'a point', '', 'gi')))
END;
BEGIN
RAISE NOTICE 'btwn1: %, btwn2: %, btwn2_check: %, highway2: %, metres_btwn1: %, metres_btwn2: %, direction_btwn1: %, direction_btwn2: %',
btwn1, btwn2, btwn2_check, highway2, metres_btwn1, metres_btwn2, direction_btwn1, direction_btwn2;
RETURN ROW(_bylaw_id, highway2, btwn1, direction_btwn1, metres_btwn1, btwn2, direction_btwn2, metres_btwn2,
btwn2_orig, btwn2_check)::gis.cleaned_bylaws_text;
END;
$$;
--For testing purposes only
DO $$
DECLARE
return_test gis.cleaned_bylaws_text; --the table
BEGIN
return_test := gis._clean_bylaws_text(123::int, 'Chesham Drive'::text, 'The west end of Chesham Drive and Heathrow Drive'::text, NULL::text); --the function
RAISE NOTICE 'Testing 123';
END;
$$ LANGUAGE 'plpgsql';