#!/usr/bin/env python
# coding: utf-8
# This code combines individual datasets into continental DEA
# Coastlines layers:
#
# * Combines output shorelines and rates of change statistics point
# vectors into single continental datasets
# * Aggregates this data to produce moving window hotspot datasets
# that summarise coastal change at regional and continental scale.
# * Writes outputs to GeoPackage and zipped shapefiles
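#
# Example invocation (illustrative only; substitute your own version
# strings and baseline year):
#
#     python continental.py --vector_version example_version --baseline_year 2021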
import os
import sys
import fiona
import click
import numpy as np
import pandas as pd
import geohash as gh
import geopandas as gpd
from pathlib import Path
from coastlines.utils import configure_logging, STYLES_FILE
from coastlines.vector import points_on_line, change_regress, vector_schema
def wms_fields(gdf):
"""
Calculates several additional fields required
for the WMS/TerriaJS Coastlines visualisation.
Parameters:
-----------
gdf : geopandas.GeoDataFrame
The input rate of change points vector dataset.
Returns:
--------
A `pandas.DataFrame` containing additional fields
with a "wms_*" prefix.
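Note: `wms_conf` (1.96 x the rate's standard error) approximates the
width of a 95% confidence interval around the rate of change.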
"""
return pd.DataFrame(
dict(
wms_abs=gdf.rate_time.abs(),
wms_conf=gdf.se_time * 1.96,
wms_grew=gdf.rate_time < 0,
wms_retr=gdf.rate_time > 0,
wms_sig=gdf.sig_time <= 0.01,
wms_good=gdf.certainty == "good",
)
)
@click.command()
@click.option(
"--vector_version",
type=str,
required=True,
help="A unique string providing a name that was used "
"for output vector directories and files. This is used "
"to identify the tiled annual shoreline and rates of "
"change layers that will be combined into continental-"
"scale layers.",
)
@click.option(
"--continental_version",
type=str,
help="A unique string providing a name that will be used "
"for output continental-scale layers. This allows "
"multiple versions of continental-scale layers to be "
"generated from the same input vector data, e.g. for "
"testing different coastal change hotspot summary "
"layers. If not provided, this will default to the "
'string provided to "--vector_version".',
)
@click.option(
"--shorelines",
type=bool,
default=True,
help="A boolean indicating whether to combine tiled "
"annual shorelines layers into a single continental-"
"scale annual shorelines layer.",
)
@click.option(
"--ratesofchange",
type=bool,
default=True,
help="A boolean indicating whether to combine tiled "
"rates of change statistics layers into a single "
"continental-scale rates of change statistics layer.",
)
@click.option(
"--hotspots",
type=bool,
default=True,
help="A boolean indicating whether to generate "
"continental-scale coastal change hotspot summary layers "
"(this can be very slow depending on the radii requested).",
)
@click.option(
"--hotspots_radius",
default=[10000, 5000, 1000],
multiple=True,
help="The distance (in metres) used to generate coastal "
"change hotspots summary layers. This controls the spacing "
"of each summary point, and the radius used to aggregate "
"rates of change statistics around each point. "
"The default generates three hotspot layers with radii "
"10000 m, 5000 m and 1000 m. To specify multiple custom "
"radii, repeat this argument, e.g. "
"`--hotspots_radius 1000 --hotspots_radius 5000`.",
)
@click.option(
"--baseline_year",
type=int,
default=2021,
help="The annual shoreline used to generate the hotspot "
"summary points. This is typically the most recent "
"annual shoreline in the dataset.",
)
@click.option(
"--shapefiles",
type=bool,
default=True,
help="A boolean indicating whether to export outputs as a zipped "
"ESRI Shapefile dataset (this can be slow depending on the size "
"of the analysis).",
)
@click.option(
"--include-styles/--no-include-styles",
is_flag=True,
default=True,
help="Set this to indicate whether to include styles "
"in the output OGC GeoPackage file.",
)
def continental_cli(
vector_version,
continental_version,
shorelines,
ratesofchange,
hotspots,
hotspots_radius,
baseline_year,
shapefiles,
include_styles,
):
#########
# Setup #
#########
log = configure_logging("Continental layers and hotspots generation")
# If no continental version is provided, copy this from vector
# version
if continental_version is None:
continental_version = vector_version
output_dir = Path(f"data/processed/{continental_version}")
output_dir.mkdir(exist_ok=True, parents=True)
log.info(f"Writing data to {output_dir}")
######################
# Merge tile vectors #
######################
# Setup input and output file paths
shoreline_paths = f"data/interim/vector/{vector_version}/*/annualshorelines*.shp"
ratesofchange_paths = f"data/interim/vector/{vector_version}/*/ratesofchange*.shp"
# Output path for geopackage
OUTPUT_GPKG = output_dir / f"coastlines_{continental_version}.gpkg"
# Combine annual shorelines into a single continental layer
if shorelines:
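# ogrmerge.py flags: -single writes all inputs into one output layer,
# -overwrite_ds recreates the GeoPackage if it exists, -t_srs reprojects
# to Australian Albers (EPSG:3577), and -nln sets the output layer name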
os.system(
f"ogrmerge.py -o "
f"{OUTPUT_GPKG} {shoreline_paths} "
f"-single -overwrite_ds -t_srs epsg:3577 "
f"-nln shorelines_annual"
)
log.info("Merging annual shorelines complete")
else:
log.info("Not writing shorelines")
# Combine rates of change stats points into single continental layer
if ratesofchange:
os.system(
f"ogrmerge.py "
f"-o {OUTPUT_GPKG} {ratesofchange_paths} "
f"-single -update -t_srs epsg:3577 "
f"-nln rates_of_change"
)
log.info("Merging rates of change points complete")
else:
log.info("Not writing annual rates of change points")
# If shapefiles are requested, set up file paths
if shapefiles:
# Output path for zipped shapefiles
OUTPUT_SHPS = output_dir / f"coastlines_{continental_version}.shp.zip"
# If shapefile zip exists, delete it first
if OUTPUT_SHPS.exists():
OUTPUT_SHPS.unlink()
else:
log.info("Not exporting zipped ESRI Shapefile outputs")
############################
# Load continental vectors #
############################
# Load merged continental data into memory if either hotspot
# generation or zipped shapefile exports are required
if hotspots or shapefiles:
# Load continental shoreline and rates of change data
try:
# Load continental rates of change data
ratesofchange_gdf = gpd.read_file(
OUTPUT_GPKG, layer="rates_of_change"
).set_index("uid")
# Load continental shorelines data
shorelines_gdf = gpd.read_file(
OUTPUT_GPKG, layer="shorelines_annual"
).set_index("year")
shorelines_gdf = shorelines_gdf.loc[shorelines_gdf.geometry.is_valid]
log.info(
"Loading continental shoreline and rates of change data into memory"
)
except (fiona.errors.DriverError, ValueError):
raise FileNotFoundError(
"Continental-scale annual shoreline and rates of "
"change layers are required for hotspot generation or "
"shapefile export. Try re-running this analysis with "
"the following settings: `--shorelines True --ratesofchange True`."
)
#####################
# Generate hotspots #
#####################
# Generate hotspot points that provide regional/continental summary
# of hotspots of coastal erosion and growth
if hotspots:
log.info("Generating coastal change hotspots")
######################
# Calculate hotspots #
######################
for i, radius in enumerate(hotspots_radius):
# Extract hotspot points
log.info(f"Calculating {radius} m hotspots")
hotspots_gdf = points_on_line(
shorelines_gdf,
index=baseline_year,
distance=int(radius / 2),
)
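# Summary points are spaced at half the aggregation radius, while the
# windows below are buffered to the full radius, so neighbouring
# windows overlap rather than leaving gaps in coverage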
# Create polygon windows by buffering points
buffered_gdf = hotspots_gdf[["geometry"]].copy()
buffered_gdf["geometry"] = buffered_gdf.buffer(radius)
# Spatial join rate of change points to each polygon
hotspot_grouped = (
ratesofchange_gdf.loc[
ratesofchange_gdf.certainty == "good",
ratesofchange_gdf.columns.str.contains("dist_|geometry"),
]
.sjoin(buffered_gdf, predicate="within")
.groupby("index_right")
)
# Aggregate/summarise values by taking median of all points
# within each buffered polygon
hotspot_values = hotspot_grouped.median(numeric_only=True).round(2)
# Extract year from distance columns (remove "dist_")
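# (e.g. a "dist_2019" column becomes the regression x-value 2019)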
x_years = hotspot_values.columns.str.replace("dist_", "").astype(int)
# Compute coastal change rates by linearly regressing annual
# movements vs. time
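# (the regression output is unpacked into rate, intercept, significance,
# standard error and outlier columns below)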
rate_out = hotspot_values.apply(
lambda row: change_regress(
y_vals=row.values.astype(float), x_vals=x_years, x_labels=x_years
),
axis=1,
)
# Add rates of change back into dataframe
hotspot_values[
["rate_time", "incpt_time", "sig_time", "se_time", "outl_time"]
] = rate_out
# Join aggregated values back to hotspot points after
# dropping unused columns (regression intercept)
hotspots_gdf = hotspots_gdf.join(hotspot_values.drop("incpt_time", axis=1))
# Add hotspots radius attribute column
hotspots_gdf["radius_m"] = radius
# Initialise certainty column with good values
hotspots_gdf["certainty"] = "good"
# Identify any points with insufficient observations and flag these as
# uncertain. A sensible threshold is obtained by dividing the hotspot
# radius by the 30 m along-shore spacing of rates of change points
hotspots_gdf["n"] = hotspot_grouped.size()
hotspots_gdf["n"] = hotspots_gdf["n"].fillna(0)
hotspots_gdf.loc[
hotspots_gdf.n < (radius / 30), "certainty"
] = "insufficient points"
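# For example, at the default 10,000 m radius a hotspot needs roughly
# 333 or more contributing points to remain flagged as "good"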
# Generate a geohash UID for each point and set as index
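# (geohash precision 11 corresponds to roughly 15 cm cells, effectively
# giving each point a unique, location-based ID)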
uids = (
hotspots_gdf.geometry.to_crs("EPSG:4326")
.apply(lambda x: gh.encode(x.y, x.x, precision=11))
.rename("uid")
)
hotspots_gdf = hotspots_gdf.set_index(uids)
# Export hotspots to file, incrementing name for each layer
try:
# Export to geopackage
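# Layer names increment with each radius, e.g. the default radii produce
# hotspots_zoom_1, hotspots_zoom_2 and hotspots_zoom_3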
layer_name = f"hotspots_zoom_{range(0, 10)[i + 1]}"
hotspots_gdf.to_file(
OUTPUT_GPKG,
layer=layer_name,
schema={
"properties": vector_schema(hotspots_gdf),
"geometry": "Point",
},
)
if shapefiles:
# Add additional WMS fields and add to shapefile
hotspots_gdf = pd.concat(
[hotspots_gdf, wms_fields(gdf=hotspots_gdf)], axis=1
)
hotspots_gdf.to_file(
OUTPUT_SHPS,
layer=f"coastlines_{continental_version}_{layer_name}",
schema={
"properties": vector_schema(hotspots_gdf),
"geometry": "Point",
},
)
except ValueError as e:
log.exception(f"Failed to generate hotspots with error: {e}")
sys.exit(1)
log.info("Writing coastal change hotspots complete")
else:
log.info("Not generating coastal change hotspots")
############################
# Export zipped shapefiles #
############################
if shapefiles:
log.info("Started writing outputs as zipped ESRI Shapefiles")
if ratesofchange:
# Add rates of change points to shapefile zip
# Add additional WMS fields and add to shapefile
ratesofchange_gdf = pd.concat(
[ratesofchange_gdf, wms_fields(gdf=ratesofchange_gdf)], axis=1
)
ratesofchange_gdf.to_file(
OUTPUT_SHPS,
layer=f"coastlines_{continental_version}_rates_of_change",
schema={
"properties": vector_schema(ratesofchange_gdf),
"geometry": "Point",
},
)
log.info(
"Completed writing rates of change points to zipped ESRI Shapefiles"
)
if shorelines:
# Add annual shorelines to shapefile zip
shorelines_gdf.to_file(
OUTPUT_SHPS,
layer=f"coastlines_{continental_version}_shorelines_annual",
schema={
"properties": vector_schema(shorelines_gdf),
"geometry": ["MultiLineString", "LineString"],
},
)
log.info("Completed writing annual shorelines to zipped ESRI Shapefiles")
#########################
# Add GeoPackage styles #
#########################
if include_styles:
styles = gpd.read_file(STYLES_FILE)
styles.to_file(OUTPUT_GPKG, layer="layer_styles")
log.info("Writing styles to OGC GeoPackage file complete")
else:
log.info("Not writing styles to OGC GeoPackage")
if __name__ == "__main__":
continental_cli()