#!/usr/bin/env python
# coding: utf-8
# This code conducts raster generation for DEA Coastlines:
# * Load stack of all available Landsat 5, 7, 8 and 9 satellite imagery
# for a location using ODC Virtual Products
# * Convert each satellite image into a remote sensing water index
# (MNDWI)
# * For each satellite image, model ocean tides into a tide modelling
# grid based on the exact time of image acquisition
# * Interpolate tide heights into spatial extent of image stack
# * Mask out high and low tide pixels by removing all observations
# acquired outside of 50 percent of the observed tidal range
# centered over mean sea level
# * Combine tidally-masked data into annual median composites
# representing the most representative position of the shoreline
# at approximately mean sea level tide height each year (0 metres
# Above Mean Sea Level).
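#
# Example command-line invocation (values here are illustrative only;
# see the click options defined at the bottom of this file):
#
#     python raster.py \
#         --config_path configs/dea_coastlines_config.yaml \
#         --study_area 100 \
#         --raster_version development \
#         --start_year 2000 \
#         --end_year 2020
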
import os
import sys
import warnings
from functools import partial
from collections import Counter
import pytz
import dask
import click
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
from affine import Affine
from shapely.geometry import shape
import datacube
import odc.algo
import odc.geo.xr
from datacube.utils.aws import configure_s3_access
from datacube.utils.cog import write_cog
from datacube.utils.geometry import CRS, GeoBox, Geometry
from datacube.utils.masking import make_mask
from datacube.virtual import catalog_from_file
from dea_tools.dask import create_local_dask_cluster
from dea_tools.spatial import hillshade, sun_angles
from dea_tools.coastal import model_tides, pixel_tides
from dea_tools.datahandling import parallel_apply
from coastlines.utils import configure_logging, load_config

# Hide warnings
warnings.simplefilter(action="ignore", category=FutureWarning)


def terrain_shadow(ds, dem, threshold=0.5, radius=1):
"""
Calculates a custom terrain shadow mask that can be used
to remove shadow-affected satellite pixels.
    This is achieved by first calculating hillshade using
'sun_elevation' and 'sun_azimuth' metadata, thresholding this
hillshade to identify shadows, then cleaning and dilating
to return clean areas of terrain shadow.
Parameters:
-----------
ds : xarray.Dataset
An `xarray.Dataset` containing data variables named
'sun_elevation' and 'sun_azimuth'.
dem : numpy.array
A 2D Digital Elevation Model array.
threshold : float, optional
An illumination value below which hillshaded cells should
be considered terrain shadow. Defaults to 0.5.
radius : int, optional
The disk radius to use when applying morphological opening
(to clean up small noisy shadows) and dilation (to
        mask out shadow edges). Defaults to 1.
Returns:
--------
xarray.DataArray
An `xarray.DataArray` containing boolean True values where
a pixel contains terrain shadow, and False if not.
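
    Example:
    --------
    A hedged usage sketch; assumes `ds` carries 'sun_elevation' and
    'sun_azimuth' variables, and `dem_array` is a hypothetical 2D
    array on the same pixel grid as `ds`:

    >>> shadow = terrain_shadow(ds, dem=dem_array, threshold=0.5)
    >>> ds_masked = ds.where(~shadow)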
"""
from skimage.morphology import binary_dilation, binary_opening, disk
hs = hillshade(dem, ds.sun_elevation, ds.sun_azimuth)
hs = hs < threshold
hs = binary_opening(hs, disk(radius))
hs = binary_dilation(hs, disk(radius))
return xr.DataArray(hs, dims=["y", "x"])


def terrain_shadow_masking(dc, query, ds, dem_product="dem_cop_30"):
"""
Use a Digital Elevation Model to calculate and apply a terrain
shadow mask to set all satellite pixels to `NaN` if they are
affected by terrain shadow. This helps to remove noisy shorelines
along coastal cliffs and steep coastal terrain.
Parameters:
-----------
dc : datacube.Datacube object
Datacube instance used to load DEM data and sun angle metadata.
query : dict
A dictionary containing query parameters used to identify
satellite observations and load metadata.
ds : xarray.Dataset
An `xarray.Dataset` containing a time series of water index
data (e.g. MNDWI) that will be terrain shadow masked.
dem_product : string, optional
A string giving the name of the DEM product to use for terrain
masking. This must match the name of a product in the datacube.
The DEM should contain a variable named 'elevation'.
Defaults to "dem_cop_30".
Returns:
--------
xarray.Dataset
An `xarray.Dataset` containing a time series of water index
data (e.g. MNDWI) with terrain shadowed pixels set to `NaN`.
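
    Example:
    --------
    A hedged sketch; assumes `dc`, `query` and `ds` were prepared as
    in `load_water_index`, and that a "dem_cop_30" product is indexed
    in the datacube:

    >>> ds_masked = terrain_shadow_masking(dc, query, ds)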
"""
# Compute solar angles for all satellite image timesteps
query_subset = {k: v for k, v in query.items() if k not in ["dask_chunks"]}
query_subset.update(
product=["ls5_sr", "ls7_sr", "ls8_sr", "ls9_sr"],
collection_category="T1",
group_by="solar_day",
)
sun_angles_ds = sun_angles(dc, query_subset)
# Load DEM into satellite data geobox
dem_ds = dc.load(product=dem_product, like=ds.geobox, resampling="cubic").squeeze(
"time", drop=True
)
dem_ds = dem_ds.where(dem_ds.elevation >= 0)
# Identify terrain shadow across all timesteps
terrain_shadow_ds = parallel_apply(
sun_angles_ds,
dim="time",
func=partial(terrain_shadow, dem=dem_ds.elevation.values),
)
# Remove terrain shadow pixels from satellite data
return ds.where(~terrain_shadow_ds)


def load_water_index(
dc, query, yaml_path, product_name="ls_nbart_ndwi", mask_terrain_shadow=True
):
"""
This function uses virtual products to load Landsat 5, 7, 8 and 9 data,
calculate a custom remote sensing index, and return the data as a
single xarray.Dataset.
To minimise resampling effects and maintain the highest data
fidelity required for subpixel shoreline extraction, this workflow
applies masking and index calculation at native resolution, and
only re-projects to the most common CRS for the query using cubic
resampling in the final step.
Parameters:
-----------
dc : datacube.Datacube object
Datacube instance used to load data.
query : dict
A dictionary containing query parameters passed to the
datacube virtual product (e.g. same as provided to `dc.load`).
yaml_path : string
Path to YAML file containing virtual product recipe.
product_name : string, optional
Name of the virtual product to load from the YAML recipe.
mask_terrain_shadow : bool, optional
Whether to use hillshading to mask out pixels potentially
affected by terrain shadow. This can significantly improve
shoreline mapping in areas of coastal cliffs or steep coastal
topography. Defaults to True.
Returns:
--------
ds : xarray.Dataset
An `xarray.Dataset` containing a time series of water index
        data (e.g. MNDWI) for the provided datacube query.
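
    Example:
    --------
    A minimal sketch; assumes `geopoly` is a datacube `Geometry` and
    the YAML recipe path is illustrative. MNDWI is conventionally
    computed as (green - swir1) / (green + swir1):

    >>> dc = datacube.Datacube(app="Coastlines")
    >>> query = {
    ...     "geopolygon": geopoly,
    ...     "time": ("1999", "2021"),
    ...     "dask_chunks": {"time": 1, "x": 2048, "y": 2048},
    ... }
    >>> ds = load_water_index(
    ...     dc,
    ...     query,
    ...     yaml_path="configs/virtual_products.yaml",
    ...     product_name="ls_nbart_ndwi",
    ... )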
"""
# Load in virtual product catalogue and select MNDWI product
catalog = catalog_from_file(yaml_path)
product = catalog[product_name]
# Identify most common CRS
bag = product.query(dc, **query)
crs_list = [str(i.crs) for i in bag.contained_datasets()]
crs_counts = Counter(crs_list)
crs = crs_counts.most_common(1)[0][0]
# Pass CRS to product load
settings = dict(
output_crs=crs,
resolution=(-30, 30),
align=(15, 15),
skip_broken_datasets=True,
resampling={
"oa_nbart_contiguity": "nearest",
"oa_fmask": "nearest",
"*": "cubic",
},
)
box = product.group(bag, **settings, **query)
ds = product.fetch(box, **settings, **query)
    # Rechunk if the trailing chunk along x or y would be 10 pixels
    # or fewer, as tiny edge chunks are inefficient to process
if ((len(ds.x) % 2048) <= 10) or ((len(ds.y) % 2048) <= 10):
ds = ds.chunk({"x": 3000, "y": 3000})
# Extract boolean mask
mask = odc.algo.enum_to_bool(
ds.cloud_mask, categories=["nodata", "cloud", "shadow", "snow"]
)
# Close mask to remove small holes in cloud, open mask to
# remove narrow false positive cloud, then dilate
mask_cleaned = odc.algo.mask_cleanup(
mask=mask, mask_filters=[("closing", 2), ("opening", 10), ("dilation", 5)]
)
# Add new mask to nodata pixels
ds = odc.algo.erase_bad(ds, mask_cleaned, nodata=np.nan)
# Apply terrain mask to remove deep shadows that can be
    # mistaken for water
if mask_terrain_shadow:
ds = terrain_shadow_masking(dc, query, ds, dem_product="dem_cop_30")
return ds[["mndwi"]]


def tide_cutoffs(ds, tides_lowres, tide_centre=0.0, resampling="bilinear"):
"""
Based on the entire time-series of tide heights, compute the max
and min satellite-observed tide height for each pixel, then
calculate tide cutoffs used to restrict our data to satellite
observations centred over mid-tide (0 m Above Mean Sea Level).
These tide cutoffs are spatially interpolated into the extent of
the input satellite imagery so they can be used to mask out low
and high tide satellite pixels.
Parameters:
-----------
ds : xarray.Dataset
An `xarray.Dataset` containing a time series of water index
data (e.g. MNDWI) for the provided datacube query. This is
used to define the spatial extents into which tide height
cutoffs will be interpolated.
tides_lowres : xarray.Dataset
A low-res `xarray.Dataset` containing tide heights for each
timestep in `ds`, as produced by the `pixel_tides` function.
tide_centre : float, optional
The central tide height used to compute the min and max
tide height cutoffs. Tide heights will be masked so all
satellite observations are approximately centred over this
value. The default is 0.0 which represents 0 m Above Mean
Sea Level.
resampling : string, optional
The resampling method used when reprojecting low resolution
tides to higher resolution. Defaults to "bilinear".
Returns:
--------
tide_cutoff_min, tide_cutoff_max : xarray.DataArray
2D arrays containing tide height cutoff values interpolated
into the extent of `ds`.
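
    Example:
    --------
    For a pixel with observed tides spanning -2.0 m to +2.0 m and
    `tide_centre=0.0`, the buffer is (2.0 - -2.0) * 0.25 = 1.0 m,
    giving cutoffs of -1.0 m and +1.0 m (i.e. the central 50% of the
    observed tidal range). A hedged usage sketch:

    >>> tide_cutoff_min, tide_cutoff_max = tide_cutoffs(
    ...     ds, tides_lowres, tide_centre=0.0
    ... )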
"""
# Calculate min and max tides
tide_min = tides_lowres.min(dim="time")
tide_max = tides_lowres.max(dim="time")
# Identify cutoffs
tide_cutoff_buffer = (tide_max - tide_min) * 0.25
tide_cutoff_min = tide_centre - tide_cutoff_buffer
tide_cutoff_max = tide_centre + tide_cutoff_buffer
# Reproject into original geobox
tide_cutoff_min = tide_cutoff_min.odc.reproject(
ds.odc.geobox, resampling=resampling
)
tide_cutoff_max = tide_cutoff_max.odc.reproject(
ds.odc.geobox, resampling=resampling
)
return tide_cutoff_min, tide_cutoff_max


def load_tidal_subset(year_ds, tide_cutoff_min, tide_cutoff_max):
"""
For a given year of data, thresholds data to keep observations
within a minimum and maximum tide height cutoff range, and load
the data into memory.
Parameters:
-----------
year_ds : xarray.Dataset
An `xarray.Dataset` for a single epoch (typically annually)
containing a time series of water index data (e.g. MNDWI) and
tide heights (`tide_m`) for each pixel.
tide_cutoff_min, tide_cutoff_max : numeric or xarray.DataArray
Numeric values or 2D data arrays containing minimum and
maximum tide height values used to select a subset of
satellite observations for each individual pixel that fall
within this range. All pixels with tide heights outside of
this range will be set to `NaN`.
Returns:
--------
year_ds : xarray.Dataset
An in-memory `xarray.Dataset` with pixels set to `NaN` if
they were acquired outside of the supplied tide height range.
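
    Example:
    --------
    A hedged sketch; assumes `ds` includes a `tide_m` variable (e.g.
    as added by `pixel_tides`) and cutoffs from `tide_cutoffs`:

    >>> year_ds = load_tidal_subset(
    ...     ds.sel(time="2020"), tide_cutoff_min, tide_cutoff_max
    ... )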
"""
# Determine what pixels were acquired in selected tide range, and
# drop time-steps without any relevant pixels to reduce data to load
tide_bool = (year_ds.tide_m >= tide_cutoff_min) & (
year_ds.tide_m <= tide_cutoff_max
)
year_ds = year_ds.sel(time=tide_bool.sum(dim=["x", "y"]) > 0)
# Apply mask, and load in corresponding tide masked data
year_ds = year_ds.where(tide_bool)
return year_ds.compute()


def tidal_composite(
year_ds, label, label_dim, output_dir, output_suffix="", export_geotiff=False
):
"""
For a given year of data, takes median, counts and standard
deviation of valid water index results, and optionally writes
each water index, tide height, standard deviation and valid pixel
counts for the time period to file as GeoTIFFs.
Parameters:
-----------
year_ds : xarray.Dataset
A tide-masked `xarray.Dataset` containing a time series of
water index data (e.g. MNDWI) for a single epoch
(typically annually).
label : int, float or str
An int, float or string used to name the output variables and
        files. For example, a year label for the data in `year_ds`.
label_dim : str
A string giving the name for the dimension that will be created
to contain labels supplied to `label`.
output_dir : str
The directory to output files for the specific analysis.
output_suffix : str
An optional suffix that will be used to identify certain
outputs. For example, outputs used for gapfilling data
can be differentiated from other outputs by supplying
`output_suffix='_gapfill'`. Defaults to '', which will not
add a suffix to the output file names.
    export_geotiff : bool, optional
Whether to export output files as GeoTIFFs. Defaults to False.
Returns:
--------
median_ds : xarray.Dataset
An in-memory `xarray.Dataset` containing output composite
arrays with a dimension labelled by `label` and `label_dim`.
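
    Example:
    --------
    A hedged sketch that composites one year of tide-masked data;
    the output directory is illustrative:

    >>> median_ds = tidal_composite(
    ...     year_ds,
    ...     label=2020,
    ...     label_dim="year",
    ...     output_dir="data/interim/raster/example",
    ...     export_geotiff=True,
    ... )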
"""
# Compute median water indices and counts of valid pixels
median_ds = year_ds.median(dim="time", keep_attrs=True)
median_ds["stdev"] = year_ds.mndwi.std(dim="time", keep_attrs=True)
median_ds["count"] = year_ds.mndwi.count(dim="time", keep_attrs=True).astype(
"int16"
)
# Set nodata values, using np.nan for floats and -999 for ints
for var_name, var in median_ds.data_vars.items():
median_ds[var_name].attrs["nodata"] = -999 if var.dtype == "int16" else np.nan
# Load data into memory
median_ds.load()
# Write each variable to file
if export_geotiff:
for i in median_ds:
write_cog(
geo_im=median_ds[i],
fname=f"{output_dir}/{str(label)}_{i}{output_suffix}.tif",
overwrite=True,
)
# Set coordinate and dim
median_ds = median_ds.assign_coords(**{label_dim: label}).expand_dims(label_dim)
return median_ds


def export_annual_gapfill(
ds, output_dir, tide_cutoff_min, tide_cutoff_max, start_year, end_year
):
"""
To calculate both annual median composites and three-year gapfill
composites without having to load more than three years in memory
at the one time, this function loops through the years in the
dataset, progressively updating three datasets (the previous year,
current year and subsequent year of data).
Parameters:
-----------
ds : xarray.Dataset
A tide-masked `xarray.Dataset` containing a time series of
water index data (e.g. MNDWI).
output_dir : str
The directory to output files for the specific analysis.
tide_cutoff_min, tide_cutoff_max : numeric or xarray.DataArray
Numeric values or 2D data arrays containing minimum and
maximum tide height values used to select a subset of
satellite observations for each individual pixel that fall
within this range. All pixels with tide heights outside of
this range will be set to `NaN`.
    start_year, end_year : int
The first and last years you wish to export annual median
composites and three-year gapfill composites for.
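
    Example:
    --------
    A hedged sketch; assumes `ds` contains a `tide_m` variable and
    the output directory is illustrative:

    >>> export_annual_gapfill(
    ...     ds,
    ...     output_dir="data/interim/raster/example",
    ...     tide_cutoff_min=tide_cutoff_min,
    ...     tide_cutoff_max=tide_cutoff_max,
    ...     start_year=2000,
    ...     end_year=2020,
    ... )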
"""
# Create empty vars containing un-composited data from the previous,
# current and future year. This is progressively updated to ensure that
# no more than 3 years of data are loaded into memory at any one time
previous_ds = None
current_ds = None
future_ds = None
    # Iterate through each year in the dataset. The loop starts two
    # years early so the previous/current/future variables can be
    # progressively filled before the first composite is exported
for year in np.arange(start_year - 2, end_year + 1):
try:
# Load data for the subsequent year; drop tide variable as
# we do not need to create annual composites from this data
future_ds = load_tidal_subset(
ds.sel(time=str(year + 1)),
tide_cutoff_min=tide_cutoff_min,
tide_cutoff_max=tide_cutoff_max,
).drop_vars("tide_m")
except KeyError:
# Create empty array if error is raised due to no data being
# available for time period
future_ds = xr.DataArray(
data=np.empty(shape=(0, len(ds.y), len(ds.x))),
dims=["time", "y", "x"],
coords={"x": ds.x, "y": ds.y, "time": []},
name="mndwi",
).to_dataset()
# Ensure that time uses the correct datetime dtype so we
            # can combine this empty array with our data
future_ds["time"] = future_ds.time.astype(np.dtype("datetime64[ns]"))
# If the current year var contains data, combine these observations
# into annual median composites and export GeoTIFFs
if current_ds:
# Generate composite
tidal_composite(
current_ds,
label=year,
label_dim="year",
output_dir=output_dir,
export_geotiff=True,
)
# If ALL of the previous, current and future year vars contain data,
# combine these three years of observations into a single median
# 3-year gapfill composite
if previous_ds and current_ds and future_ds:
# Concatenate the three years into one xarray.Dataset
gapfill_ds = xr.concat([previous_ds, current_ds, future_ds], dim="time")
# Generate composite
tidal_composite(
gapfill_ds,
label=year,
label_dim="year",
output_dir=output_dir,
output_suffix="_gapfill",
export_geotiff=True,
)
# Shift all loaded data back so that we can re-use it in the next
# iteration and not have to load the same data multiple times
previous_ds = current_ds
current_ds = future_ds
        future_ds = None


def generate_rasters(
dc,
config,
study_area,
raster_version,
start_year,
end_year,
tide_centre,
buffer,
log=None,
):
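    """
    Run the DEA Coastlines raster generation workflow for a single
    study area grid cell: load a tide-masked water index (MNDWI) time
    series, model per-pixel tide heights, compute tide height cutoffs,
    and export annual median and three-year gapfill composites.

    Parameters mirror the `generate_rasters_cli` command-line options
    defined below; see their help text for details.
    """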
#####################################
# Connect to datacube, Dask cluster #
#####################################
if log is None:
log = configure_logging()
# Create local dask client for parallelisation
client = create_local_dask_cluster(return_client=True)
###########################
# Load supplementary data #
###########################
# Grid cells used to process the analysis
gridcell_gdf = (
gpd.read_file(config["Input files"]["grid_path"])
.to_crs(epsg=4326)
.set_index("id")
)
gridcell_gdf.index = gridcell_gdf.index.astype(int).astype(str)
gridcell_gdf = gridcell_gdf.loc[[str(study_area)]]
log.info(f"Study area {study_area}: Loaded study area grid")
################
# Loading data #
################
# Create query; start year and end year are buffered by one year
# on either side to facilitate gapfilling low data observations
geopoly = Geometry(gridcell_gdf.iloc[0].geometry, crs=gridcell_gdf.crs)
query = {
"geopolygon": geopoly.buffer(buffer),
"time": (str(start_year - 1), str(end_year + 1)),
"dask_chunks": {"time": 1, "x": 2048, "y": 2048},
"dataset_maturity": "final",
}
# Load virtual product
try:
ds = load_water_index(
dc,
query,
yaml_path=config["Virtual product"]["virtual_product_path"],
product_name=config["Virtual product"]["virtual_product_name"],
mask_terrain_shadow=False,
)
except (ValueError, IndexError):
raise ValueError(f"Study area {study_area}: No valid data found")
log.info(f"Study area {study_area}: Loaded virtual product")
###################
# Tidal modelling #
###################
# For each satellite timestep, model tide heights into a low-resolution
# 5 x 5 km grid (matching resolution of the FES2014 tidal model), then
# reproject modelled tides into the spatial extent of our satellite image.
# Add this new data as a new variable in our satellite dataset to allow
# each satellite pixel to be analysed and filtered/masked based on the
# tide height at the exact moment of satellite image acquisition.
try:
ds["tide_m"], tides_lowres = pixel_tides(ds, resample=True)
log.info(f"Study area {study_area}: Finished modelling tide heights")
except FileNotFoundError:
log.exception(f"Study area {study_area}: Unable to access tide modelling files")
sys.exit(2)
# Based on the entire time-series of tide heights, compute the max
# and min satellite-observed tide height for each pixel, then
# calculate tide cutoffs used to restrict our data to satellite
# observations centred over mid-tide (0 m Above Mean Sea Level).
tide_cutoff_min, tide_cutoff_max = tide_cutoffs(
ds, tides_lowres, tide_centre=tide_centre
)
log.info(
f"Study area {study_area}: Calculating low and high tide cutoffs for each pixel"
)
##############################
# Generate yearly composites #
##############################
# If output folder doesn't exist, create it
output_dir = (
f"data/interim/raster/{raster_version}/" f"{study_area}_{raster_version}"
)
os.makedirs(output_dir, exist_ok=True)
# Iterate through each year and export annual and 3-year
# gapfill composites
log.info(f"Study area {study_area}: Started exporting raster data")
export_annual_gapfill(
ds, output_dir, tide_cutoff_min, tide_cutoff_max, start_year, end_year
)
log.info(f"Study area {study_area}: Completed exporting raster data")
# Close dask client
client.close()


@click.command()
@click.option(
"--config_path",
type=str,
required=True,
help="Path to the YAML config file defining inputs to "
"use for this analysis. These are typically located in "
"the `dea-coastlines/configs/` directory.",
)
@click.option(
"--study_area",
type=str,
required=True,
help="A string providing a unique ID of an analysis "
"gridcell that will be used to run the analysis. This "
'should match a row in the "id" column of the provided '
"analysis gridcell vector file.",
)
@click.option(
"--raster_version",
type=str,
required=True,
help="A unique string proving a name that will be used "
"for output raster directories and files. This can be "
"used to version different analysis outputs.",
)
@click.option(
"--start_year",
type=int,
default=2000,
help="The first annual shoreline you wish to be included "
"in the final outputs. To allow low data pixels to be "
"gapfilled with additional satellite data from neighbouring "
"years, the full timeseries of satellite data loaded in this "
"step will include one additional year of preceding satellite data "
"(i.e. if `--start_year 2000`, satellite data from 1999 onward "
"will be loaded for gapfilling purposes). Because of this, we "
"recommend that at least one year of satellite data exists in "
"your datacube prior to `--start_year`.",
)
@click.option(
"--end_year",
type=int,
default=2020,
help="The final annual shoreline you wish to be included "
"in the final outputs. To allow low data pixels to be "
"gapfilled with additional satellite data from neighbouring "
"years, the full timeseries of satellite data loaded in this "
"step will include one additional year of ensuing satellite data "
"(i.e. if `--end_year 2020`, satellite data up to and including "
"2021 will be loaded for gapfilling purposes). Because of this, we "
"recommend that at least one year of satellite data exists in your "
"datacube after `--end_year`.",
)
@click.option(
"--tide_centre",
type=float,
default=0.0,
help="The central tide height used to compute the min and max tide "
"height cutoffs. Tide heights will be masked so all satellite "
"observations are approximately centred over this value. The "
"default is 0.0 which represents 0 m Above Mean Sea Level.",
)
@click.option(
"--buffer",
type=float,
default=0.05,
help="The distance (in degrees) to buffer the study area grid cell "
"extent. This buffer is important for ensuring that generated "
"rasters overlap along the boundaries of neighbouring study areas "
"so that we can extract seamless vector shorelines. Defaults to "
"0.05 degrees, or roughly 5 km at the equator.",
)
@click.option(
"--aws_unsigned/--no-aws_unsigned",
type=bool,
default=True,
help="Whether to use sign AWS requests for S3 access",
)
@click.option(
"--overwrite/--no-overwrite",
type=bool,
default=True,
help="Whether to overwrite tiles with existing outputs, "
"or skip these tiles entirely.",
)
def generate_rasters_cli(
config_path,
study_area,
raster_version,
start_year,
end_year,
tide_centre,
buffer,
aws_unsigned,
overwrite,
):
log = configure_logging(f"Coastlines raster generation for study area {study_area}")
# Test if study area has already been run by checking if run status file exists
run_status_file = f"data/interim/raster/{raster_version}/{study_area}_{raster_version}/run_completed"
output_exists = os.path.exists(run_status_file)
# Skip if outputs exist but overwrite is False
if output_exists and not overwrite:
log.info(
f"Study area {study_area}: Data exists but overwrite set to False; skipping."
)
sys.exit(0)
# Connect to datacube
dc = datacube.Datacube(app="Coastlines")
# Load analysis params from config file
config = load_config(config_path=config_path)
# Do an opinionated configuration of S3
configure_s3_access(cloud_defaults=True, aws_unsigned=aws_unsigned)
try:
generate_rasters(
dc,
config,
study_area,
raster_version,
start_year,
end_year,
tide_centre,
buffer,
log=log,
)
# Create blank run status file to indicate run completion
with open(
run_status_file,
mode="w",
):
pass
except Exception as e:
log.exception(f"Study area {study_area}: Failed to run process with error {e}")
sys.exit(1)


if __name__ == "__main__":
generate_rasters_cli()