-
Notifications
You must be signed in to change notification settings - Fork 1
/
dea_spatialtools.py
468 lines (377 loc) · 19.4 KB
/
dea_spatialtools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
## dea_spatialtools.py
'''
Description: This file contains a set of python functions for conducting spatial analyses on Digital Earth Australia data.
License: The code in this notebook is licensed under the Apache License, Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0). Digital Earth Australia data is licensed under the Creative Commons by Attribution 4.0 license (https://creativecommons.org/licenses/by/4.0/).
Contact: If you need assistance, please post a question on the Open Data Cube Slack channel (http://slack.opendatacube.org/) or on the GIS Stack Exchange (https://gis.stackexchange.com/questions/ask?tags=open-data-cube) using the `open-data-cube` tag (you can view previously asked questions here: https://gis.stackexchange.com/questions/tagged/open-data-cube).
If you would like to report an issue with this script, you can file one on Github (https://github.com/GeoscienceAustralia/dea-notebooks/issues/new).
Functions included:
contour_extract
interpolate_2d
contours_to_arrays
Last modified: October 2019
'''
# Import required packages
import fiona
import affine
import collections
import numpy as np
import xarray as xr
import geopandas as gpd
import scipy.interpolate
from scipy import ndimage as nd
from skimage.measure import find_contours
from shapely.geometry import MultiLineString, mapping
def _extract_contour_coords(array, z_value, ds_affine, min_vertices):
    """
    Extract contours at `z_value` from a single 2D array and convert
    array cell coords into arrays of coordinate reference system
    coords, dropping NA vertices and contours shorter than
    `min_vertices`. Returns a list of (n, 2) coordinate arrays.
    """
    # We need to add (0.5 x the pixel size) to the x and y values to
    # correct coordinates to give the centre point of pixels, rather
    # than the top-left corner
    ps_x = ds_affine[0]  # Pixel x size
    ps_y = ds_affine[4]  # Pixel y size
    contours_geo = [
        np.column_stack(ds_affine * (i[:, 1], i[:, 0])) +
        np.array([0.5 * ps_x, 0.5 * ps_y])
        for i in find_contours(array, z_value)
    ]

    # For each array of coordinates, drop any xy points that have NA
    contours_nona = [i[~np.isnan(i).any(axis=1)] for i in contours_geo]

    # Drop contours with fewer vertices than `min_vertices`
    return [i for i in contours_nona if len(i) >= min_vertices]


def contour_extract(ds_array,
                    z_values,
                    ds_crs,
                    ds_affine,
                    output_shp,
                    min_vertices=2,
                    attribute_data=None,
                    attribute_dtypes=None,
                    dim='time',
                    verbose=True):
    """
    Uses `skimage.measure.find_contours` to extract multiple z-value
    contour lines from a two-dimensional array (e.g. multiple elevations
    from a single DEM), or one z-value for each array along a specified
    dimension of a multi-dimensional array (e.g. to map waterlines
    across time by extracting a 0 NDVI contour from each individual
    timestep in an xarray timeseries).

    Contours are exported to file as a shapefile and returned as a
    geopandas geodataframe with one row per z-value or one row per
    array along a specified dimension. The `attribute_data` and
    `attribute_dtypes` parameters can be used to pass custom attributes
    to the output contour features.

    Last modified: October 2019

    Parameters
    ----------
    ds_array : xarray DataArray
        A two-dimensional or multi-dimensional array from which
        contours are extracted. If a two-dimensional array is provided,
        the analysis will run in 'single array, multiple z-values' mode
        which allows you to specify multiple `z_values` to be extracted.
        If a multi-dimensional array is provided, the analysis will run
        in 'single z-value, multiple arrays' mode allowing you to
        extract contours for each array along the dimension specified
        by the `dim` parameter.
    z_values : int, float or list of ints, floats
        An individual z-value or list of multiple z-values to extract
        from the array. If operating in 'single z-value, multiple
        arrays' mode specify only a single z-value.
    ds_crs : string or CRS object
        Either a EPSG string giving the coordinate system of the array
        (e.g. 'EPSG:3577'), or a crs object (e.g. from an xarray
        dataset: `xarray_ds.geobox.crs`).
    ds_affine : affine.Affine object or GDAL geotransform
        Either an affine object from a rasterio or xarray object
        (e.g. `xarray_ds.geobox.affine`), or a gdal-derived
        geotransform object (e.g. `gdal_ds.GetGeoTransform()`) which
        will be converted to an affine.
    output_shp : string
        The path and filename for the output shapefile.
    min_vertices : int, optional
        The minimum number of vertices required for a contour to be
        extracted. The default (and minimum) value is 2, which is the
        smallest number required to produce a contour line (i.e. a start
        and end point). Higher values remove smaller contours,
        potentially removing noise from the output dataset.
    attribute_data : dict of lists, optional
        An optional dictionary of lists used to define custom
        attributes/fields to add to the shapefile. Dict keys give the
        name of the shapefile field, while dict values must be lists of
        the same length as `z_values` (for 'single array, multiple
        z-values' mode) or the number of arrays along the dimension
        specified by the `dim` parameter (for 'single z-value, multiple
        arrays' mode). For example, if `z_values=[0, 10, 20]`, then
        `attribute_data={'type': [1, 2, 3]}` can be used to create a
        shapefile field called 'type' with a value for each contour in
        the shapefile. The default is None, which produces a default
        shapefile field called 'z_value' with values taken directly from
        the `z_values` parameter and formatted as a 'float:9.2' ('single
        array, multiple z-values' mode), or a field named after `dim`
        numbered from 0 to the total number of arrays along the `dim`
        dimension ('single z-value, multiple arrays' mode).
    attribute_dtypes : dict, optional
        An optional dictionary giving the output dtype for each custom
        shapefile attribute field specified by `attribute_data`. For
        example, `attribute_dtypes={'type': 'int'}` can be used to set
        the 'type' field to an integer dtype. The dictionary should have
        the same keys/field names as declared in `attribute_data`. Valid
        values include 'int', 'str', 'datetime', and 'float:X.Y', where
        X is the minimum number of characters before the decimal place,
        and Y is the number of characters after the decimal place.
        Required whenever `attribute_data` is supplied.
    dim : string, optional
        The name of the dimension along which to extract contours when
        operating in 'single z-value, multiple arrays' mode. The default
        is 'time', which extracts contours for each array along the time
        dimension.
    verbose: bool, optional
        Whether to print the result of each contour extraction to the
        console. The default is True which prints all results; set to
        False for a cleaner output, particularly when extracting large
        numbers of contours.

    Returns
    -------
    output_gdf : geopandas geodataframe
        A geopandas geodataframe object with one feature per z-value
        ('single array, multiple z-values' mode), or one row per array
        along the dimension specified by the `dim` parameter ('single
        z-value, multiple arrays' mode). If `attribute_data` and
        `attribute_dtypes` are provided, these values will be included
        in the shapefile's attribute table.
    """

    # Obtain affine object from either rasterio/xarray affine or a
    # gdal geotransform:
    if not isinstance(ds_affine, affine.Affine):
        ds_affine = affine.Affine.from_gdal(*ds_affine)

    # If z_values supplied is not a list, convert to list:
    z_values = z_values if isinstance(z_values, list) else [z_values]

    # Fail early with a clear message if custom attribute data was
    # given without matching dtypes; fiona needs both to build the
    # output schema and would otherwise raise a cryptic error later
    if attribute_data and not attribute_dtypes:
        raise Exception('Please supply an `attribute_dtypes` dictionary '
                        'giving the output dtype for each field in '
                        '`attribute_data`')

    # If array has only one layer along the `dim` dimension (e.g. time),
    # remove the dim. `squeeze` raises ValueError if `dim` has a length
    # greater than one, and KeyError if `dim` does not exist; in either
    # case we keep the array unchanged
    try:
        ds_array = ds_array.squeeze(dim=dim)
        if verbose:
            print(f"Dimension '{dim}' has length of 1; removing from array")
    except (ValueError, KeyError):
        pass

    # Output dict to hold contours for each offset
    contours_dict = collections.OrderedDict()

    if len(ds_array.shape) == 2:

        ########################################
        # Single array, multiple z-values mode #
        ########################################

        if verbose:
            print(f'Operating in single array, multiple z-values mode')

        # If no custom attributes given, default to including a single
        # z-value field based on `z_values` (two decimal points)
        if not attribute_data:
            attribute_data = {'z_value': z_values}
            attribute_dtypes = {'z_value': 'float:9.2'}

        # If custom attributes are provided, test that they are equal
        # in length to the number of `z-values`:
        else:
            for key, values in attribute_data.items():
                if len(values) != len(z_values):
                    raise Exception(
                        f"Supplied attribute '{key}' has length of {len(values)} while z_values has "
                        f"length of {len(z_values)}; please supply the same number of attribute values "
                        "as z_values")

        for z_value in z_values:
            if verbose:
                print(f' Extracting contour {z_value}')

            contours_withdata = _extract_contour_coords(ds_array, z_value,
                                                        ds_affine,
                                                        min_vertices)

            # If there is data for the contour, add to dict:
            if len(contours_withdata) > 0:
                contours_dict[z_value] = contours_withdata
            else:
                if verbose:
                    print(f' No data for contour {z_value}; skipping')
                contours_dict[z_value] = None

    else:

        ########################################
        # Single z-value, multiple arrays mode #
        ########################################

        if verbose:
            print(f'Operating in single z-value, multiple arrays mode')

        # Test that only a single z-value is given when operating in
        # single z-value, multiple arrays mode
        if len(z_values) > 1:
            raise Exception('Please provide a single z-value when operating '
                            'in single z-value, multiple arrays mode')

        # If no custom attributes given, default to including one field
        # numbered from 0 to the number of arrays along `dim`:
        if not attribute_data:
            attribute_data = {dim: range(0, len(ds_array[dim]))}
            attribute_dtypes = {dim: 'int'}

        # If custom attributes are provided, test that they are equal
        # in length to the number of arrays along `dim`:
        else:
            for key, values in attribute_data.items():
                if len(values) != len(ds_array[dim]):
                    raise Exception(
                        f"Supplied attribute '{key}' has length of {len(values)} while there are "
                        f"{len(ds_array[dim])} arrays along the '{dim}' dimension. Please supply "
                        f"the same number of attribute values as arrays along the '{dim}' dimension"
                    )

        for z_value, _ in enumerate(ds_array[dim]):
            if verbose:
                print(f' Extracting contour {z_value}')

            contours_withdata = _extract_contour_coords(
                ds_array.isel({dim: z_value}), z_values[0], ds_affine,
                min_vertices)

            # If there is data for the contour, add to dict:
            if len(contours_withdata) > 0:
                contours_dict[z_value] = contours_withdata
            else:
                if verbose:
                    print(f' No data for contour {z_value}; skipping')
                contours_dict[z_value] = None

    #######################
    # Export to shapefile #
    #######################

    # If a shapefile path is given, generate shapefile
    if output_shp:
        if verbose:
            print(f'Exporting contour shapefile to {output_shp}')

        # Set up output multiline shapefile properties
        schema = {'geometry': 'MultiLineString',
                  'properties': attribute_dtypes}

        # Create output shapefile for writing
        with fiona.open(output_shp,
                        'w',
                        crs={
                            'init': str(ds_crs),
                            'no_defs': True
                        },
                        driver='ESRI Shapefile',
                        schema=schema) as output:

            # Write each shapefile to the dataset one by one
            for i, (z_value, contours) in enumerate(contours_dict.items()):
                # Skip z-values for which no contours were extracted
                if contours:
                    # Create multi-string object from all contour coordinates
                    contour_multilinestring = MultiLineString(contours)

                    # Get attribute values for writing
                    attribute_vals = {field_name: field_vals[i]
                                      for field_name, field_vals
                                      in attribute_data.items()}

                    # Write output shapefile to file with z-value field
                    output.write({
                        'properties': attribute_vals,
                        'geometry': mapping(contour_multilinestring)
                    })

    # Read the exported shapefile back in and return as a geodataframe
    output_gdf = gpd.read_file(output_shp)
    return output_gdf
def interpolate_2d(ds, x_coords, y_coords, z_coords,
                   method='linear', fill_nearest=False, sigma=None):
    """
    This function takes points with X, Y and Z coordinates, and
    interpolates Z-values across the extent of an existing xarray
    dataset. This can be useful for producing smooth surfaces from point
    data that can be compared directly against satellite data derived
    from an OpenDataCube query.

    Last modified: October 2019

    Parameters
    ----------
    ds : xarray DataArray or Dataset
        A two-dimensional or multi-dimensional array from which x and y
        dimensions will be copied and used for the area in which to
        interpolate point data.
    x_coords, y_coords : numpy array
        Arrays containing X and Y coordinates for all points (e.g.
        longitudes and latitudes).
    z_coords : numpy array
        An array containing Z coordinates for all points (e.g.
        elevations). These are the values you wish to interpolate
        between.
    method : string, optional
        The method used to interpolate between point values. This string
        is passed to `scipy.interpolate.griddata`; the default is
        'linear' and options include 'linear', 'nearest' and 'cubic'.
    fill_nearest : boolean, optional
        A boolean value indicating whether to fill NaN areas outside of
        the extent of the input X and Y coordinates with the value of
        the nearest pixel. By default, `scipy.interpolate.griddata` only
        returns interpolated values for the convex hull of the
        input points, so this variable can be used to provide results
        for all pixels instead. Warning: this can produce significant
        artefacts for areas located far from the nearest point.
    sigma : None or int, optional
        An optional integer value can be provided to smooth the
        interpolated surface using a Gaussian filter. Higher values of
        sigma result in a smoother surface that may lose some of the
        detail in the original interpolated layer.

    Returns
    -------
    interp_2d_array : xarray DataArray
        An xarray DataArray with x and y coordinates copied from `ds`,
        and Z-values interpolated from the points data.
    """

    # Extract xy and elev points
    points_xy = np.vstack([x_coords, y_coords]).T

    # Create grids of x and y coordinates to interpolate into. Note
    # that `np.meshgrid` returns the x-coordinate grid first, then the
    # y-coordinate grid
    grid_x, grid_y = np.meshgrid(ds.x, ds.y)

    # Interpolate x, y and z values using linear/TIN interpolation;
    # `griddata` expects `xi` ordered to match the (x, y) point columns
    out = scipy.interpolate.griddata(points=points_xy,
                                     values=z_coords,
                                     xi=(grid_x, grid_y),
                                     method=method)

    # Fill any NaN pixels outside the convex hull of the input points
    # with the value of the nearest valid pixel
    if fill_nearest:
        nearest_inds = nd.distance_transform_edt(input=np.isnan(out),
                                                 return_distances=False,
                                                 return_indices=True)
        out = out[tuple(nearest_inds)]

    # Apply Gaussian filter to smooth the output surface. Call
    # `gaussian_filter` directly on the ndimage namespace, as the
    # `scipy.ndimage.filters` sub-namespace is deprecated
    if sigma:
        out = nd.gaussian_filter(out, sigma=sigma)

    # Create xarray dataarray from the data, copying coords from `ds`
    interp_2d_array = xr.DataArray(out,
                                   coords=[ds.y, ds.x],
                                   dims=['y', 'x'])

    return interp_2d_array
def contours_to_arrays(gdf, col):
    """
    This function converts a polyline shapefile into an array with three
    columns giving the X, Y and Z coordinates of each vertex. This data
    can then be used as an input to interpolation procedures (e.g. using
    a function like `interpolate_2d`).

    Last modified: October 2019

    Parameters
    ----------
    gdf : Geopandas GeoDataFrame
        A GeoPandas GeoDataFrame of lines to convert into point
        coordinates.
    col : str
        A string giving the name of the GeoDataFrame field to use as
        Z-values.

    Returns
    -------
    A numpy array with three columns giving the X, Y and Z coordinates
    of each vertex in the input GeoDataFrame.
    """

    coords_zvals = []

    for i in range(0, len(gdf)):

        val = gdf.iloc[i][col]
        geometry = gdf.iloc[i].geometry

        # Multi-part geometries (e.g. MultiLineString) expose their
        # members via `.geoms`; single-part geometries (e.g. LineString)
        # expose vertices via `.coords` directly. Dispatching on
        # `.geoms` works for both shapely 1.x and 2.x, where direct
        # iteration over multi-part geometries was removed
        if hasattr(geometry, 'geoms'):
            coords = np.concatenate([np.vstack(part.coords.xy).T
                                     for part in geometry.geoms])
        else:
            coords = np.vstack(geometry.coords.xy).T

        # Append the Z-value as a third column alongside the XY coords
        coords_zvals.append(np.column_stack((coords,
                                             np.full(np.shape(coords)[0],
                                                     fill_value=val))))

    return np.concatenate(coords_zvals)