Skip to content

Commit

Permalink
Merge pull request #423 from okolekar/384-optimize-distance-computation
Browse files Browse the repository at this point in the history
384 optimize distance to anomaly
  • Loading branch information
nmaarnio authored Oct 23, 2024
2 parents 051f19b + f29356d commit c66cd68
Show file tree
Hide file tree
Showing 3 changed files with 436 additions and 175 deletions.
164 changes: 143 additions & 21 deletions eis_toolkit/raster_processing/distance_to_anomaly.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
from itertools import chain
from numbers import Number
from pathlib import Path
from tempfile import TemporaryDirectory

import geopandas as gpd
import numpy as np
import rasterio
from beartype import beartype
from beartype.typing import Literal, Optional, Tuple, Union
from osgeo import gdal
from rasterio import profiles

from eis_toolkit.exceptions import EmptyDataException, InvalidParameterValueException
from eis_toolkit.utilities.checks.raster import check_raster_profile
from eis_toolkit.utilities.miscellaneous import row_points, toggle_gdal_exceptions
from eis_toolkit.vector_processing.distance_computation import distance_computation

# Enabling gdal exceptions globally
toggle_gdal_exceptions()


def _check_threshold_criteria_and_value(threshold_criteria, threshold_criteria_value):
if threshold_criteria in {"lower", "higher"} and not isinstance(threshold_criteria_value, Number):
Expand Down Expand Up @@ -82,7 +85,7 @@ def distance_to_anomaly(
return out_image, anomaly_raster_profile


@beartype
'''@beartype
def distance_to_anomaly_gdal(
anomaly_raster_profile: Union[profiles.Profile, dict],
anomaly_raster_data: np.ndarray,
Expand Down Expand Up @@ -129,7 +132,7 @@ def distance_to_anomaly_gdal(
threshold_criteria=threshold_criteria,
threshold_criteria_value=threshold_criteria_value,
verbose=verbose,
)
)'''


def _fits_criteria(
Expand All @@ -156,6 +159,32 @@ def _fits_criteria(
return np.where(mask, False, criteria_dict[threshold_criteria](anomaly_raster_data))


def _validate_threshold_criteria(
anomaly_raster_profile: Union[profiles.Profile, dict],
anomaly_raster_data: np.ndarray,
threshold_criteria_value: Union[Tuple[Number, Number], Number],
threshold_criteria: Literal["lower", "higher", "in_between", "outside"],
) -> np.ndarray:
data_fits_criteria = _fits_criteria(
threshold_criteria=threshold_criteria,
threshold_criteria_value=threshold_criteria_value,
anomaly_raster_data=anomaly_raster_data,
nodata_value=anomaly_raster_profile.get("nodata"),
)
if np.sum(data_fits_criteria) == 0:
raise EmptyDataException(
" ".join(
[
"Expected the passed threshold criteria to match at least some data.",
f"Check that the values of threshold_criteria ({threshold_criteria})",
f"and threshold_criteria_value {threshold_criteria_value}",
"match at least part of the data.",
]
)
)
return data_fits_criteria


def _write_binary_anomaly_raster(tmp_dir: Path, anomaly_raster_profile, data_fits_criteria: np.ndarray):
anomaly_raster_binary_path = tmp_dir / "anomaly_raster_binary.tif"

Expand All @@ -166,7 +195,7 @@ def _write_binary_anomaly_raster(tmp_dir: Path, anomaly_raster_profile, data_fit
return anomaly_raster_binary_path


def _distance_to_anomaly_gdal(
"""def _distance_to_anomaly_gdal(
anomaly_raster_profile: Union[profiles.Profile, dict],
anomaly_raster_data: np.ndarray,
threshold_criteria_value: Union[Tuple[Number, Number], Number],
Expand Down Expand Up @@ -196,7 +225,7 @@ def _distance_to_anomaly_gdal(
quiet=not verbose,
)
return output_path
return output_path"""


def _distance_to_anomaly(
Expand All @@ -206,23 +235,12 @@ def _distance_to_anomaly(
threshold_criteria: Literal["lower", "higher", "in_between", "outside"],
max_distance: Optional[Number],
) -> np.ndarray:
data_fits_criteria = _fits_criteria(
threshold_criteria=threshold_criteria,
threshold_criteria_value=threshold_criteria_value,
anomaly_raster_data=anomaly_raster_data,
nodata_value=anomaly_raster_profile.get("nodata"),
data_fits_criteria = _validate_threshold_criteria(
anomaly_raster_profile,
anomaly_raster_data,
threshold_criteria_value,
threshold_criteria,
)
if np.sum(data_fits_criteria) == 0:
raise EmptyDataException(
" ".join(
[
"Expected the passed threshold criteria to match at least some data.",
f"Check that the values of threshold_criteria ({threshold_criteria})",
f"and threshold_criteria_value {threshold_criteria_value}",
"match at least part of the data.",
]
)
)

cols = np.arange(anomaly_raster_data.shape[1])
rows = np.arange(anomaly_raster_data.shape[0])
Expand All @@ -239,3 +257,107 @@ def _distance_to_anomaly(
)

return distance_array


def _distance_to_anomaly_gdal_compute_proximity(
anomaly_raster_profile: Union[profiles.Profile, dict],
anomaly_raster_data: np.ndarray,
threshold_criteria_value: Union[Tuple[Number, Number], Number],
threshold_criteria: Literal["lower", "higher", "in_between", "outside"],
) -> Tuple[np.ndarray, profiles.Profile]:

data_fits_criteria = _validate_threshold_criteria(
anomaly_raster_profile,
anomaly_raster_data,
threshold_criteria_value,
threshold_criteria,
)
# converting True False values to binary formant.
converted_values = np.where(data_fits_criteria, 1, 0)
driver = gdal.GetDriverByName("MEM")

width = anomaly_raster_profile["width"]
height = anomaly_raster_profile["height"]
temp_raster = driver.Create("", width, height, 1, gdal.GDT_Float32)
transformation = anomaly_raster_profile["transform"]
x_geo = (transformation.c, transformation.a, transformation.b)
y_geo = (transformation.f, transformation.d, transformation.e)
temp_raster.SetGeoTransform(x_geo + y_geo)
crs = anomaly_raster_profile["crs"].to_wkt()
band = temp_raster.GetRasterBand(1)
band.WriteArray(converted_values)
nodatavalue = anomaly_raster_profile["nodata"]
band.SetNoDataValue(nodatavalue)

# Create empty proximity raster
out_raster = driver.Create("", width, height, 1, gdal.GDT_Float32)
out_raster.SetGeoTransform(x_geo + y_geo)
out_raster.SetProjection(crs)
out_band = out_raster.GetRasterBand(1)
out_band.SetNoDataValue(nodatavalue)
options = ["values=1", "distunits=GEO"]

# Compute proximity
gdal.ComputeProximity(band, out_band, options)

# Create outputs
out_array = out_band.ReadAsArray()
out_meta = anomaly_raster_profile.copy()

# Update metadata
out_meta["dtype"] = out_array.dtype.name
out_meta["count"] = 1

return out_array, out_meta


@beartype
def distance_to_anomaly_gdal_compute_proximity(
anomaly_raster_profile: Union[profiles.Profile, dict],
anomaly_raster_data: np.ndarray,
threshold_criteria_value: Union[Tuple[Number, Number], Number],
threshold_criteria: Literal["lower", "higher", "in_between", "outside"],
) -> Tuple[np.ndarray, profiles.Profile]:
"""Calculate distance from raster cell to nearest anomaly.
The criteria for what is anomalous can be defined as a single number and
criteria text of "higher" or "lower". Alternatively, the definition can be
a range where values inside (criteria text of "within") or outside are
marked as anomalous (criteria text of "outside"). If anomaly_raster_profile does
contain "nodata" key, np.nan is assumed to correspond to nodata values.
This function demonstrates superior performance compared to the distance_to_anomaly
and distance_to_anomaly_gdal functions, as it uses a low-level, C++-based API
within the GDAL library. By directly computing the proximity map from the
source dataset, it benefits from the core-level optimizations inherent to GDAL,
ensuring enhanced efficiency and speed.
Args:
anomaly_raster_profile: The raster profile in which the distances
to the nearest anomalous value are determined.
anomaly_raster_data: The raster data in which the distances
to the nearest anomalous value are determined.
threshold_criteria_value: Value(s) used to define anomalous.
If the threshold criteria requires a tuple of values,
the first value should be the minimum and the second
the maximum value.
threshold_criteria: Method to define anomalous.
Returns:
A 2D numpy array with the distances to anomalies computed
and the original anomaly raster profile.
"""
check_raster_profile(raster_profile=anomaly_raster_profile)
_check_threshold_criteria_and_value(
threshold_criteria=threshold_criteria, threshold_criteria_value=threshold_criteria_value
)

out_array, out_meta = _distance_to_anomaly_gdal_compute_proximity(
anomaly_raster_profile=anomaly_raster_profile,
anomaly_raster_data=anomaly_raster_data,
threshold_criteria=threshold_criteria,
threshold_criteria_value=threshold_criteria_value,
)

return out_array, out_meta
107 changes: 51 additions & 56 deletions notebooks/distance_to_anomaly.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit c66cd68

Please sign in to comment.