From 9581b7417c505c699ce1273f98c925e1d5a4646d Mon Sep 17 00:00:00 2001
From: Niko Aarnio <niko.aarnio@protonmail.com>
Date: Tue, 17 Sep 2024 10:45:27 +0300
Subject: [PATCH] feat(exploratory-analyses): add band parameter to descriptive
 statistics raster, rename CLI parameter from input_file to input_raster

---
 eis_toolkit/cli.py                            |  6 +--
 .../descriptive_statistics.py                 | 51 ++++++++++++++-----
 .../descriptive_statistics_test.py            |  2 +-
 3 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/eis_toolkit/cli.py b/eis_toolkit/cli.py
index 86dafe56..369b6aca 100644
--- a/eis_toolkit/cli.py
+++ b/eis_toolkit/cli.py
@@ -786,15 +786,15 @@ def compute_pca_vector_cli(
 
 # DESCRIPTIVE STATISTICS (RASTER)
 @app.command()
-def descriptive_statistics_raster_cli(input_file: INPUT_FILE_OPTION):
+def descriptive_statistics_raster_cli(input_raster: INPUT_FILE_OPTION, band: int = 1):
     """Generate descriptive statistics from raster data."""
     from eis_toolkit.exploratory_analyses.descriptive_statistics import descriptive_statistics_raster
 
     typer.echo("Progress: 10%")
 
-    with rasterio.open(input_file) as raster:
+    with rasterio.open(input_raster) as raster:
         typer.echo("Progress: 25%")
-        results_dict = descriptive_statistics_raster(raster)
+        results_dict = descriptive_statistics_raster(raster, band)
     typer.echo("Progress: 75%")
 
     typer.echo("Progress: 100% \n")
diff --git a/eis_toolkit/exploratory_analyses/descriptive_statistics.py b/eis_toolkit/exploratory_analyses/descriptive_statistics.py
index 3bc064aa..e0da06f5 100644
--- a/eis_toolkit/exploratory_analyses/descriptive_statistics.py
+++ b/eis_toolkit/exploratory_analyses/descriptive_statistics.py
@@ -3,14 +3,14 @@
 import pandas as pd
 import rasterio
 from beartype import beartype
-from beartype.typing import Union
+from beartype.typing import Dict, Union
 from statsmodels.stats import stattools
 from statsmodels.stats.weightstats import DescrStatsW
 
-from eis_toolkit.exceptions import InvalidColumnException
+from eis_toolkit.exceptions import InvalidColumnException, InvalidRasterBandException
 
 
-def _descriptive_statistics(data: Union[rasterio.io.DatasetReader, pd.DataFrame, gpd.GeoDataFrame]) -> dict:
+def _descriptive_statistics(data: Union[rasterio.io.DatasetReader, pd.DataFrame, gpd.GeoDataFrame]) -> Dict[str, float]:
     statistics = DescrStatsW(data)
     min = np.min(data)
     max = np.max(data)
@@ -38,14 +38,25 @@ def _descriptive_statistics(data: Union[rasterio.io.DatasetReader, pd.DataFrame,
 
 
 @beartype
-def descriptive_statistics_dataframe(input_data: Union[pd.DataFrame, gpd.GeoDataFrame], column: str) -> dict:
-    """Generate descriptive statistics from vector data.
+def descriptive_statistics_dataframe(
+    input_data: Union[pd.DataFrame, gpd.GeoDataFrame], column: str
+) -> Dict[str, float]:
+    """Compute descriptive statistics from vector data.
 
-    Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.
+    Computes the following statistics:
+    - min
+    - max
+    - mean
+    - quantile 25%
+    - quantile 50% (median)
+    - quantile 75%
+    - standard deviation
+    - relative standard deviation
+    - skewness
 
     Args:
-        input_data: Data to generate descriptive statistics from.
-        column: Specify the column to generate descriptive statistics from.
+        input_data: Input vector data.
+        column: Column in vector data to compute descriptive statistics from.
 
     Returns:
         The descriptive statistics in previously described order.
@@ -58,19 +69,33 @@ def descriptive_statistics_dataframe(input_data: Union[pd.DataFrame, gpd.GeoData
 
 
 @beartype
-def descriptive_statistics_raster(input_data: rasterio.io.DatasetReader) -> dict:
-    """Generate descriptive statistics from raster data.
+def descriptive_statistics_raster(input_data: rasterio.io.DatasetReader, band: int = 1) -> Dict[str, float]:
+    """Compute descriptive statistics from raster data.
+
+    Computes the following statistics:
+    - min
+    - max
+    - mean
+    - quantile 25%
+    - quantile 50% (median)
+    - quantile 75%
+    - standard deviation
+    - relative standard deviation
+    - skewness
 
-    Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.
     Nodata values are removed from the data before the statistics are computed.
 
     Args:
-        input_data: Data to generate descriptive statistics from.
+        input_data: Input raster data.
+        band: Raster band to compute descriptive statistics from.
 
     Returns:
         The descriptive statistics in previously described order.
     """
-    data = input_data.read().flatten()
+    if band not in range(1, input_data.count + 1):
+        raise InvalidRasterBandException(f"Input raster does not contain the selected band: {band}.")
+
+    data = input_data.read(band)
     nodata_value = input_data.nodata
     data = data[data != nodata_value]
     statistics = _descriptive_statistics(data)
diff --git a/tests/exploratory_analyses/descriptive_statistics_test.py b/tests/exploratory_analyses/descriptive_statistics_test.py
index ee8f4a69..b7aa634c 100644
--- a/tests/exploratory_analyses/descriptive_statistics_test.py
+++ b/tests/exploratory_analyses/descriptive_statistics_test.py
@@ -61,7 +61,7 @@ def test_descriptive_statistics_geodataframe():
 
 def test_descriptive_statistics_raster():
     """Checks that returned statistics are correct when using numpy.ndarray."""
-    test = descriptive_statistics_raster(src_raster)
+    test = descriptive_statistics_raster(src_raster, 1)
     np.testing.assert_almost_equal(test["min"], 2.503)
     np.testing.assert_almost_equal(test["max"], 9.67)
     np.testing.assert_almost_equal(test["mean"], 5.1865644)