Merge pull request #204 from GispoCoding/112-toolkit-plugin-interface

112 toolkit plugin interface
GispoCoding · Oct 16, 2023 · d4e359c · d4e359c
2 parents 682d30a + 5268234
commit d4e359c
Show file tree

Hide file tree

Showing 13 changed files with 1,504 additions and 157 deletions.
diff --git a/eis_toolkit/cli.py b/eis_toolkit/cli.py
diff --git a/eis_toolkit/exploratory_analyses/parallel_coordinates.py b/eis_toolkit/exploratory_analyses/parallel_coordinates.py
@@ -17,8 +17,8 @@
 
 
 def _normalize_data(data: np.ndarray) -> Tuple[np.ndarray, float, float]:
-    y_min = data.min(axis=0)
-    y_max = data.max(axis=0)
+    y_min = np.nanmin(data, axis=0)
+    y_max = np.nanmax(data, axis=0)
     dy = y_max - y_min
     y_min -= dy * 0.05
     y_max += dy * 0.05
@@ -108,8 +108,8 @@ def _plot_parallel_coordinates(
         # Create the colorbar for numerical data
         colorbar_mappable = ScalarMappable(cmap=cmap, norm=norm)
         colorbar_mappable.set_array([])
-        colorbar = plt.colorbar(colorbar_mappable)
-        colorbar.set_label(color_column_name, fontsize=14)
+        # colorbar = plt.colorbar(colorbar_mappable)
+        # colorbar.set_label(color_column_name, fontsize=14)
 
     # Draw lines
     for i in range(data.shape[0]):
@@ -141,6 +141,10 @@ def plot_parallel_coordinates(
 ) -> matplotlib.figure.Figure:
     """Plot a parallel coordinates plot.
 
+    Automatically removes all rows containing null/nan values. Tries to convert columns to numeric
+    to be able to plot them. If more than 8 columns are present (after numeric filtering), keeps only
+    the first 8 to plot.
+
     Args:
         df: The DataFrame to plot.
         color_column_name: The name of the column in df to use for color encoding.
@@ -165,21 +169,30 @@ def plot_parallel_coordinates(
             f"The provided color column {color_column_name} is not found in the DataFrame."
         )
 
+    df = df.convert_dtypes()
+    df = df.apply(pd.to_numeric, errors="ignore")
+
     color_data = df[color_column_name].to_numpy()
     if len(set([type(elem) for elem in color_data])) != 1:
         raise exceptions.InconsistentDataTypesException(
             "The color column should have a consistent datatype. Multiple data types detected in the color column."
         )
 
+    df = df.select_dtypes(include=np.number)
+
     # Drop non-numeric columns and the column used for coloring
     columns_to_drop = [color_column_name]
     for column in df.columns.values:
-        if not np.issubdtype(df[column].dtype, np.number):
+        if df[column].isnull().all():
             columns_to_drop.append(column)
-    filtered_df = df.loc[:, ~df.columns.isin(columns_to_drop)]
+    df = df.loc[:, ~df.columns.isin(columns_to_drop)]
+
+    # Keep only first 8 columns if more are still present
+    if len(df.columns.values) > 8:
+        df = df.iloc[:, :8]
 
-    data_labels = filtered_df.columns.values
-    data = filtered_df.to_numpy()
+    data_labels = df.columns.values
+    data = df.to_numpy()
 
     fig = _plot_parallel_coordinates(
         data=data,

diff --git a/eis_toolkit/prediction/fuzzy_overlay.py b/eis_toolkit/prediction/fuzzy_overlay.py
@@ -1,12 +1,14 @@
 import numpy as np
 from beartype import beartype
 
-from eis_toolkit.exceptions import InvalidParameterValueException
+from eis_toolkit import exceptions
 
 
-def _check_input_data(data):
+def _check_input_data(data: np.ndarray):
     if data.min() < 0 or data.max() > 1:
-        raise InvalidParameterValueException("All data must be in range [0, 1]")
+        raise exceptions.InvalidParameterValueException("All data must be in range [0, 1].")
+    if data.ndim != 3:
+        raise exceptions.InvalidParameterValueException("Input data for overlay should be 3D numpy array.")
 
 
 @beartype
@@ -103,7 +105,7 @@ def gamma_overlay(data: np.ndarray, gamma: float) -> np.ndarray:
         InvalidParameterValueException: If data values or gamma are not in range [0, 1].
     """
     if gamma < 0 or gamma > 1:
-        raise InvalidParameterValueException("The gamma parameter must be in range [0, 1]")
+        raise exceptions.InvalidParameterValueException("The gamma parameter must be in range [0, 1]")
 
     sum = sum_overlay(data=data)
     product = product_overlay(data=data)

diff --git a/eis_toolkit/transformations/clipping.py b/eis_toolkit/transformations/clip.py
@@ -22,7 +22,7 @@
 
 
 @beartype
-def _clipping(  # type: ignore[no-any-unimported]
+def _clip_transform(  # type: ignore[no-any-unimported]
     in_array: np.ndarray,
     limits: Tuple[Optional[Number], Optional[Number]],
 ) -> np.ndarray:
@@ -40,14 +40,14 @@ def _clipping(  # type: ignore[no-any-unimported]
 
 
 @beartype
-def clipping(  # type: ignore[no-any-unimported]
+def clip_transform(  # type: ignore[no-any-unimported]
     raster: rasterio.io.DatasetReader,
     limits: Sequence[Tuple[Optional[Number], Optional[Number]]],
     bands: Optional[Sequence[int]] = None,
     nodata: Optional[Number] = None,
 ) -> Tuple[np.ndarray, dict, dict]:
     """
-    Clipping data based on specified upper and lower limits.
+    Clips data based on specified upper and lower limits.
 
     Takes one nodata value that will be ignored in calculations.
     Replaces values below the lower limit and above the upper limit with provided values, respectively.
@@ -101,7 +101,7 @@ def clipping(  # type: ignore[no-any-unimported]
         band_array = cast_array_to_float(band_array, cast_int=True)
         band_array = nodata_to_nan(band_array, nodata_value=nodata)
 
-        band_array = _clipping(band_array, limits=limits[i])
+        band_array = _clip_transform(band_array, limits=limits[i])
 
         band_array = nan_to_nodata(band_array, nodata_value=nodata)
         band_array = cast_array_to_int(band_array, scalar=nodata, initial_dtype=inital_dtype)

diff --git a/environment.yml b/environment.yml
@@ -15,11 +15,11 @@ dependencies:
   - statsmodels >=0.13.5,<1.0.0
   - keras >=2.10.0,<3.0.0
   - tensorflow >=2.10.0,<3.0.0
-  - plotly >=5.14.0,<6.0.0
   - beartype >=0.13.1,<2.0.0
   - seaborn >=0.12.2
   - pykrige >=1.7.0
   - rtree >= 1.0.1
+  - typer >=0.9.0
   - imbalanced-learn >= 0.11.0
   # Dependencies for testing
   - pytest >=7.2.1