Merge pull request #157 from shakedzy/153-add-type-hints-to-functions

#153 add type hints to functions #156 ImportError: cannot import name 'interp' from 'scipy'
shakedzy · Jan 27, 2024 · 9e65935 · 9e65935
2 parents d4593b0 + 2d1655f
commit 9e65935
Show file tree

Hide file tree

Showing 12 changed files with 602 additions and 334 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,8 +4,8 @@ syntax: glob
 env/*
 venv/*
 ENV/*
-.idea/*
-.vscode/*
+.idea
+.vscode
 .DS_Store
 dython.egg*/*
 *__pycache__*

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Change Log
 
+## 0.7.5 _(dev)_
+* Adding type hints to all functions (issue [#153](https://github.com/shakedzy/dython/issues/153))
+* Dropping the dependency on `scikit-plot`, as it is no longer maintained (issue [#156](https://github.com/shakedzy/dython/issues/156))
+
 ## 0.7.4
 * Handling running plotting functions with `plot=False` in Jupyter and truly avoiding plotting (issue [#147](https://github.com/shakedzy/dython/issues/147))
 

diff --git a/README.md b/README.md
@@ -30,7 +30,7 @@ If you wish to install from source:
 pip install git+https://github.com/shakedzy/dython.git
 ```
 
-**Dependencies:** `numpy`, `pandas`, `seaborn`, `scipy`, `matplotlib`, `sklearn`, `scikit-plot`
+**Dependencies:** `numpy`, `pandas`, `seaborn`, `scipy`, `matplotlib`, `sklearn`
 
 ## Contributing:
 Contributions are always welcome - if you found something you can fix, or have an idea for a new feature, feel free to write it and open a pull request. Please make sure to go over the [contribution guidelines](https://github.com/shakedzy/dython/blob/master/CONTRIBUTING.md).

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.7.4
+0.7.5.dev
diff --git a/dython/_private.py b/dython/_private.py
@@ -2,26 +2,34 @@
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
+from numpy.typing import NDArray
+from typing import Optional, Any, Tuple, Union, List, Literal
+from .typing import Number, OneDimArray
 
-IS_JUPYTER = None
 
+IS_JUPYTER: bool = False
 
-def set_is_jupyter(force_to=None):
+
+def set_is_jupyter(force_to: Optional[bool] = None) -> None:
     global IS_JUPYTER
     if force_to is not None:
         IS_JUPYTER = force_to
     else:
         IS_JUPYTER = "ipykernel_launcher.py" in sys.argv[0]
 
 
-def plot_or_not(plot):
+def plot_or_not(plot: bool) -> None:
     if plot:
         plt.show()
     elif not plot and IS_JUPYTER:
         plt.close()
 
 
-def convert(data, to, copy=True):
+def convert(
+    data: Union[List[Number], NDArray, pd.DataFrame],
+    to: Literal["array", "list", "dataframe"],
+    copy: bool = True,
+) -> Union[List[Number], NDArray, pd.DataFrame]:
     converted = None
     if to == "array":
         if isinstance(data, np.ndarray):
@@ -31,7 +39,7 @@ def convert(data, to, copy=True):
         elif isinstance(data, list):
             converted = np.array(data)
         elif isinstance(data, pd.DataFrame):
-            converted = data.values()
+            converted = data.values()  # type: ignore
     elif to == "list":
         if isinstance(data, list):
             converted = data.copy() if copy else data
@@ -53,10 +61,12 @@ def convert(data, to, copy=True):
             )
         )
     else:
-        return converted
+        return converted  # type: ignore
 
 
-def remove_incomplete_samples(x, y):
+def remove_incomplete_samples(
+    x: Union[List[Any], OneDimArray], y: Union[List[Any], OneDimArray]
+) -> Tuple[Union[List[Any], OneDimArray], Union[List[Any], OneDimArray]]:
     x = [v if v is not None else np.nan for v in x]
     y = [v if v is not None else np.nan for v in y]
     arr = np.array([x, y]).transpose()
@@ -67,7 +77,11 @@ def remove_incomplete_samples(x, y):
         return arr[0], arr[1]
 
 
-def replace_nan_with_value(x, y, value):
+def replace_nan_with_value(
+    x: Union[List[Any], OneDimArray],
+    y: Union[List[Any], OneDimArray],
+    value: Any,
+) -> Tuple[NDArray, NDArray]:
     x = np.array(
         [v if v == v and v is not None else value for v in x]
     )  # NaN != NaN

diff --git a/dython/data_utils.py b/dython/data_utils.py
@@ -1,6 +1,9 @@
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
+from typing import Optional, Tuple, List, Any, Union
+from numpy.typing import NDArray
+from .typing import Number, TwoDimArray
 from ._private import convert, plot_or_not
 
 
@@ -12,7 +15,10 @@
 ]
 
 
-def one_hot_encode(arr, classes=None):
+def one_hot_encode(
+    array: Union[List[Union[Number, str]], NDArray],
+    classes: Optional[int] = None,
+) -> NDArray:
     """
     One-hot encode a 1D array.
     Based on this StackOverflow answer: https://stackoverflow.com/a/29831596/5863503
@@ -35,31 +41,31 @@ def one_hot_encode(arr, classes=None):
            [1., 0., 0., 0., 0., 0.],
            [0., 0., 0., 0., 0., 1.]])
     """
-    arr = convert(arr, "array").astype(int)
+    arr: NDArray = convert(array, "array").astype(int)  # type: ignore
     if not len(arr.shape) == 1:
         raise ValueError(
             f"array must have only one dimension, but has shape: {arr.shape}"
         )
     if arr.min() < 0:
         raise ValueError("array cannot contain negative values")
     classes = classes if classes is not None else arr.max() + 1
-    h = np.zeros((arr.size, classes))
+    h = np.zeros((arr.size, classes))  # type: ignore
     h[np.arange(arr.size), arr] = 1
     return h
 
 
 def split_hist(
-    dataset,
-    values,
-    split_by,
-    title="",
-    xlabel="",
-    ylabel=None,
-    figsize=None,
-    legend="best",
-    plot=True,
+    dataset: pd.DataFrame,
+    values: str,
+    split_by: str,
+    title: Optional[str] = "",
+    xlabel: Optional[str] = "",
+    ylabel: Optional[str] = None,
+    figsize: Optional[Tuple[int, int]] = None,
+    legend: Optional[str] = "best",
+    plot: bool = True,
     **hist_kwargs,
-):
+) -> plt.Axes:
     """
     Plot a histogram of values from a given dataset, split by the values of a chosen column
 
@@ -88,7 +94,7 @@ def split_hist(
 
     Returns:
     --------
-    A Matplotlib `Axe`
+    A Matplotlib `Axes`
 
     Example:
     --------
@@ -111,13 +117,16 @@ def split_hist(
         if title == "":
             title = values + " by " + split_by
         plt.title(title)
-    plt.ylabel(ylabel)
+    if ylabel:
+        plt.ylabel(ylabel)
     ax = plt.gca()
     plot_or_not(plot)
     return ax
 
 
-def identify_columns_by_type(dataset, include):
+def identify_columns_by_type(
+    dataset: TwoDimArray, include: List[str]
+) -> List[Any]:
     """
     Given a dataset, identify columns of the types requested.
 
@@ -138,12 +147,12 @@ def identify_columns_by_type(dataset, include):
     ['col2', 'col3']
 
     """
-    dataset = convert(dataset, "dataframe")
-    columns = list(dataset.select_dtypes(include=include).columns)
+    df: pd.DataFrame = convert(dataset, "dataframe")  # type: ignore
+    columns = list(df.select_dtypes(include=include).columns)
     return columns
 
 
-def identify_columns_with_na(dataset):
+def identify_columns_with_na(dataset: TwoDimArray) -> pd.DataFrame:
     """
     Return columns names having NA values, sorted in descending order by their number of NAs
 
@@ -164,10 +173,10 @@ def identify_columns_with_na(dataset):
     1   col2         2
     0   col1         1
     """
-    dataset = convert(dataset, "dataframe")
-    na_count = [sum(dataset[cc].isnull()) for cc in dataset.columns]
+    df: pd.DataFrame = convert(dataset, "dataframe")  # type: ignore
+    na_count = [sum(df[cc].isnull()) for cc in df.columns]
     return (
-        pd.DataFrame({"column": dataset.columns, "na_count": na_count})
+        pd.DataFrame({"column": df.columns, "na_count": na_count})
         .query("na_count > 0")
         .sort_values("na_count", ascending=False)
     )
diff --git a/dython/examples.py b/dython/examples.py
@@ -42,7 +42,9 @@ def roc_graph_example():
     y_score = classifier.fit(X_train, y_train).predict_proba(X_test)
 
     # Plot ROC graphs
-    return metric_graph(y_test, y_score, "roc", class_names=iris.target_names)
+    return metric_graph(
+        y_test, y_score, "roc", class_names_list=iris.target_names
+    )
 
 
 def pr_graph_example():
@@ -73,7 +75,9 @@ def pr_graph_example():
     y_score = classifier.fit(X_train, y_train).predict_proba(X_test)
 
     # Plot PR graphs
-    return metric_graph(y_test, y_score, "pr", class_names=iris.target_names)
+    return metric_graph(
+        y_test, y_score, "pr", class_names_list=iris.target_names
+    )
 
 
 def associations_iris_example():