refactor: move init stats to own module

dynobo · Jun 24, 2024 · f4e5484 · f4e5484
1 parent e1b4c01
commit f4e5484
Show file tree

Hide file tree

Showing 8 changed files with 100 additions and 91 deletions.
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
@@ -23,12 +23,6 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-      - name: Install matplotlib default font
-        run: |
-          wget -O dejavu.zip http://sourceforge.net/projects/dejavu/files/dejavu/2.37/dejavu-fonts-ttf-2.37.zip
-          unzip -d dejavu/ dejavu.zip
-          mv dejavu /usr/share/fonts/
-          fc-cache -fv
       - name: Install dependencies
         run: pip install '.[dev]'
       - name: Format

diff --git a/CHANGELOG b/CHANGELOG
@@ -1,8 +1,10 @@
 # Changelog
 
-## 0.4.0 (upcoming)
+## 0.4.0 (2024-06-24)
 
-- Breaking change: Drop support for Python < 3.9.
-- Improve performance
+- Breaking changes:
+  - Drop support for Python < 3.9.
+  - Rename `lmdiag.info()` to `lmdiag.help()`
 - Fix crash for `linearmodels` with 2+ degrees of freedom.
 - Add support for scikit-learn's `LinearRegression`
+- Improve performance
diff --git a/benchmark.py b/benchmark.py
@@ -5,12 +5,12 @@
 import statsmodels.api as sma
 
 import lmdiag
-import lmdiag.statistics
+import lmdiag.statistics.select
 
 df = sma.datasets.get_rdataset("ames", "openintro").data
 lm = sm.formula.api.ols("np.log10(price) ~ Q('Overall.Qual') + np.log(area)", df).fit()
 
-lm_stats = lmdiag.statistics.init_stats(lm)
+lm_stats = lmdiag.statistics.select.get_stats(lm)
 
 
 if __name__ == "__main__":
@@ -22,7 +22,7 @@
         "lm_stats.standard_residuals",
         "lm_stats.cooks_d",
         "lm_stats.leverage",
-        "lm_stats.params_count",
+        "lm_stats.parameter_count",
         "lm_stats.sqrt_abs_residuals",
         "lm_stats.normalized_quantiles",
         "lmdiag.plot(lm)",

diff --git a/lmdiag/plots.py b/lmdiag/plots.py
@@ -9,8 +9,8 @@
 from statsmodels.nonparametric.smoothers_lowess import lowess
 
 from lmdiag import style
-from lmdiag.statistics import init_stats
 from lmdiag.statistics.base import StatsBase
+from lmdiag.statistics.select import get_stats
 
 LOWESS_DELTA = 0.005
 LOWESS_IT = 2
@@ -54,7 +54,7 @@ def resid_fit(
     Returns:
         Figure of the plot.
     """
-    lm_stats = lm if isinstance(lm, StatsBase) else init_stats(lm, x=x, y=y)
+    lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y)
 
     fitted = lm_stats.fitted_values
     residuals = lm_stats.residuals
@@ -92,7 +92,7 @@ def q_q(
     Returns:
         Figure of the plot.
     """
-    lm_stats = lm if isinstance(lm, StatsBase) else init_stats(lm, x=x, y=y)
+    lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y)
 
     std_resid = lm_stats.standard_residuals
     quantiles = lm_stats.normalized_quantiles
@@ -149,7 +149,7 @@ def scale_loc(
     Returns:
         Figure of the plot.
     """
-    lm_stats = lm if isinstance(lm, StatsBase) else init_stats(lm, x=x, y=y)
+    lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y)
 
     fitted_vals = lm_stats.fitted_values
     sqrt_abs_res = lm_stats.sqrt_abs_residuals
@@ -199,7 +199,7 @@ def resid_lev(
     Returns:
         Figure of the plot.
     """
-    lm_stats = lm if isinstance(lm, StatsBase) else init_stats(lm, x=x, y=y)
+    lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y)
 
     std_resid = lm_stats.standard_residuals
     cooks_d = lm_stats.cooks_d
@@ -265,7 +265,7 @@ def plot(
     Returns:
         Figure of the plot.
     """
-    lm_stats = init_stats(lm=lm, x=x, y=y)
+    lm_stats = get_stats(lm=lm, x=x, y=y)
 
     fig, axs = plt.subplots(2, 2, **style.subplots)
 

diff --git a/lmdiag/statistics/__init__.py b/lmdiag/statistics/__init__.py
@@ -1,72 +0,0 @@
-import warnings
-from typing import Any, Optional
-
-import numpy as np
-from statsmodels.genmod.generalized_linear_model import GLMResults
-from statsmodels.regression.linear_model import RegressionResultsWrapper
-from statsmodels.robust.robust_linear_model import RLMResults
-
-from lmdiag.statistics.base import StatsBase
-
-try:
-    import sklearn
-except ImportError:
-    sklearn = None
-
-try:
-    import linearmodels
-except ImportError:
-    linearmodels = None
-
-
-def _warn_x_y() -> None:
-    warnings.warn(
-        "`x` and `y` arguments are ignored for this model type. Do not pass them.",
-        stacklevel=3,
-    )
-
-
-def _init_linearmodels_stats(lm: Any) -> StatsBase:
-    from lmdiag.statistics.linearmodels_stats import LinearmodelsStats
-
-    return LinearmodelsStats(lm)
-
-
-def _init_sklearn_stats(lm: Any, x: np.ndarray, y: np.ndarray) -> StatsBase:
-    from lmdiag.statistics.sklearn_stats import SklearnStats
-
-    return SklearnStats(lm, x=x, y=y)
-
-
-def _init_statsmodels_stats(lm: Any) -> StatsBase:
-    from lmdiag.statistics.statsmodels_stats import StatsmodelsStats
-
-    return StatsmodelsStats(lm)
-
-
-def init_stats(
-    lm: Any, x: Optional[np.ndarray] = None, y: Optional[np.ndarray] = None
-) -> StatsBase:
-    """Gather statistics depending on linear model type."""
-    if isinstance(lm, (RegressionResultsWrapper, GLMResults, RLMResults)):
-        if x or y:
-            _warn_x_y()
-        model_stats = _init_statsmodels_stats(lm)
-
-    elif linearmodels and isinstance(lm, linearmodels.iv.results.OLSResults):
-        if x or y:
-            _warn_x_y()
-        model_stats = _init_linearmodels_stats(lm)
-
-    elif sklearn and isinstance(lm, sklearn.linear_model.LinearRegression):
-        if x is None or y is None:
-            raise ValueError("x and y args must be provided this model type!")
-        model_stats = _init_sklearn_stats(lm, x, y)
-
-    else:
-        raise TypeError(
-            "Model type not (yet) supported. Currently supported are linear "
-            "models from `statsmodels`, `linearmodels` and `sklearn` packages."
-        )
-
-    return model_stats

diff --git a/lmdiag/statistics/select.py b/lmdiag/statistics/select.py
@@ -0,0 +1,101 @@
+"""Select the `StatsBase` implementation that matches a fitted model."""
+
+import warnings
+from typing import Any, Optional
+
+import numpy as np
+from statsmodels.genmod.generalized_linear_model import GLMResults
+from statsmodels.regression.linear_model import RegressionResultsWrapper
+from statsmodels.robust.robust_linear_model import RLMResults
+
+from lmdiag.statistics.base import StatsBase
+
+# sklearn and linearmodels are optional; a missing package is recorded as None
+# so that `get_stats` can skip the corresponding branch.
+try:
+    import sklearn
+except ImportError:
+    sklearn = None
+
+try:
+    import linearmodels
+except ImportError:
+    linearmodels = None
+
+
+def _warn_x_y() -> None:
+    """Warn that `x` and `y` were passed for a model type that ignores them."""
+    warnings.warn(
+        "`x` and `y` arguments are ignored for this model type. Do not pass them.",
+        # Frames: this helper -> get_stats -> caller of get_stats.
+        stacklevel=3,
+    )
+
+
+def _init_linearmodels_stats(lm: Any) -> StatsBase:
+    """Wrap `lm` in `LinearmodelsStats`; the import happens inside the function."""
+    from lmdiag.statistics.linearmodels_stats import LinearmodelsStats
+
+    return LinearmodelsStats(lm)
+
+
+def _init_sklearn_stats(lm: Any, x: np.ndarray, y: np.ndarray) -> StatsBase:
+    """Wrap `lm`, `x`, `y` in `SklearnStats`; the import happens inside the function."""
+    from lmdiag.statistics.sklearn_stats import SklearnStats
+
+    return SklearnStats(lm, x=x, y=y)
+
+
+def _init_statsmodels_stats(lm: Any) -> StatsBase:
+    """Wrap `lm` in `StatsmodelsStats`; the import happens inside the function."""
+    from lmdiag.statistics.statsmodels_stats import StatsmodelsStats
+
+    return StatsmodelsStats(lm)
+
+
+def get_stats(
+    lm: Any, x: Optional[np.ndarray] = None, y: Optional[np.ndarray] = None
+) -> StatsBase:
+    """Gather statistics depending on linear model type.
+
+    Args:
+        lm: Fitted model from `statsmodels`, `linearmodels` or `sklearn`.
+        x: Predictor data; required for `sklearn` models, ignored (with a
+            warning) for the other model types.
+        y: Response data; same rules as `x`.
+
+    Returns:
+        The `StatsBase` object built for `lm`.
+
+    Raises:
+        ValueError: If `lm` is a `sklearn` model and `x` or `y` is missing.
+        TypeError: If the type of `lm` is not supported.
+    """
+    # Test against None rather than truthiness: `bool()` of a NumPy array with
+    # more than one element raises ValueError.
+    has_x_or_y = x is not None or y is not None
+
+    if isinstance(lm, (RegressionResultsWrapper, GLMResults, RLMResults)):
+        if has_x_or_y:
+            _warn_x_y()
+        model_stats = _init_statsmodels_stats(lm)
+
+    elif linearmodels and isinstance(
+        lm, (linearmodels.iv.results.OLSResults, linearmodels.iv.results.IVResults)
+    ):
+        if has_x_or_y:
+            _warn_x_y()
+        model_stats = _init_linearmodels_stats(lm)
+
+    elif sklearn and isinstance(lm, sklearn.linear_model.LinearRegression):
+        if x is None or y is None:
+            raise ValueError("x and y args must be provided this model type!")
+        model_stats = _init_sklearn_stats(lm, x, y)
+
+    else:
+        raise TypeError(
+            "Model type not (yet) supported. Currently supported are linear "
+            "models from `statsmodels`, `linearmodels` and `sklearn` packages."
+        )
+
+    return model_stats
diff --git a/lmdiag/style.py b/lmdiag/style.py
@@ -36,6 +36,17 @@ class MplKwargs(TypedDict, total=False):
 
 
 def use(style: str) -> None:
+    """Set predefined style for plots.
+
+    Available styles:
+    - 'black_and_red' (mimics style of R's lm.diag)
+
+    Args:
+        style: Name of the preset style.
+
+    Raises:
+        ValueError: If style is unknown.
+    """
     if style == "black_and_red":
         scatter.update(
             {"marker": "o", "color": "none", "edgecolors": "black", "linewidth": 1}

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "lmdiag"
-version = "0.3.8"
+version = "0.4.0"
 description = "Diagnostic Plots for Linear Regression Models. Similar to plot.lm in R."
 keywords = [
     "lm",