diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index a4c6d20..bf6f59c 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -23,12 +23,6 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install matplotlib default font - run: | - wget -O dejavu.zip http://sourceforge.net/projects/dejavu/files/dejavu/2.37/dejavu-fonts-ttf-2.37.zip - unzip -d dejavu/ dejavu.zip - mv dejavu /usr/share/fonts/ - fc-cache -fv - name: Install dependencies run: pip install '.[dev]' - name: Format diff --git a/CHANGELOG b/CHANGELOG index 45cb291..627d2ee 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,8 +1,10 @@ # Changelog -## 0.4.0 (upcoming) +## 0.4.0 (2024-06-24) -- Breaking change: Drop support for Python < 3.9. -- Improve performance +- Breaking changes: + - Drop support for Python < 3.9. + - Rename `lmdiag.info()` to `lmdiag.help()` - Fix crash for `linearmodels` with 2+ degrees of freedom. - Add support for scikit-learn's `LinearRegression` +- Improve performance diff --git a/benchmark.py b/benchmark.py index e87fbaf..f0259e0 100644 --- a/benchmark.py +++ b/benchmark.py @@ -5,12 +5,12 @@ import statsmodels.api as sma import lmdiag -import lmdiag.statistics +import lmdiag.statistics.select df = sma.datasets.get_rdataset("ames", "openintro").data lm = sm.formula.api.ols("np.log10(price) ~ Q('Overall.Qual') + np.log(area)", df).fit() -lm_stats = lmdiag.statistics.init_stats(lm) +lm_stats = lmdiag.statistics.select.get_stats(lm) if __name__ == "__main__": @@ -22,7 +22,7 @@ "lm_stats.standard_residuals", "lm_stats.cooks_d", "lm_stats.leverage", - "lm_stats.params_count", + "lm_stats.parameter_count", "lm_stats.sqrt_abs_residuals", "lm_stats.normalized_quantiles", "lmdiag.plot(lm)", diff --git a/lmdiag/plots.py b/lmdiag/plots.py index 6942b6f..5a6c099 100644 --- a/lmdiag/plots.py +++ b/lmdiag/plots.py @@ -9,8 +9,8 @@ from statsmodels.nonparametric.smoothers_lowess import lowess from lmdiag 
import style -from lmdiag.statistics import init_stats from lmdiag.statistics.base import StatsBase +from lmdiag.statistics.select import get_stats LOWESS_DELTA = 0.005 LOWESS_IT = 2 @@ -54,7 +54,7 @@ def resid_fit( Returns: Figure of the plot. """ - lm_stats = lm if isinstance(lm, StatsBase) else init_stats(lm, x=x, y=y) + lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y) fitted = lm_stats.fitted_values residuals = lm_stats.residuals @@ -92,7 +92,7 @@ def q_q( Returns: Figure of the plot. """ - lm_stats = lm if isinstance(lm, StatsBase) else init_stats(lm, x=x, y=y) + lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y) std_resid = lm_stats.standard_residuals quantiles = lm_stats.normalized_quantiles @@ -149,7 +149,7 @@ def scale_loc( Returns: Figure of the plot. """ - lm_stats = lm if isinstance(lm, StatsBase) else init_stats(lm, x=x, y=y) + lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y) fitted_vals = lm_stats.fitted_values sqrt_abs_res = lm_stats.sqrt_abs_residuals @@ -199,7 +199,7 @@ def resid_lev( Returns: Figure of the plot. """ - lm_stats = lm if isinstance(lm, StatsBase) else init_stats(lm, x=x, y=y) + lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y) std_resid = lm_stats.standard_residuals cooks_d = lm_stats.cooks_d @@ -265,7 +265,7 @@ def plot( Returns: Figure of the plot. 
""" - lm_stats = init_stats(lm=lm, x=x, y=y) + lm_stats = get_stats(lm=lm, x=x, y=y) fig, axs = plt.subplots(2, 2, **style.subplots) diff --git a/lmdiag/statistics/__init__.py b/lmdiag/statistics/__init__.py index 44741fa..e69de29 100644 --- a/lmdiag/statistics/__init__.py +++ b/lmdiag/statistics/__init__.py @@ -1,72 +0,0 @@ -import warnings -from typing import Any, Optional - -import numpy as np -from statsmodels.genmod.generalized_linear_model import GLMResults -from statsmodels.regression.linear_model import RegressionResultsWrapper -from statsmodels.robust.robust_linear_model import RLMResults - -from lmdiag.statistics.base import StatsBase - -try: - import sklearn -except ImportError: - sklearn = None - -try: - import linearmodels -except ImportError: - linearmodels = None - - -def _warn_x_y() -> None: - warnings.warn( - "`x` and `y` arguments are ignored for this model type. Do not pass them.", - stacklevel=3, - ) - - -def _init_linearmodels_stats(lm: Any) -> StatsBase: - from lmdiag.statistics.linearmodels_stats import LinearmodelsStats - - return LinearmodelsStats(lm) - - -def _init_sklearn_stats(lm: Any, x: np.ndarray, y: np.ndarray) -> StatsBase: - from lmdiag.statistics.sklearn_stats import SklearnStats - - return SklearnStats(lm, x=x, y=y) - - -def _init_statsmodels_stats(lm: Any) -> StatsBase: - from lmdiag.statistics.statsmodels_stats import StatsmodelsStats - - return StatsmodelsStats(lm) - - -def init_stats( - lm: Any, x: Optional[np.ndarray] = None, y: Optional[np.ndarray] = None -) -> StatsBase: - """Gather statistics depending on linear model type.""" - if isinstance(lm, (RegressionResultsWrapper, GLMResults, RLMResults)): - if x or y: - _warn_x_y() - model_stats = _init_statsmodels_stats(lm) - - elif linearmodels and isinstance(lm, linearmodels.iv.results.OLSResults): - if x or y: - _warn_x_y() - model_stats = _init_linearmodels_stats(lm) - - elif sklearn and isinstance(lm, sklearn.linear_model.LinearRegression): - if x is None or y is None: - 
raise ValueError("x and y args must be provided this model type!")
-        model_stats = _init_sklearn_stats(lm, x, y)
-
-    else:
-        raise TypeError(
-            "Model type not (yet) supported. Currently supported are linear "
-            "models from `statsmodels`, `linearmodels` and `sklearn` packages."
-        )
-
-    return model_stats
diff --git a/lmdiag/statistics/select.py b/lmdiag/statistics/select.py
new file mode 100644
index 0000000..9338cd9
--- /dev/null
+++ b/lmdiag/statistics/select.py
@@ -0,0 +1,97 @@
+import warnings
+from typing import Any, Optional
+
+import numpy as np
+from statsmodels.genmod.generalized_linear_model import GLMResults
+from statsmodels.regression.linear_model import RegressionResultsWrapper
+from statsmodels.robust.robust_linear_model import RLMResults
+
+from lmdiag.statistics.base import StatsBase
+
+try:
+    import sklearn
+except ImportError:
+    sklearn = None
+
+try:
+    import linearmodels
+except ImportError:
+    linearmodels = None
+
+
+def _warn_x_y() -> None:
+    """Warn that `x` and `y` were passed for a model type that ignores them."""
+    warnings.warn(
+        "`x` and `y` arguments are ignored for this model type. Do not pass them.",
+        stacklevel=3,
+    )
+
+
+def _init_linearmodels_stats(lm: Any) -> StatsBase:
+    """Wrap a `linearmodels` result; the stats class is imported only when needed."""
+    from lmdiag.statistics.linearmodels_stats import LinearmodelsStats
+
+    return LinearmodelsStats(lm)
+
+
+def _init_sklearn_stats(lm: Any, x: np.ndarray, y: np.ndarray) -> StatsBase:
+    """Wrap an `sklearn` model; the stats class is imported only when needed."""
+    from lmdiag.statistics.sklearn_stats import SklearnStats
+
+    return SklearnStats(lm, x=x, y=y)
+
+
+def _init_statsmodels_stats(lm: Any) -> StatsBase:
+    """Wrap a `statsmodels` result; the stats class is imported only when needed."""
+    from lmdiag.statistics.statsmodels_stats import StatsmodelsStats
+
+    return StatsmodelsStats(lm)
+
+
+def get_stats(
+    lm: Any, x: Optional[np.ndarray] = None, y: Optional[np.ndarray] = None
+) -> StatsBase:
+    """Gather statistics depending on linear model type.
+
+    Args:
+        lm: Model or results object from `statsmodels`, `linearmodels` or
+            `sklearn`.
+        x: Passed on to the `sklearn` statistics wrapper. Required for `sklearn`
+            models; for other model types it is ignored and triggers a warning.
+        y: Same handling as `x`.
+
+    Returns:
+        Statistics object matching the type of `lm`.
+
+    Raises:
+        ValueError: If `lm` is an `sklearn` model and `x` or `y` is missing.
+        TypeError: If the type of `lm` is not supported.
+    """
+    # `bool()` of a multi-element numpy array raises ValueError, so test for
+    # None explicitly instead of relying on the truthiness of `x` / `y`.
+    xy_passed = x is not None or y is not None
+
+    if isinstance(lm, (RegressionResultsWrapper, GLMResults, RLMResults)):
+        if xy_passed:
+            _warn_x_y()
+        model_stats = _init_statsmodels_stats(lm)
+
+    elif linearmodels and isinstance(
+        lm, (linearmodels.iv.results.OLSResults, linearmodels.iv.results.IVResults)
+    ):
+        if xy_passed:
+            _warn_x_y()
+        model_stats = _init_linearmodels_stats(lm)
+
+    elif sklearn and isinstance(lm, sklearn.linear_model.LinearRegression):
+        if x is None or y is None:
+            raise ValueError("x and y args must be provided this model type!")
+        model_stats = _init_sklearn_stats(lm, x, y)
+
+    else:
+        raise TypeError(
+            "Model type not (yet) supported. Currently supported are linear "
+            "models from `statsmodels`, `linearmodels` and `sklearn` packages."
+        )
+
+    return model_stats
diff --git a/lmdiag/style.py b/lmdiag/style.py
index 147db46..1b37831 100644
--- a/lmdiag/style.py
+++ b/lmdiag/style.py
@@ -36,6 +36,17 @@ class MplKwargs(TypedDict, total=False):
 
 
 def use(style: str) -> None:
+    """Set predefined style for plots.
+
+    Available styles:
+    - 'black_and_red' (mimics style of R's lm.diag)
+
+    Args:
+        style: Name of the preset style.
+
+    Raises:
+        ValueError: If style is unknown.
+ """ if style == "black_and_red": scatter.update( {"marker": "o", "color": "none", "edgecolors": "black", "linewidth": 1} diff --git a/pyproject.toml b/pyproject.toml index 13d8f78..71b21b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lmdiag" -version = "0.3.8" +version = "0.4.0" description = "Diagnostic Plots for Lineare Regression Models. Similar to plot.lm in R." keywords = [ "lm",