From e14a7e2b48e0742c25525fc04432d02e77574534 Mon Sep 17 00:00:00 2001
From: Fabi <schmidtfa91@gmail.com>
Date: Thu, 22 Aug 2024 14:10:50 +0200
Subject: [PATCH] adding sample size adjusted bic

---
 pyrasa/utils/fit_funcs.py   | 23 +++++++++++++++--------
 tests/test_compute_slope.py |  2 +-
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/pyrasa/utils/fit_funcs.py b/pyrasa/utils/fit_funcs.py
index 2343b8b..2166e30 100644
--- a/pyrasa/utils/fit_funcs.py
+++ b/pyrasa/utils/fit_funcs.py
@@ -75,14 +75,21 @@ def _get_gof(psd: np.ndarray, psd_pred: np.ndarray, k: int, fit_type: str) -> pd
     mse = np.mean(residuals**2)
     n = len(psd)
 
-    loglik = -n / 2 * (1 + np.log(mse) + np.log(2 * np.pi))
-    aic = 2 * (k - loglik)
-    bic = k * np.log(n) - 2 * loglik
-
-    # bic = n * np.log(mse) + k * np.log(n)
-    # aic = n * np.log(mse) + 2 * k
-
-    gof = pd.DataFrame({'mse': mse, 'r_squared': 1 - (ss_res / ss_tot), 'BIC': bic, 'AIC': aic}, index=[0])
+    # https://robjhyndman.com/hyndsight/lm_aic.html
+    c = n + n * np.log(2 * np.pi)
+    aic = 2 * k + n * np.log(mse) + c
+    # according to Sclove 1987 only difference between BIC and AIC
+    # is that BIC uses log(n) * k instead of 2
+    bic = np.log(n) * k + n * np.log(mse) + c
+    # Sclove 1987 also hints at sample size adjusted bic
+    an = np.log((n + 2) / 24)  # defined in Rissanen 1978 based on minimum-bit representation of a signal
+    # abic -> https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7299313/
+    abic = np.log(an) * k + an * np.log(mse)
+
+    r2 = 1 - (ss_res / ss_tot)
+    r2_adj = 1 - (((1 - r2) * (n - 1)) / (n - k - 1))
+
+    gof = pd.DataFrame({'mse': mse, 'R2': r2, 'R2_adj.': r2_adj, 'BIC': bic, 'ABIC': abic, 'AIC': aic}, index=[0])
     gof['fit_type'] = fit_type
     return gof
 
diff --git a/tests/test_compute_slope.py b/tests/test_compute_slope.py
index 69b802e..eca1332 100644
--- a/tests/test_compute_slope.py
+++ b/tests/test_compute_slope.py
@@ -24,7 +24,7 @@ def test_slope_fitting_fixed(fixed_aperiodic_signal, fs, exponent):
     aperiodic_fit_f = compute_aperiodic_model(psd, freqs, fit_func='fixed')
     assert pytest.approx(aperiodic_fit_f.aperiodic_params['Exponent'][0], abs=TOLERANCE) == np.abs(exponent)
     # test goodness of fit should be close to r_squared == 1 for linear model
-    assert aperiodic_fit_f.gof['r_squared'][0] > MIN_R2
+    assert aperiodic_fit_f.gof['R2'][0] > MIN_R2
 
     # test if we can set fit bounds w/o error
     # _, _ = compute_slope(psd, freqs, fit_func='fixed', fit_bounds=[2, 50])