From e14a7e2b48e0742c25525fc04432d02e77574534 Mon Sep 17 00:00:00 2001 From: Fabi Date: Thu, 22 Aug 2024 14:10:50 +0200 Subject: [PATCH] adding sample size adjusted bic --- pyrasa/utils/fit_funcs.py | 23 +++++++++++++++-------- tests/test_compute_slope.py | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/pyrasa/utils/fit_funcs.py b/pyrasa/utils/fit_funcs.py index 2343b8b..2166e30 100644 --- a/pyrasa/utils/fit_funcs.py +++ b/pyrasa/utils/fit_funcs.py @@ -75,14 +75,21 @@ def _get_gof(psd: np.ndarray, psd_pred: np.ndarray, k: int, fit_type: str) -> pd mse = np.mean(residuals**2) n = len(psd) - loglik = -n / 2 * (1 + np.log(mse) + np.log(2 * np.pi)) - aic = 2 * (k - loglik) - bic = k * np.log(n) - 2 * loglik - - # bic = n * np.log(mse) + k * np.log(n) - # aic = n * np.log(mse) + 2 * k - - gof = pd.DataFrame({'mse': mse, 'r_squared': 1 - (ss_res / ss_tot), 'BIC': bic, 'AIC': aic}, index=[0]) + # https://robjhyndman.com/hyndsight/lm_aic.html + c = n + n * np.log(2 * np.pi) + aic = 2 * k + n * np.log(mse) + c + # according to Sclove 1987 only difference between BIC and AIC + # is that BIC uses log(n) * k instead of 2 + bic = np.log(n) * k + n * np.log(mse) + c + # Sclove 1987 also hints at sample size adjusted bic + an = np.log((n + 2) / 24) # defined in Rissanen 1978 based on minimum-bit representation of a signal + # abic -> https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7299313/ + abic = np.log(an) * k + an * np.log(mse) + + r2 = 1 - (ss_res / ss_tot) + r2_adj = 1 - (((1 - r2) * (n - 1)) / (n - k - 1)) + + gof = pd.DataFrame({'mse': mse, 'R2': r2, 'R2_adj.': r2_adj, 'BIC': bic, 'ABIC': abic, 'AIC': aic}, index=[0]) gof['fit_type'] = fit_type return gof diff --git a/tests/test_compute_slope.py b/tests/test_compute_slope.py index 69b802e..eca1332 100644 --- a/tests/test_compute_slope.py +++ b/tests/test_compute_slope.py @@ -24,7 +24,7 @@ def test_slope_fitting_fixed(fixed_aperiodic_signal, fs, exponent): aperiodic_fit_f = compute_aperiodic_model(psd, freqs, fit_func='fixed') assert pytest.approx(aperiodic_fit_f.aperiodic_params['Exponent'][0], abs=TOLERANCE) == np.abs(exponent) # test goodness of fit should be close to r_squared == 1 for linear model - assert aperiodic_fit_f.gof['r_squared'][0] > MIN_R2 + assert aperiodic_fit_f.gof['R2'][0] > MIN_R2 # test if we can set fit bounds w/o error # _, _ = compute_slope(psd, freqs, fit_func='fixed', fit_bounds=[2, 50])