From de83c3298fbb2ba2f17b8e9fad9a694f6cf0607c Mon Sep 17 00:00:00 2001
From: Phillip Alday <me@phillipalday.com>
Date: Mon, 11 Sep 2023 02:45:53 -0500
Subject: [PATCH 1/5] use StatsModels.[g]vif

---
 Project.toml             | 4 ++--
 src/MixedModelsExtras.jl | 8 +++++---
 test/vif.jl              | 6 +++---
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/Project.toml b/Project.toml
index 777dee9..138fd9b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "MixedModelsExtras"
 uuid = "781a26e1-49f4-409a-8f4c-c3159d78c17e"
 authors = ["Phillip Alday <me@phillipalday.com> and contributors"]
-version = "1.1.0"
+version = "2.0.0"
 
 [deps]
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -14,7 +14,7 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 [compat]
 MixedModels = "3, 4"
 StatsBase = "0.33, 0.34"
-StatsModels = "0.6.28, 0.7"
+StatsModels = "0.7.3"
 Tables = "1"
 julia = "1.6"
 
diff --git a/src/MixedModelsExtras.jl b/src/MixedModelsExtras.jl
index a66dc21..680e97e 100644
--- a/src/MixedModelsExtras.jl
+++ b/src/MixedModelsExtras.jl
@@ -7,15 +7,17 @@ using StatsBase
 using StatsModels
 using Tables
 
+using StatsModels: termnames, vif, gvif
+export termnames, gvif, vif
+
+StatsModels.termnames(::RandomEffectsTerm) = String[] 
+
 include("icc.jl")
 export icc
 
 include("r2.jl")
 export r², r2, adjr², adjr2
 
-include("vif.jl")
-export termnames, gvif, vif
-
 include("remef.jl")
 export partial_fitted
 
diff --git a/test/vif.jl b/test/vif.jl
index 1df3f65..34fc03b 100644
--- a/test/vif.jl
+++ b/test/vif.jl
@@ -18,7 +18,7 @@ progress = false
     fm2 = fit(MixedModel, @formula(reaction ~ 1 + days * days^2 + (1 | subj)),
               dataset(:sleepstudy); progress)
 
-    ae_int = ArgumentError("VIF only defined for models with an intercept term")
+    ae_int = ArgumentError("VIF is only defined for models with an intercept term")
     ae_nterm = ArgumentError("VIF not meaningful for models with only one non-intercept term")
     @test_throws ae_int vif(fm0)
     @test_throws ae_nterm vif(fm1)
@@ -46,7 +46,7 @@ end
     duncan = rdataset("car", "Duncan")
 
     lm1 = lm(@formula(Prestige ~ 1 + Income + Education), duncan)
-    @test termnames(lm1) == coefnames(lm1)
+    @test termnames(lm1)[2] == coefnames(lm1)
     vif_lm1 = vif(lm1)
 
     # values here taken from car
@@ -54,7 +54,7 @@ end
     @test isapprox(vif_lm1, gvif(lm1))
 
     lm2 = lm(@formula(Prestige ~ 1 + Income + Education + Type), duncan)
-    @test termnames(lm2) == ["(Intercept)", "Income", "Education", "Type"]
+    @test termnames(lm2)[2] == ["(Intercept)", "Income", "Education", "Type"]
     @test isapprox(gvif(lm2), [2.209178, 5.297584, 5.098592]; atol=1e-5)
     @test isapprox(gvif(lm2; scale=true),
                    [1.486330, 2.301648, 1.502666]; atol=1e-5)

From ca347284e4ff51ef2e7e812ca457274d515c03ee Mon Sep 17 00:00:00 2001
From: Phillip Alday <me@phillipalday.com>
Date: Wed, 13 Sep 2023 04:30:26 -0500
Subject: [PATCH 2/5] compat

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 138fd9b..d075330 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,7 +12,7 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 
 [compat]
-MixedModels = "3, 4"
+MixedModels = "4"
 StatsBase = "0.33, 0.34"
 StatsModels = "0.7.3"
 Tables = "1"

From eff372f7e012956f10e15b032dd8749468f07217 Mon Sep 17 00:00:00 2001
From: Phillip Alday <me@phillipalday.com>
Date: Wed, 13 Sep 2023 04:33:14 -0500
Subject: [PATCH 3/5] julia compat floor

---
 .github/workflows/ci.yml | 2 +-
 Project.toml             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d7125fa..fd0c6bc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,7 +17,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        version: [1.6, 1]
+        version: [1.8, 1]
         arch: [x64]
         os: [ubuntu-20.04] # macos-10.15, windows-2019
     steps:
diff --git a/Project.toml b/Project.toml
index d075330..3db052b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -16,7 +16,7 @@ MixedModels = "4"
 StatsBase = "0.33, 0.34"
 StatsModels = "0.7.3"
 Tables = "1"
-julia = "1.6"
+julia = "1.8"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

From 2fb1518ebab7a33a1a4cc6cfe1a8049fa7e62cb0 Mon Sep 17 00:00:00 2001
From: Phillip Alday <palday@users.noreply.github.com>
Date: Wed, 13 Sep 2023 09:36:48 +0000
Subject: [PATCH 4/5] Update src/MixedModelsExtras.jl

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 src/MixedModelsExtras.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/MixedModelsExtras.jl b/src/MixedModelsExtras.jl
index 680e97e..476e874 100644
--- a/src/MixedModelsExtras.jl
+++ b/src/MixedModelsExtras.jl
@@ -10,7 +10,7 @@ using Tables
 using StatsModels: termnames, vif, gvif
 export termnames, gvif, vif
 
-StatsModels.termnames(::RandomEffectsTerm) = String[] 
+StatsModels.termnames(::RandomEffectsTerm) = String[]
 
 include("icc.jl")
 export icc

From d037e207a812b8a0b0d32a15961945c8452eac8e Mon Sep 17 00:00:00 2001
From: Phillip Alday <me@phillipalday.com>
Date: Wed, 13 Sep 2023 05:23:02 -0500
Subject: [PATCH 5/5] remove unused file

---
 src/vif.jl | 143 -----------------------------------------------------
 1 file changed, 143 deletions(-)
 delete mode 100644 src/vif.jl

diff --git a/src/vif.jl b/src/vif.jl
deleted file mode 100644
index 87643e8..0000000
--- a/src/vif.jl
+++ /dev/null
@@ -1,143 +0,0 @@
-_rename_intercept(s) = strip(s) == "1" ? "(Intercept)" : s
-
-"""
-    termnames(model)
-
-Return the names associated with (fixed effects) terms in a model.
-
-For models with only continuous predictors, this is the same as
-`coefnames(model)`.
-For models with categorical predictors, the returned names reflect
-the categorical predictor and not the coefficients resulting from
-the choice of contrast coding.
-"""
-termnames(model) = _rename_intercept.(string.(_terms(model)))
-
-_terms(model) = collect(formula(model).rhs.terms)
-_terms(model::MixedModel) = collect(formula(model).rhs[1].terms)
-
-"""
-    vif(m::RegressionModel)
-
-Compute the variance inflation factor (VIF).
-
-Returns a vector of inflation factors computed for each coefficient except
-for the intercept.
-In other words, the corresponding coefficients are `coefnames(m)[2:end]`.
-
-The [variance inflation factor (VIF)](https://en.wikipedia.org/wiki/Variance_inflation_factor) measures
-the increase in the variance of a parameter's estimate in a model with multiple parameters relative to
-the variance of a parameter's estimate in a model containing only that parameter.
-
-See also [`coefnames`](@ref), [`gvif`](@ref).
-
-!!! warning
-    This method will fail if there is (numerically) perfect multicollinearity,
-    i.e. rank deficiency (in the fixed effects). In that case though, the VIF
-    isn't particularly informative anyway.
-"""
-function vif(m::RegressionModel)
-    mm = Statistics.cov2cor!(vcov(m), stderror(m))
-    all(==(1), view(modelmatrix(m), :, 1)) ||
-        throw(ArgumentError("VIF only defined for models with an intercept term"))
-    mm = @view mm[2:end, 2:end]
-    size(mm, 2) > 1 ||
-        throw(ArgumentError("VIF not meaningful for models with only one non-intercept term"))
-    # NB: The correlation matrix is positive definite and hence invertible
-    #     unless there is perfect rank deficiency, hence the warning.
-    # NB: The determinate technique for GVIF could also be applied
-    #     columnwise (instead of Term-wise) here, but it wouldn't really
-    #     be any more efficient because this is a Symmetric matrix and computing
-    #     all those determinants has its cost. The determinants also hint at
-    #     how you could show equivalency, if you remember that inversion is solving
-    #     a linear system and that Cramer's rule -- which uses determinants --
-    #     can also a linear system
-    # so we want diag(inv(mm)) but directly computing inverses is bad.
-    # that said, these are typically small-ish matrices and this is Simple.
-    return diag(inv(mm))
-end
-
-"""
-    gvif(m::RegressionModel; scale=false)
-
-Compute the generalized variance inflation factor (GVIF).
-
-If `scale=true`, then each GVIF is scaled by the degrees of freedom
-for (number of coefficients associated with) the predictor: ``GVIF^(1 / (2*df))``
-
-Returns a vector of inflation factors computed for each term except
-for the intercept.
-In other words, the corresponding coefficients are `termnames(m)[2:end]`.
-
-The [generalized variance inflation factor (VIF)](https://doi.org/10.2307/2290467)
-measures the increase in the variance of a (group of) parameter's estimate in a model
-with multiple parameters relative to the variance of a parameter's estimate in a
-model containing only that parameter. For continuous, numerical predictors, the GVIF
-is the same as the VIF, but for categorical predictors, the GVIF provides a single
-number for the entire group of contrast-coded coefficients associated with a categorical
-predictor.
-
-See also [`termnames`](@ref), [`vif`](@ref).
-
-!!! warning
-    This method will fail if there is (numerically) perfect multicollinearity,
-    i.e. rank deficiency (in the fixed effects). In that case though, the VIF
-    isn't particularly informative anyway.
-
-## References
-
-Fox, J., & Monette, G. (1992). Generalized Collinearity Diagnostics.
-Journal of the American Statistical Association, 87(417), 178. doi:10.2307/2290467
-"""
-function gvif(m::RegressionModel; scale=false)
-    mm = StatsBase.cov2cor!(vcov(m), stderror(m))
-
-    StatsModels.hasintercept(formula(m)) ||
-        throw(ArgumentError("GVIF only defined for models with an intercept term"))
-    mm = @view mm[2:end, 2:end]
-    size(mm, 2) > 1 ||
-        throw(ArgumentError("GVIF not meaningful for models with only one non-intercept term"))
-
-    tn = @view termnames(m)[2:end]
-    trms = @view _terms(m)[2:end]
-
-    df = width.(trms)
-    vals = zeros(eltype(mm), length(tn))
-    # julia> @benchmark gvif($fm2) # logdet(mm)
-    # BenchmarkTools.Trial: 10000 samples with 1 evaluation.
-    #  Range (min … max):  28.776 μs …  12.991 ms  ┊ GC (min … max): 0.00% … 99.34%
-    #  Time  (median):     32.320 μs               ┊ GC (median):    0.00%
-    #  Time  (mean ± σ):   33.939 μs ± 129.624 μs  ┊ GC (mean ± σ):  3.80% ±  0.99%
-
-    #      ▂▆▇▇▃▂▄▇█▆▅▄▂▁
-    #   ▂▃▆██████████████▇▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂ ▄
-    #   28.8 μs         Histogram: frequency by time         48.3 μs <
-    #  Memory estimate: 16.19 KiB, allocs estimate: 303.
-    # julia> @benchmark gvif($fm2) #  logdet(cholesky(Symmetric(mm)))
-    # BenchmarkTools.Trial: 10000 samples with 1 evaluation.
-    #  Range (min … max):  30.091 μs …  13.029 ms  ┊ GC (min … max): 0.00% … 99.31%
-    #  Time  (median):     33.279 μs               ┊ GC (median):    0.00%
-    #  Time  (mean ± σ):   35.016 μs ± 129.993 μs  ┊ GC (mean ± σ):  3.70% ±  0.99%
-
-    #      ▆█▆▄▃▆▇▆▆▄▄▁
-    #   ▂▃▇█████████████▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▂▂▁▂▂▂▂▂▂▂▁▂▂▂▂▂ ▄
-    #   30.1 μs         Histogram: frequency by time         50.1 μs <
-
-    #  Memory estimate: 16.22 KiB, allocs estimate: 304.
-    logdetmm = logdet(mm)
-    acc = 0
-    for idx in axes(vals, 1)
-        wt = df[idx]
-        trm_only = [acc < i <= (acc + wt) for i in axes(mm, 2)]
-        trm_excl = .!trm_only
-        vals[idx] = exp(logdet(view(mm, trm_only, trm_only)) +
-                        logdet(view(mm, trm_excl, trm_excl)) -
-                        logdetmm)
-        acc += wt
-    end
-
-    if scale
-        vals .= vals .^ (1 ./ (2 .* df))
-    end
-    return vals
-end