Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VIF #26

Merged
merged 16 commits into from
Sep 6, 2023
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
name = "StatsAPI"
uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
authors = ["Milan Bouchet-Valat <[email protected]"]
version = "1.6.0"
version = "1.7.0"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
julia = "1"
Expand Down
1 change: 1 addition & 0 deletions src/StatsAPI.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module StatsAPI

using LinearAlgebra
using Statistics

include("statisticalmodel.jl")
palday marked this conversation as resolved.
Show resolved Hide resolved
include("regressionmodel.jl")
Expand Down
42 changes: 42 additions & 0 deletions src/regressionmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,45 @@ function linearpredictor end
In-place version of [`linearpredictor`](@ref), storing the result in `storage`.
"""
function linearpredictor! end

"""
vif(m::RegressionModel)

Compute the variance inflation factor (VIF).

Returns a vector of inflation factors computed for each coefficient except
for the intercept.
In other words, the corresponding coefficients are `coefnames(m)[2:end]`.

The [VIF](https://en.wikipedia.org/wiki/Variance_inflation_factor) measures
the increase in the variance of a parameter's estimate in a model with multiple parameters relative to
the variance of a parameter's estimate in a model containing only that parameter.

See also [`coefnames`](@ref).

!!! warning
This method will fail if there is (numerically) perfect multicollinearity,
i.e. rank deficiency. In that case though, the VIF
is not particularly informative anyway.
"""
function vif(model::RegressionModel)
# copy in case vcov returns a view / reference to internal data structures
mm = Statistics.cov2cor!(copy(vcov(model)), stderror(model))
palday marked this conversation as resolved.
Show resolved Hide resolved
# TODO: replace with hasintercept() when implemented (xref #17)
palday marked this conversation as resolved.
Show resolved Hide resolved
modelmat = modelmatrix(model)
# TODO: replace with eachcol() when support for Julia 1.0 is dropped
i = findfirst(Base.Fix1(all, isone), [view(modelmat, :, col) for col in axes(modelmat, 2)])
palday marked this conversation as resolved.
Show resolved Hide resolved
i === nothing &&
throw(ArgumentError("VIF is only defined for models with an intercept term"))
# Translate the column index in the model matrix to the corresponding indices in the
# correlation matrix. This removes the constant offset assumption, but there is still
# an assumption of linear indexing with unit stride.
j1 = firstindex(mm, 1) - i + 1
j2 = firstindex(mm, 2) - i + 1
m = view(mm, axes(mm, 1) .!= j1, axes(mm, 2) .!= j2)
size(m, 2) > 1 ||
throw(ArgumentError("VIF not meaningful for models with only one non-intercept term"))
# NB: The correlation matrix is positive definite and hence invertible
# unless there is perfect rank deficiency, hence the warning.
return diag(inv(m))
end
29 changes: 27 additions & 2 deletions test/regressionmodel.jl
Original file line number Diff line number Diff line change
@@ -1,18 +1,43 @@
module TestRegressionModel

using Test, LinearAlgebra, StatsAPI
using StatsAPI: RegressionModel, crossmodelmatrix
using StatsAPI: RegressionModel, crossmodelmatrix, vif

struct MyRegressionModel <: RegressionModel
end

StatsAPI.modelmatrix(::MyRegressionModel) = [1 2; 3 4]
StatsAPI.vcov(::MyRegressionModel) = [1 0; 0 1]

struct MyRegressionModel2 <: RegressionModel
end

StatsAPI.modelmatrix(::MyRegressionModel2) = [1 2; 1 2]
StatsAPI.vcov(::MyRegressionModel2) = [1 0; 0 1]

struct MyRegressionModel3 <: RegressionModel
end

StatsAPI.modelmatrix(::MyRegressionModel3) = [1 2 3; 1 2 3]
StatsAPI.vcov(::MyRegressionModel3) = [1 0 0; 0 1 0; 0 0 1]


@testset "TestRegressionModel" begin
m = MyRegressionModel()

@test crossmodelmatrix(m) == [10 14; 14 20]
@test crossmodelmatrix(m) isa Symmetric

# no intercept term
@test_throws ArgumentError vif(m)

m2 = MyRegressionModel2()
# only one non intercept term
@test_throws ArgumentError vif(m2)

m3 = MyRegressionModel3()
# vcov is identity, so the VIF is just 1
@test vif(m3) ≈ [1, 1]
end

end # module TestRegressionModel
end # module TestRegressionModel
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should also be able to revert changes to this file and the next one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the missing terminal newlines? I think those should be required FWIW

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

3 changes: 2 additions & 1 deletion test/statisticalmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ StatsAPI.nobs(::MyStatisticalModel) = 100
@test adjr2 === adjr²
end

end # module TestStatisticalModel
end # module TestStatisticalModel
palday marked this conversation as resolved.
Show resolved Hide resolved