From e607229406a05016490e9e9423ac2733b07dc106 Mon Sep 17 00:00:00 2001 From: "mhsatman@gmail.com" Date: Mon, 4 Nov 2024 22:22:20 +0300 Subject: [PATCH] add more test for robust hat matrix based regression estimator --- CHANGELOG.md | 2 ++ src/robhatreg.jl | 7 ++-- test/testrobhatreg.jl | 82 ++++++++++++++++++++++++++++++++----------- 3 files changed, 66 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc0b1a4..74d0cd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,10 @@ # v0.11.5 (Upcoming Release) - Initial implementation of the robust hat matrix regression estimator +- Add more test to robust hat matrix regression estimator - Introduce `view` in LTS and LMS. + # v0.11.4 - More explicit return types, drop `Dict` with `Dict{String, Any}` or `Dict{String, Vector}` diff --git a/src/robhatreg.jl b/src/robhatreg.jl index 5247f51..810e9b6 100644 --- a/src/robhatreg.jl +++ b/src/robhatreg.jl @@ -25,13 +25,12 @@ function m(mat::AbstractMatrix{T}, u::AbstractVector{T})::AbstractMatrix where { for i in 1:L y[i, 1] = u[i] end - result = m(mat, y) - return result + return m(mat, y) end function m(m1::AbstractMatrix{T}, m2::AbstractMatrix{T})::AbstractMatrix where {T<:Real} - n1, _ = size(m1) - _, p2 = size(m2) + n1 = size(m1, 1) + p2 = size(m2, 2) newmat = zeros(Float64, n1, p2) for i in 1:n1 for j in 1:p2 diff --git a/test/testrobhatreg.jl b/test/testrobhatreg.jl index 9d793c8..35e5814 100644 --- a/test/testrobhatreg.jl +++ b/test/testrobhatreg.jl @@ -1,34 +1,74 @@ +@testset "Robust Hat Matrix based Robust Regression" verbose = true begin -@testset "Robust Hat Matrix based Robust Regression" begin - # Create simple data - rng = MersenneTwister(12345) - n = 50 - x = collect(1:n) - e = randn(rng, n) .* 2.0 - y = 5 .+ 5 .* x .+ e + @testset "Random data" begin + # Create simple data + rng = MersenneTwister(12345) + n = 50 + x = collect(1:n) + e = randn(rng, n) .* 2.0 + y = 5 .+ 5 .* x .+ e - # Contaminate some values - y[n] = y[n] * 2.0 - y[n-1] = y[n-1] * 2.0 - y[n-2] = y[n-2] * 2.0 - y[n-3] = y[n-3] * 2.0 - y[n-4] = y[n-4] * 2.0 + # Contaminate some values + y[n] = y[n] * 2.0 + y[n-1] = y[n-1] * 2.0 + y[n-2] = y[n-2] * 2.0 + y[n-3] = y[n-3] * 2.0 + y[n-4] = y[n-4] * 2.0 - df = DataFrame(x=x, y=y) + df = DataFrame(x=x, y=y) - reg = createRegressionSetting(@formula(y ~ x), df) - result = robhatreg(reg) + reg = createRegressionSetting(@formula(y ~ x), df) + result = robhatreg(reg) - betas = result["betas"] + betas = result["betas"] - atol = 1.0 + atol = 1.0 - @test isapprox(betas[1], 5.0, atol=atol) - @test isapprox(betas[2], 5.0, atol=atol) -end + @test isapprox(betas[1], 5.0, atol=atol) + @test isapprox(betas[2], 5.0, atol=atol) + end + + @testset "Phone data" begin + df = phones + reg = createRegressionSetting(@formula(calls ~ year), df) + result = robhatreg(reg) + + betas = result["betas"] + + atol = 0.001 + @test isapprox(betas[1], -54.967349441923226, atol=atol) + @test isapprox(betas[2], 1.1406353489513064, atol=atol) + end + @testset "Large Data" begin + X = randn(10000, 10) + y = randn(10000) + result = robhatreg(X, y) + betas = result["betas"] + + atol = 0.1 + + for i in 1:10 + @test isapprox(betas[i], 0.0, atol=atol) + end + end + + @testset "Single Y outlier" begin + @testset "LAD - Algorithm - Exact" begin + df2 = DataFrame( + x=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + y=[2, 4, 6, 8, 10, 12, 14, 16, 18, 1000], + ) + reg2 = createRegressionSetting(@formula(y ~ x), df2) + result2 = lad(reg2) + betas2 = result2["betas"] + @test betas2[1] == 0.0 + @test betas2[2] == 2.0 + end + end +end