Skip to content

Commit

Permalink
add more test for robust hat matrix based regression estimator
Browse files Browse the repository at this point in the history
  • Loading branch information
jbytecode committed Nov 4, 2024
1 parent 4a2bcf8 commit e607229
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 25 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# v0.11.5 (Upcoming Release)

- Initial implementation of the robust hat matrix regression estimator
- Add more test to robust hat matrix regression estimator
- Introduce `view` in LTS and LMS.


# v0.11.4

- More explicit return types, drop `Dict` with `Dict{String, Any}` or `Dict{String, Vector}`
Expand Down
7 changes: 3 additions & 4 deletions src/robhatreg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,12 @@ function m(mat::AbstractMatrix{T}, u::AbstractVector{T})::AbstractMatrix where {
for i in 1:L
y[i, 1] = u[i]
end
result = m(mat, y)
return result
return m(mat, y)
end

function m(m1::AbstractMatrix{T}, m2::AbstractMatrix{T})::AbstractMatrix where {T<:Real}
n1, _ = size(m1)
_, p2 = size(m2)
n1 = size(m1, 1)
p2 = size(m2, 2)
newmat = zeros(Float64, n1, p2)
for i in 1:n1
for j in 1:p2
Expand Down
82 changes: 61 additions & 21 deletions test/testrobhatreg.jl
Original file line number Diff line number Diff line change
@@ -1,34 +1,74 @@

@testset "Robust Hat Matrix based Robust Regression" verbose = true begin

@testset "Robust Hat Matrix based Robust Regression" begin
# Create simple data
rng = MersenneTwister(12345)
n = 50
x = collect(1:n)
e = randn(rng, n) .* 2.0
y = 5 .+ 5 .* x .+ e
@testset "Random data" begin
# Create simple data
rng = MersenneTwister(12345)
n = 50
x = collect(1:n)
e = randn(rng, n) .* 2.0
y = 5 .+ 5 .* x .+ e

# Contaminate some values
y[n] = y[n] * 2.0
y[n-1] = y[n-1] * 2.0
y[n-2] = y[n-2] * 2.0
y[n-3] = y[n-3] * 2.0
y[n-4] = y[n-4] * 2.0
# Contaminate some values
y[n] = y[n] * 2.0
y[n-1] = y[n-1] * 2.0
y[n-2] = y[n-2] * 2.0
y[n-3] = y[n-3] * 2.0
y[n-4] = y[n-4] * 2.0

df = DataFrame(x=x, y=y)
df = DataFrame(x=x, y=y)

reg = createRegressionSetting(@formula(y ~ x), df)
result = robhatreg(reg)
reg = createRegressionSetting(@formula(y ~ x), df)
result = robhatreg(reg)

betas = result["betas"]
betas = result["betas"]

atol = 1.0
atol = 1.0

@test isapprox(betas[1], 5.0, atol=atol)
@test isapprox(betas[2], 5.0, atol=atol)
end
@test isapprox(betas[1], 5.0, atol=atol)
@test isapprox(betas[2], 5.0, atol=atol)
end

@testset "Phone data" begin
df = phones
reg = createRegressionSetting(@formula(calls ~ year), df)
result = robhatreg(reg)

betas = result["betas"]

atol = 0.001

@test isapprox(betas[1], -54.967349441923226, atol=atol)
@test isapprox(betas[2], 1.1406353489513064, atol=atol)
end

@testset "Large Data" begin
X = randn(10000, 10)
y = randn(10000)

result = robhatreg(X, y)

betas = result["betas"]

atol = 0.1

for i in 1:10
@test isapprox(betas[i], 0.0, atol=atol)
end
end

@testset "Single Y outlier" begin
@testset "LAD - Algorithm - Exact" begin
df2 = DataFrame(
x=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
y=[2, 4, 6, 8, 10, 12, 14, 16, 18, 1000],
)
reg2 = createRegressionSetting(@formula(y ~ x), df2)
result2 = lad(reg2)
betas2 = result2["betas"]
@test betas2[1] == 0.0
@test betas2[2] == 2.0
end
end
end

0 comments on commit e607229

Please sign in to comment.