From 313a0070733efdc7e89bd6e48698c076dbea6e48 Mon Sep 17 00:00:00 2001
From: "mhsatman@gmail.com"
Date: Wed, 23 Aug 2023 18:43:31 +0300
Subject: [PATCH] add deepest regression estimator (#13)

---
 CHANGELOG.md                  | 10 ++++-
 Project.toml                  |  4 +-
 README.md                     |  3 +-
 docs/src/algorithms.md        |  4 ++
 src/LinRegOutliers.jl         | 71 +++++++++++++++++++----------------
 src/deepestregression.jl      | 66 ++++++++++++++++++++++++++++++++
 src/precompile/precompile.jl  |  1 +
 test/runtests.jl              |  1 +
 test/testdeepestregression.jl | 39 +++++++++++++++++++
 9 files changed, 163 insertions(+), 36 deletions(-)
 create mode 100644 src/deepestregression.jl
 create mode 100644 test/testdeepestregression.jl

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f3c2443..c8cf9f0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,12 @@
-# v0.10.2 (Upcoming Release)
+# v0.11.1 (Upcoming Release)
+
+
+# v0.11.0
+
+- Deepest Regression Estimator added.
+
+
+# v0.10.2
 
 - mahalanobisSquaredBetweenPairs() return Union{Nothing, Matrix} depending on the determinant of the covariance matrix
 - mahalanobisSquaredMatrix() returns Union{Nothing, Matrix} depending on the determinant of the covariance matrix
diff --git a/Project.toml b/Project.toml
index 0bc7eaf..353b3c3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "LinRegOutliers"
 uuid = "6d4de0fb-32d9-4c65-aac1-cc9ed8b94b1a"
 authors = ["Mehmet Hakan Satman ", "Shreesh Adiga <16567adigashreesh@gmail.com>", "Guillermo Angeris ", "Emre Akadal "]
-version = "0.10.2"
+version = "0.11.0"
 
 [deps]
 Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
@@ -14,6 +14,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
+mrfDepth_jll = "53656f53-9700-50e7-bf9c-d3aea1338d1b"
 
 [compat]
 Clustering = "0.12.2, 0.13, 0.14, 0.15"
@@ -26,6 +27,7 @@ PrecompileTools = "1"
 Requires = "1"
 StatsModels = "0.4, 0.5, 0.6, 0.7"
 julia = "1.4"
+mrfDepth_jll = "1.0.14"
 
 [extras]
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
diff --git a/README.md b/README.md
index bb48f52..b5742c8 100644
--- a/README.md
+++ b/README.md
@@ -38,11 +38,12 @@ A Julia package for outlier detection in linear regression.
 - Hadi (1994) Algorithm
 - Chatterjee & Mächler (1997)
 - Theil-Sen estimator for multiple regression
+- Deepest Regression Estimator
 - Summary
 
 ## Unimplemented Methods
 
-- Depth based estimators (Regression depth, deepest regression, etc.) See [#13](https://github.com/jbytecode/LinRegOutliers/issues/13) for the related issue.
+
 - Pena & Yohai (1999). See [#25](https://github.com/jbytecode/LinRegOutliers/issues/25) for the related issue.
diff --git a/docs/src/algorithms.md b/docs/src/algorithms.md
index bdf57dd..c1b8612 100644
--- a/docs/src/algorithms.md
+++ b/docs/src/algorithms.md
@@ -135,6 +135,10 @@ LinRegOutliers.quantileregression
 LinRegOutliers.theilsen
 ```
 
+## Deepest Regression Estimator
+```@docs
+LinRegOutliers.deepestregression
+```
diff --git a/src/LinRegOutliers.jl b/src/LinRegOutliers.jl
index 4936fec..f9cef49 100644
--- a/src/LinRegOutliers.jl
+++ b/src/LinRegOutliers.jl
@@ -5,33 +5,33 @@ using Requires
 # After the module is loaded, we check if Plots is installed and loaded.
 # If Plots is installed and loaded, we load the corresponding modules.
 function __init__()
-    @require Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin
-
-        import .Plots: RGBX
+    @require Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin
 
-        include("mveltsplot.jl")
-        include("dataimage.jl")
-        include("bchplot.jl")
+        import .Plots: RGBX
 
-        import .MVELTSPlot: mveltsplot
-        import .DataImage: dataimage
-        import .BCHPlot: bchplot
-
-        export mveltsplot, dataimage, bchplot, RGBX
+        include("mveltsplot.jl")
+        include("dataimage.jl")
+        include("bchplot.jl")
 
-    end
+        import .MVELTSPlot: mveltsplot
+        import .DataImage: dataimage
+        import .BCHPlot: bchplot
+
+        export mveltsplot, dataimage, bchplot, RGBX
+
+    end
 end
 
 
 # Basis
 include("basis.jl")
 import .Basis:
-    RegressionSetting,
-    createRegressionSetting,
-    @extractRegressionSetting,
-    applyColumns,
-    find_minimum_nonzero,
-    designMatrix,
-    responseVector
+    RegressionSetting,
+    createRegressionSetting,
+    @extractRegressionSetting,
+    applyColumns,
+    find_minimum_nonzero,
+    designMatrix,
+    responseVector
 export RegressionSetting
 export createRegressionSetting
 export designMatrix
@@ -65,20 +65,20 @@ import .OrdinaryLeastSquares: OLS, ols, wls, residuals, predict, coef
 # Regression diagnostics
 include("diagnostics.jl")
 import .Diagnostics:
-    dffit,
-    dffits,
-    dfbeta,
-    dfbetas,
-    hatmatrix,
-    studentizedResiduals,
-    adjustedResiduals,
-    jacknifedS,
-    cooks,
-    cooksoutliers,
-    mahalanobisSquaredMatrix,
-    covratio,
-    hadimeasure,
-    diagnose
+    dffit,
+    dffits,
+    dfbeta,
+    dfbetas,
+    hatmatrix,
+    studentizedResiduals,
+    adjustedResiduals,
+    jacknifedS,
+    cooks,
+    cooksoutliers,
+    mahalanobisSquaredMatrix,
+    covratio,
+    hadimeasure,
+    diagnose
 
 
 # Hadi & Simonoff (1993) algorithm
@@ -205,6 +205,10 @@ import .CM97: cm97
 include("theilsen.jl")
 import .TheilSen: theilsen
 
+# Deepest Regression Estimator
+include("deepestregression.jl")
+import .DeepestRegression: deepestregression
+
 # All-in-one
 include("summary.jl")
 import .Summary: detectOutliers
@@ -267,6 +271,7 @@ export atkinson94, atkinsonstalactiteplot, generate_stalactite_plot
 export bacon
 export cm97
 export theilsen
+export deepestregression
 
 
 # Snoop-Precompile
diff --git a/src/deepestregression.jl b/src/deepestregression.jl
new file mode 100644
index 0000000..e94fc30
--- /dev/null
+++ b/src/deepestregression.jl
@@ -0,0 +1,66 @@
+module DeepestRegression
+
+export deepestregression
+
+import ..Basis:
+    RegressionSetting, @extractRegressionSetting, designMatrix, responseVector
+
+using mrfDepth_jll: mrfDepth_jll
+
+
+"""
+    deepestregression(setting; maxit = 10000)
+
+Estimate Deepest Regression parameters.
+
+
+# Arguments
+- `setting::RegressionSetting`: RegressionSetting object with a formula and dataset.
+- `maxit`: Maximum number of iterations.
+
+# Description
+Estimates Deepest Regression Estimator coefficients.
+
+# References
+Van Aelst S., Rousseeuw P.J., Hubert M., Struyf A. (2002). The
+deepest regression method. Journal of Multivariate Analysis,
+81, 138-166.
+
+
+# Output
+- `betas`: Vector of estimated regression coefficients.
+""" +function deepestregression(setting::RegressionSetting; maxit::Int = 10000) + X = designMatrix(setting) + y = responseVector(setting) + if all(x -> isone(x), X[:, 1]) + X = X[:, 2:end] + end + return deepestregression(X, y, maxit = maxit) +end + +function deepestregression(X::Matrix{Float64}, y::Vector{Float64}; maxit::Int = 10000)::Vector{Float64} + drdata = hcat(X, y) + n, p = size(drdata) + n = Int32(n) + p = Int32(p) + betas = zeros(Float64, p) + maxit = Int32(maxit) + iter = Int32(1) + MDEPAPPR = Int32(p) + ccall((:sweepmedres_, mrfDepth_jll.libmrfDepth), + Cint, + (Ref{Float64}, # X + Ref{Int32}, # n + Ref{Int32}, # np + Ref{Float64}, # betas + Ref{Cint}, # maxit + Ref{Cint}, # iter + Ref{Cint}, # MDEPAPPR + ), drdata, n, p, betas, maxit, iter, MDEPAPPR) + + return vcat(betas[end], betas[1:(end-1)]) +end + + +end # end of module DeepestRegression diff --git a/src/precompile/precompile.jl b/src/precompile/precompile.jl index 59c2c45..12cd568 100644 --- a/src/precompile/precompile.jl +++ b/src/precompile/precompile.jl @@ -26,5 +26,6 @@ using PrecompileTools smr98(reg) ransac(reg, t = 0.8, w = 0.85) theilsen(reg, 2, nsamples = 10) + deepestregression(reg) end end diff --git a/test/runtests.jl b/test/runtests.jl index 2503dbb..7510073 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -36,3 +36,4 @@ include("testbacon2000.jl") include("testdataimage.jl") include("testtheilsen.jl") include("testsummary.jl") +include("testdeepestregression.jl") \ No newline at end of file diff --git a/test/testdeepestregression.jl b/test/testdeepestregression.jl new file mode 100644 index 0000000..1c60e80 --- /dev/null +++ b/test/testdeepestregression.jl @@ -0,0 +1,39 @@ +import LinRegOutliers: DataSets + +@testset "Deepest Regression" begin + + @testset "Simple Data" begin + eps = 0.1 + + n = 100000 + x1 = rand(n) + x2 = rand(n) + o = ones(Float64, n) + e = randn(n) + y = 15 .+ 10 .* x1 + 5 .* x2 + e + X = hcat(x1, x2) + + result = deepestregression(X, y) + + @test isapprox(result[1], 15, atol = eps) + @test isapprox(result[2], 10, atol = eps) + @test isapprox(result[3], 5, atol = eps) + end + + @testset "Stackloss Data Example" begin + + eps = 0.001 + + setting = createRegressionSetting( + @formula(stackloss ~ airflow + watertemp + acidcond), + DataSets.stackloss) + + result = deepestregression(setting) + + @test isapprox(result[1], -35.37610619, atol = eps) + @test isapprox(result[2], 0.82522124, atol = eps) + @test isapprox(result[3], 0.44247788, atol = eps) + @test isapprox(result[4], -0.07964602, atol = eps) + + end +end \ No newline at end of file