Skip to content

Commit

Permalink
v0.9.0
Browse files Browse the repository at this point in the history
  • Loading branch information
jbytecode committed Jan 8, 2023
1 parent 29e9e1b commit b180a29
Show file tree
Hide file tree
Showing 11 changed files with 961 additions and 213 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Upcoming Release
# Upcoming Release


# 0.9.0
- Add exact argument for LAD. If exact is true then the linear programming based exact solution is found. Otherwise, a GA based search is performed to yield approximate solutions.
- Remove dependency on Plots.jl. If Plots.jl is installed and loaded manually, the functionality that uses Plots is automatically loaded by Requires.jl. Affected functions are `dataimage`, `mveltsplot`, and `bchplot`.


# v0.8.19
- Update Satman(2013) algorithm
Expand Down
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "LinRegOutliers"
uuid = "6d4de0fb-32d9-4c65-aac1-cc9ed8b94b1a"
authors = ["Mehmet Hakan Satman <[email protected]>", "Shreesh Adiga <[email protected]>", "Guillermo Angeris <[email protected]>", "Emre Akadal <[email protected]>"]
version = "0.8.19"
version = "0.9.0"

[deps]
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
Expand All @@ -11,7 +11,7 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6"
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"

[compat]
Expand All @@ -21,6 +21,6 @@ DataFrames = "0.22, 1"
Distributions = "0.17, 0.18, 0.19.1, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25"
GLPK = "0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 1"
JuMP = "0.19, 0.20, 0.21, 0.22, 0.23, 1"
Plots = "0.22.2, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 1"
Requires = "1"
StatsModels = "0.4, 0.5, 0.6"
julia = "1.4"
37 changes: 29 additions & 8 deletions src/LinRegOutliers.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
module LinRegOutliers

using Requires

# After the module is loaded, we check if Plots is installed and loaded.
# If Plots is installed and loaded, we load the corresponding modules.
# Module initializer run by Julia after LinRegOutliers is loaded.
# Requires.jl's @require arranges for the enclosed block to execute only once
# the Plots package (identified by its UUID) has been loaded by the user; this
# is how the hard dependency on Plots.jl was removed in v0.9.0.
function __init__()
@require Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin

# Bring Plots' RGB into this module so the plotting submodules below can
# reach it as `..RGB` (see src/dataimage.jl).
import .Plots: RGB

# The plotting submodules are included here — not at the top level — so
# that they are only compiled when Plots is actually available.
include("mveltsplot.jl")
include("dataimage.jl")
include("bchplot.jl")

import .MVELTSPlot: mveltsplot
import .DataImage: dataimage
import .BCHPlot: bchplot

# NOTE(review): exporting from inside __init__ is unusual; presumably it
# makes the plotting API visible only when Plots is loaded — confirm this
# behaves as intended with Requires.jl.
export mveltsplot, dataimage, bchplot, RGB

end
end

# Basis
include("basis.jl")
Expand Down Expand Up @@ -91,13 +112,14 @@ import .ASM2000: asm2000
include("mve.jl")
import .MVE: mve, mcd

# Moved into graphics.jl
# MVE - LTS plot for visual detection of regression outliers
include("mveltsplot.jl")
import .MVELTSPlot: mveltsplot
#include("mveltsplot.jl")
#import .MVELTSPlot: mveltsplot

# Billor & Chatterjee & Hadi Algorithm for detecting outliers
include("bch.jl")
import .BCH: bch, bchplot
import .BCH: bch

# Pena & Yohai (1995) algorithm
include("py95.jl")
Expand Down Expand Up @@ -138,9 +160,10 @@ include("hadi1994.jl")
import .Hadi94: hadi1994


# Moved into graphics.jl
# Gray-scale images of distance matrices
include("dataimage.jl")
import .DataImage: dataimage
#include("dataimage.jl")
#import .DataImage: dataimage


# Modified and original Satman (2012) algorithms
Expand Down Expand Up @@ -218,8 +241,7 @@ export asm2000
export lms
export lts
export mve, mcd
export mveltsplot
export bch, bchplot
export bch
export py95, py95SuspectedObservations
export satman2013
export satman2015, dominates
Expand All @@ -228,7 +250,6 @@ export quantileregression
export lta
export hadi1992
export hadi1994
export dataimage
export gwcga, galts, ga, cga, RealChromosome
export detectOutliers
export ransac
Expand Down
58 changes: 1 addition & 57 deletions src/bch.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module BCH

export bch
export bchplot


import ..Basis:
RegressionSetting, @extractRegressionSetting, designMatrix, responseVector, applyColumns
Expand All @@ -13,8 +13,6 @@ import Distributions: Chisq
import LinearAlgebra: diag
import DataFrames: DataFrame

using Plots



"""
Expand Down Expand Up @@ -207,59 +205,5 @@ end



"""
bchplot(setting::RegressionSetting; alpha=0.05, maxiter=1000, epsilon=0.00001)
Perform the Billor & Chatterjee & Hadi (2006) algorithm and generates outlier plot
for the given regression setting.
# Arguments
- `setting::RegressionSetting`: RegressionSetting object with a formula and dataset.
- `alpha::Float64`: Optional argument of the probability of rejecting the null hypothesis.
- `maxiter::Int`: Maximum number of iterations for calculating iterative weighted least squares estimates.
- `epsilon::Float64`: Accuracy for determining convergency.
# References
Billor, Nedret, Samprit Chatterjee, and Ali S. Hadi. "A re-weighted least squares method
for robust regression estimation." American journal of mathematical and management sciences 26.3-4 (2006): 229-252.
"""
function bchplot(
setting::RegressionSetting;
alpha = 0.05,
maxiter = 1000,
epsilon = 0.00001,
)
X = designMatrix(setting)
y = responseVector(setting)
return bchplot(X, y, alpha = alpha, maxiter = maxiter, epsilon = epsilon)
end

function bchplot(
Xdesign::Array{Float64,2},
y::Array{Float64,1};
alpha = 0.05,
maxiter = 1000,
epsilon = 0.00001,
)
result = bch(Xdesign, y, alpha = alpha, maxiter = maxiter, epsilon = epsilon)
squared_normalized_residuals = result["squared.normalized.residuals"]
squared_normalized_robust_distances = result["squared.normalized.robust.distances"]
n = length(squared_normalized_robust_distances)
scplot = scatter(
squared_normalized_robust_distances,
squared_normalized_residuals,
legend = false,
series_annotations = text.(1:n, :bottom),
tickfont = font(10),
guidefont = font(10),
labelfont = font(10),
)
title!("Billor & Chatterjee & Hadi Plot")
xlabel!("Squared Normalized Robust Distances")
ylabel!("Squared Normalized Residuals")
end


end # end of module BCH
69 changes: 69 additions & 0 deletions src/bchplot.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
module BCHPlot


export bchplot


import ..BCH: bch
# Bug fix: designMatrix and responseVector are called in the RegressionSetting
# method below but were not imported, so bchplot(setting) threw an
# UndefVarError at runtime.
import ..Basis: RegressionSetting, designMatrix, responseVector

# NOTE(review): this file is only include()d from the Requires.jl @require
# block after Plots has been loaded, so the plotting names used below
# (scatter, text, font, title!, xlabel!, ylabel!) are assumed to resolve at
# that point — TODO confirm they are actually in scope inside this module.

"""
    bchplot(setting::RegressionSetting; alpha=0.05, maxiter=1000, epsilon=0.00001)

Perform the Billor & Chatterjee & Hadi (2006) algorithm and generate an
outlier plot for the given regression setting.

# Arguments
- `setting::RegressionSetting`: RegressionSetting object with a formula and dataset.
- `alpha::Float64`: Optional argument of the probability of rejecting the null hypothesis.
- `maxiter::Int`: Maximum number of iterations for calculating iterative weighted least squares estimates.
- `epsilon::Float64`: Accuracy for determining convergence.

# References
Billor, Nedret, Samprit Chatterjee, and Ali S. Hadi. "A re-weighted least squares method
for robust regression estimation." American journal of mathematical and management sciences 26.3-4 (2006): 229-252.

!!! warning "Dependencies"
    This method is enabled when the Plots package is installed and loaded.
"""
function bchplot(
    setting::RegressionSetting;
    alpha = 0.05,
    maxiter = 1000,
    epsilon = 0.00001,
)
    # Extract the design matrix and response vector from the regression
    # setting and delegate to the matrix-based method below.
    X = designMatrix(setting)
    y = responseVector(setting)
    return bchplot(X, y, alpha = alpha, maxiter = maxiter, epsilon = epsilon)
end

function bchplot(
    Xdesign::Array{Float64,2},
    y::Array{Float64,1};
    alpha = 0.05,
    maxiter = 1000,
    epsilon = 0.00001,
)
    # Run the BCH outlier detection, then plot squared normalized robust
    # distances against squared normalized residuals, annotating each point
    # with its observation index so outliers can be identified visually.
    result = bch(Xdesign, y, alpha = alpha, maxiter = maxiter, epsilon = epsilon)
    squared_normalized_residuals = result["squared.normalized.residuals"]
    squared_normalized_robust_distances = result["squared.normalized.robust.distances"]
    n = length(squared_normalized_robust_distances)
    scplot = scatter(
        squared_normalized_robust_distances,
        squared_normalized_residuals,
        legend = false,
        series_annotations = text.(1:n, :bottom),
        tickfont = font(10),
        guidefont = font(10),
        labelfont = font(10),
    )
    # title!/xlabel!/ylabel! mutate (and return) the current plot, i.e. scplot;
    # return it explicitly for clarity.
    title!("Billor & Chatterjee & Hadi Plot")
    xlabel!("Squared Normalized Robust Distances")
    ylabel!("Squared Normalized Residuals")
    return scplot
end

end # end of module
13 changes: 7 additions & 6 deletions src/dataimage.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
module DataImage


export dataimage

import ..Diagnostics:
mahalanobisSquaredMatrix, euclideanDistances, mahalanobisSquaredBetweenPairs


import Plots: RGB


import ..RGB

"""
Expand Down Expand Up @@ -41,11 +38,15 @@ julia> Plots.plot(di)
# References
Marchette, David J., and Jeffrey L. Solka. "Using data images for outlier detection."
Computational Statistics & Data Analysis 43.4 (2003): 541-552.
!!! warning "Dependencies"
This method is enabled when the Plots package is installed and loaded.
"""
function dataimage(
dataMatrix::Array{Float64,2};
distance = :mahalanobis,
)::Array{RGB{Float64},2}
)::Matrix{RGB{Float64}}
d = nothing
if distance == :mahalanobis
d = mahalanobisSquaredBetweenPairs(dataMatrix)
Expand Down
5 changes: 4 additions & 1 deletion src/mveltsplot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import ..MVE: mve

import Distributions: Chisq, quantile

using Plots

"""
mveltsplot(setting; alpha = 0.05, showplot = true)
Expand Down Expand Up @@ -41,6 +40,10 @@ good leverage points (observations far from the remaining of data in both x and
# References
Van Aelst, Stefan, and Peter Rousseeuw. "Minimum volume ellipsoid." Wiley
Interdisciplinary Reviews: Computational Statistics 1.1 (2009): 71-82.
!!! warning "Dependencies"
This method is enabled when the Plots package is installed and loaded.
"""
function mveltsplot(setting::RegressionSetting; alpha = 0.05, showplot = true)
ltsresult = lts(setting)
Expand Down
Loading

0 comments on commit b180a29

Please sign in to comment.