From eb0d23d322760c7bc6360f6ee644550e26c7da67 Mon Sep 17 00:00:00 2001 From: "mhsatman@gmail.com" Date: Fri, 24 May 2024 20:20:41 +0300 Subject: [PATCH] introduce applyColumns! for inplace replacement of calculations --- src/LinRegOutliers.jl | 3 +++ src/basis.jl | 10 ++++++++- src/mve.jl | 50 ++++++++++++++++++++++--------------------- 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/src/LinRegOutliers.jl b/src/LinRegOutliers.jl index f9cef49..974d982 100644 --- a/src/LinRegOutliers.jl +++ b/src/LinRegOutliers.jl @@ -29,14 +29,17 @@ import .Basis: createRegressionSetting, @extractRegressionSetting, applyColumns, + applyColumns!, find_minimum_nonzero, designMatrix, responseVector + export RegressionSetting export createRegressionSetting export designMatrix export responseVector export applyColumns +export applyColumns! export find_minimum_nonzero export @extractRegressionSetting diff --git a/src/basis.jl b/src/basis.jl index 4d5a37e..10924a1 100644 --- a/src/basis.jl +++ b/src/basis.jl @@ -3,7 +3,7 @@ module Basis export RegressionSetting export createRegressionSetting export @extractRegressionSetting -export applyColumns +export applyColumns, applyColumns! export find_minimum_nonzero @@ -297,6 +297,14 @@ function applyColumns(f::F, data::AbstractMatrix{Float64}) where {F <: Function} end +function applyColumns!(target::Vector, f::F, data::AbstractMatrix) where {F <: Function} + for i in 1:size(data, 2) + target[i] = f(data[:, i]) + end + return target +end + + """ diff --git a/src/mve.jl b/src/mve.jl index 313108c..8eb6cf6 100644 --- a/src/mve.jl +++ b/src/mve.jl @@ -8,28 +8,26 @@ import LinearAlgebra: diag, det import Distributions: median, cov, mean, quantile, sample, Chisq import ..Basis: - RegressionSetting, @extractRegressionSetting, designMatrix, responseVector, applyColumns + RegressionSetting, + @extractRegressionSetting, designMatrix, responseVector, applyColumns, applyColumns! + import ..Diagnostics: mahalanobisSquaredMatrix function enlargesubset(initialsubset, data::AbstractMatrix, h::Int) - n, p = size(data) + p = size(data, 2) basicsubset = copy(initialsubset) meanvector = Array{Float64}(undef, p) - covmatrix = Matrix{Float64}(undef, p, p) - md2mat = Matrix{Float64}(undef, n, n) - md2 = Array{Float64}(undef, n) - md2sortedindex = Array{Int}(undef, n) while length(basicsubset) < h - meanvector .= applyColumns(mean, data[basicsubset, :]) - covmatrix .= cov(data[basicsubset, :]) - md2mat .= + applyColumns!(meanvector, mean, data[basicsubset, :]) + covmatrix = cov(data[basicsubset, :]) + md2mat = mahalanobisSquaredMatrix(data, meanvector = meanvector, covmatrix = covmatrix) - md2 .= diag(md2mat) - md2sortedindex .= sortperm(md2) + md2 = diag(md2mat) + md2sortedindex = sortperm(md2) basicsubset = md2sortedindex[1:(length(basicsubset)+1)] end return basicsubset @@ -55,21 +53,22 @@ function robcov(data::Matrix; alpha = 0.01, estimator = :mve) hsubset = Array{Int}(undef, h) besthsubset = Array{Int}(undef, h) - covmatrix = Matrix{Float64}(undef, p, p) + meanvector = Array{Float64}(undef, p) - md2mat = Matrix{Float64}(undef, n, n) + fill!(meanvector, 0.0) - md2 = Array{Float64}(undef, n) for iter = 1:maxiter goal = Inf + + try - initialsubset .= sample(indices, k, replace = false) - hsubset .= enlargesubset(initialsubset, data, h) - covmatrix .= cov(data[hsubset, :]) + initialsubset = sample(indices, k, replace = false) + hsubset = enlargesubset(initialsubset, data, h) + covmatrix = cov(data[hsubset, :]) if estimator == :mve - meanvector .= applyColumns(mean, data[hsubset, :]) - md2mat .= mahalanobisSquaredMatrix( + applyColumns!(meanvector, mean, data[hsubset, :]) + md2mat = mahalanobisSquaredMatrix( data, meanvector = meanvector, covmatrix = covmatrix, @@ -82,17 +81,20 @@ function robcov(data::Matrix; alpha = 0.01, estimator = :mve) catch e # Possibly singularity end + + if goal < mingoal mingoal = goal - bestinitialsubset .= initialsubset - besthsubset .= hsubset + bestinitialsubset = initialsubset + besthsubset = hsubset end end - meanvector .= applyColumns(mean, data[besthsubset, :]) - covmatrix .= cov(data[besthsubset, :]) - md2 .= diag( + + applyColumns!(meanvector, mean, data[besthsubset, :]) + covmatrix = cov(data[besthsubset, :]) + md2 = diag( mahalanobisSquaredMatrix( data, meanvector = meanvector,