replace Array{Float64, x} with Vector and Matrix
jbytecode committed Jul 23, 2023
1 parent 6ccc31b commit 7d17172
Showing 17 changed files with 71 additions and 72 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
@@ -1,7 +1,11 @@
# v0.10.2 (Upcoming Release)

- mahalanobisSquaredBetweenPairs() returns Union{Nothing, Matrix} depending on the determinant of the covariance matrix
- mahalanobisSquaredMatrix return Union{Nothing, Matrix} depending on the determinant of the covariance matrix
- mahalanobisSquaredMatrix() returns Union{Nothing, Matrix} depending on the determinant of the covariance matrix
- import in DataImages fixed.
- Array{Float64, 1} is replaced by Vector{Float64}.
- Array{Float64, 2} is replaced by Matrix{Float64}.
- Use of try/catch reduced, many tries were depending on singularities.


# v0.10.1
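Note: `Vector{T}` and `Matrix{T}` are built-in Julia aliases for `Array{T, 1}` and `Array{T, 2}`, so the rename is purely cosmetic and changes no behavior. A quick REPL check:

```julia-repl
julia> Vector{Float64} === Array{Float64, 1}
true

julia> Matrix{Float64} === Array{Float64, 2}
true
```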
2 changes: 1 addition & 1 deletion src/basis.jl
@@ -305,7 +305,7 @@ end
Return minimum of numbers greater than zero.
# Arguments
- `arr::Array{Float64, 1}`: A function that takes a one dimensional array as argument.
- `arr::Vector{Float64}`: A function that takes a one dimensional array as argument.
# Example
```julia-repl
2 changes: 1 addition & 1 deletion src/bch.jl
@@ -163,7 +163,7 @@ function bch(
# Algorithm 3 - Fitting
squared_normalized_robust_distances = (newd .^ 2.0) / sum(newd .^ 2.0)
md = median(newd)
newdmd = Array{Float64, 1}(undef, n)
newdmd = Vector{Float64}(undef, n)
for i in 1:n
newdmd[i] = newd[i] / maximum([newd[i], md])
end
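The hunk above also shows the typical allocation pattern after the rename: an uninitialized buffer created with `Vector{Float64}(undef, n)` and filled in a loop. A small self-contained sketch of that median-capped scaling, with made-up distances rather than the actual bch quantities:

```julia
using Statistics  # median

newd = [0.5, 2.0, 3.5, 10.0]        # hypothetical robust distances
n = length(newd)
md = median(newd)

newdmd = Vector{Float64}(undef, n)  # uninitialized buffer, same style as the hunk
for i in 1:n
    # distances at or above the median are scaled to exactly 1.0
    newdmd[i] = newd[i] / maximum([newd[i], md])
end
```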
4 changes: 2 additions & 2 deletions src/ccf.jl
@@ -79,8 +79,8 @@ end
Perform signed gradient descent for clipped convex functions for a given regression setting.
# Arguments
- `X::Array{Float64, 2}`: Design matrix of the linear model.
- `y::Array{Float64, 1}`: Response vector of the linear model.
- `X::Matrix{Float64}`: Design matrix of the linear model.
- `y::Vector{Float64}`: Response vector of the linear model.
- `starting_lambdas::Array{Float64,1}`: Starting values of weighting parameters used by signed gradient descent.
- `alpha::Float64`: Loss at which a point is labeled as an outlier. If unspecified, will be chosen as p*mean(residuals.^2), where residuals are OLS residuals.
- `p::Float64`: Points that have squared OLS residual greater than p times the mean squared OLS residual are considered outliers.
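The argument list above maps onto a call along these lines. The entry-point name `ccf` is inferred from the file name and is not shown in this hunk, so treat it as an assumption; the data are made up:

```julia
# Hypothetical usage -- `ccf` as the exported name is an assumption
X = [ones(20) collect(1.0:20.0)]               # design matrix with an intercept column
y = 2.0 .+ 3.0 .* X[:, 2] .+ 0.1 .* randn(20)  # clean linear response
y[20] += 50.0                                  # plant one gross outlier

result = ccf(X, y)  # alpha defaults to p * mean(OLS residuals .^ 2), per the docstring
```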
12 changes: 6 additions & 6 deletions src/data.jl
@@ -980,9 +980,9 @@ const woodgravity = DataFrame(
Scottish Hill Races Data
# Components
- `dist::Array{Float64, 1}`: Distance in miles (Independent).
- `climb::Array{Float64, 1}`: Heights in feet (Independent).
- `time::Array{Float64, 1}`: Record times in hours (Dependent).
- `dist::Vector{Float64}`: Distance in miles (Independent).
- `climb::Vector{Float64}`: Heights in feet (Independent).
- `time::Vector{Float64}`: Record times in hours (Dependent).
# Model
time ~ dist + climb
@@ -1111,9 +1111,9 @@ const hills = DataFrame(
Soft Drink Delivery Data
# Components
- `cases::Array{Float64, 1}`: Independent variable.
- `distance::Array{Float64, 1}`: Independent variable.
- `time::Array{Float64, 1}`: Dependent variable.
- `cases::Vector{Float64}`: Independent variable.
- `distance::Vector{Float64}`: Independent variable.
- `time::Vector{Float64}`: Dependent variable.
# Model
time ~ distance + cases
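Both data sets are intended to be paired with the model formulas listed above. A hedged sketch, assuming a `createRegressionSetting` constructor (the constructor is not shown in this diff; `hills` and the formula are):

```julia
using StatsModels  # @formula

# `createRegressionSetting` is an assumed name; time ~ dist + climb comes from this file
setting = createRegressionSetting(@formula(time ~ dist + climb), hills)
```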
4 changes: 2 additions & 2 deletions src/dataimage.jl
@@ -3,7 +3,7 @@ module DataImage
export dataimage

import ..Diagnostics:
mahalanobisSquaredMatrix, euclideanDistances, SquaredBetweenPairs
mahalanobisSquaredMatrix, euclideanDistances, mahalanobisSquaredBetweenPairs

import ..RGBX

@@ -14,7 +14,7 @@ import ..RGBX
Generate the Marchette & Solka (2003) data image for a given data matrix.
# Arguments
- `dataMatrix::Array{Float64, 1}`: Data matrix with dimensions n x p, where n is the number of observations and p is the number of variables.
- `dataMatrix::Vector{Float64}`: Data matrix with dimensions n x p, where n is the number of observations and p is the number of variables.
- `distance::Symbol`: Optional argument for the distance function.
# Notes
62 changes: 31 additions & 31 deletions src/diagnostics.jl
@@ -33,7 +33,7 @@ import DataFrames: DataFrame
Calculate Euclidean distances between pairs.
# Arguments
- `dataMatrix::Array{Float64, 1}`: Data matrix with dimensions n x p, where n is the number of observations and p is the number of variables.
- `dataMatrix::Vector{Float64}`: Data matrix with dimensions n x p, where n is the number of observations and p is the number of variables.
# Notes
This is the helper function for the dataimage() function defined in Marchette & Solka (2003).
@@ -42,7 +42,7 @@ Calculate Euclidean distances between pairs.
Marchette, David J., and Jeffrey L. Solka. "Using data images for outlier detection."
Computational Statistics & Data Analysis 43.4 (2003): 541-552.
"""
function euclideanDistances(dataMatrix::Array{Float64, 2})::Array{Float64, 2}
function euclideanDistances(dataMatrix::Matrix{Float64})::Matrix{Float64}
n, _ = size(dataMatrix)
d = zeros(Float64, n, n)
for i in 1:n
@@ -90,7 +90,7 @@ end
Calculate Mahalanobis distances between pairs.
# Arguments
- `dataMatrix::Array{Float64, 1}`: Data matrix with dimensions n x p, where n is the number of observations and p is the number of variables.
- `dataMatrix::Vector{Float64}`: Data matrix with dimensions n x p, where n is the number of observations and p is the number of variables.
# Notes
Unlike standard Mahalanobis distances, this function calculates Mahalanobis distances between
@@ -101,7 +101,7 @@ Calculate Mahalanobis distances between pairs.
Marchette, David J., and Jeffrey L. Solka. "Using data images for outlier detection."
Computational Statistics & Data Analysis 43.4 (2003): 541-552.
"""
function mahalanobisBetweenPairs(dataMatrix::Array{Float64, 2})::Union{Nothing, Matrix}
function mahalanobisBetweenPairs(dataMatrix::Matrix{Float64})::Union{Nothing, Matrix}

n, _ = size(dataMatrix)

@@ -136,7 +136,7 @@ end
Return vector of medians of each column in a matrix.
# Arguments
- `datamat::Array{Float64, 2}`: A matrix.
- `datamat::Matrix{Float64}`: A matrix.
# Example
```julia-repl
@@ -152,7 +152,7 @@ julia> coordinatwisemedians(mat)
4.0
```
"""
function coordinatwisemedians(datamat::Array{Float64, 2})::Array{Float64, 1}
function coordinatwisemedians(datamat::Matrix{Float64})::Vector{Float64}
_, p = size(datamat)
meds = map(i -> median(datamat[:, i]), 1:p)
return meds
@@ -199,7 +199,7 @@ function dffit(setting::RegressionSetting, i::Int)::Float64
return dffit(X, y, i)
end

function dffit(X::Array{Float64, 2}, y::Array{Float64, 1}, i::Int)::Float64
function dffit(X::Matrix{Float64}, y::Vector{Float64}, i::Int)::Float64
n, _ = size(X)
indices = [j for j in 1:n if i != j]
olsfull = ols(X, y)
@@ -255,13 +255,13 @@ julia> dffits(reg)
Belsley, David A., Edwin Kuh, and Roy E. Welsch. Regression diagnostics:
Identifying influential data and sources of collinearity. Vol. 571. John Wiley & Sons, 2005.
"""
function dffits(setting::RegressionSetting)::Array{Float64, 1}
function dffits(setting::RegressionSetting)::Vector{Float64}
n, _ = size(setting.data)
result = [dffit(setting, i) for i in 1:n]
return result
end

function dffits(X::Array{Float64, 2}, y::Array{Float64, 1})::Array{Float64, 1}
function dffits(X::Matrix{Float64}, y::Vector{Float64})::Vector{Float64}
n, _ = size(X)
result = [dffit(X, y, i) for i in 1:n]
return result
@@ -283,12 +283,12 @@ julia> size(hatmatrix(reg))
(24, 24)
"""
function hatmatrix(setting::RegressionSetting)::Array{Float64, 2}
function hatmatrix(setting::RegressionSetting)::Matrix{Float64}
X = designMatrix(setting)
return hatmatrix(X)
end

function hatmatrix(X::Array{Float64, 2})::Array{Float64, 2}
function hatmatrix(X::Matrix{Float64})::Matrix{Float64}
return X * inv(X'X) * X'
end
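Since the whole body is visible here, the hat matrix can be sanity-checked directly: `H = X * inv(X'X) * X'` projects onto the column space of `X`, its diagonal holds the leverages, and its trace equals the number of columns. A standalone check with made-up data:

```julia
using LinearAlgebra

X = [ones(5) [1.0, 2.0, 3.0, 4.0, 10.0]]  # intercept plus one made-up regressor
H = X * inv(X'X) * X'                      # same formula as hatmatrix(X) above

leverages = diag(H)                        # h_ii; largest for the x = 10.0 point
@assert isapprox(sum(leverages), 2.0)      # trace(H) == number of columns
@assert isapprox(H * H, H)                 # H is idempotent (a projection matrix)
```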

@@ -332,12 +332,12 @@ julia> studentizedResiduals(reg)
-1.529459974327181
```
"""
function studentizedResiduals(setting::RegressionSetting)::Array{Float64, 1}
function studentizedResiduals(setting::RegressionSetting)::Vector{Float64}
X, y = @extractRegressionSetting setting
return studentizedResiduals(X, y)
end

function studentizedResiduals(X::Array{Float64, 2}, y::Array{Float64, 1})::Array{Float64, 1}
function studentizedResiduals(X::Matrix{Float64}, y::Vector{Float64})::Vector{Float64}
olsreg = ols(X, y)
n, p = size(X)
e = residuals(olsreg)
@@ -388,13 +388,13 @@ julia> adjustedResiduals(reg)
-85.9914301855088
```
"""
function adjustedResiduals(setting::RegressionSetting)::Array{Float64, 1}
function adjustedResiduals(setting::RegressionSetting)::Vector{Float64}
X, y = @extractRegressionSetting setting
return adjustedResiduals(X, y)
end


function adjustedResiduals(X::Array{Float64, 2}, y::Array{Float64, 1})::Array{Float64, 1}
function adjustedResiduals(X::Matrix{Float64}, y::Vector{Float64})::Vector{Float64}
olsreg = ols(X, y)
n, _ = size(X)
e = residuals(olsreg)
@@ -429,7 +429,7 @@ function jacknifedS(setting::RegressionSetting, k::Int)::Float64
return jacknifedS(X, y, k)
end

function jacknifedS(X::Array{Float64, 2}, y::Array{Float64, 1}, k::Int)::Float64
function jacknifedS(X::Matrix{Float64}, y::Vector{Float64}, k::Int)::Float64
n, p = size(X)
indices = [i for i in 1:n if i != k]
Xsub = X[indices, :]
@@ -486,12 +486,12 @@ julia> cooks(reg)
Cook, R. Dennis. "Detection of influential observation in linear regression."
Technometrics 19.1 (1977): 15-18.
"""
function cooks(setting::RegressionSetting)::Array{Float64, 1}
function cooks(setting::RegressionSetting)::Vector{Float64}
X, y = @extractRegressionSetting setting
return cooks(X, y)
end

function cooks(X::Array{Float64, 2}, y::Array{Float64, 1})::Array{Float64, 1}
function cooks(X::Matrix{Float64}, y::Vector{Float64})::Vector{Float64}
n, p = size(X)
olsreg = ols(X, y)
res = residuals(olsreg)
@@ -526,7 +526,7 @@ function cooksoutliers(setting::RegressionSetting; alpha::Float64 = 0.5)::Dict
return cooksoutliers(X, y, alpha = alpha)
end

function cooksoutliers(X::Array{Float64, 2}, y::Array{Float64, 1}; alpha::Float64 = 0.5)::Dict
function cooksoutliers(X::Matrix{Float64}, y::Vector{Float64}; alpha::Float64 = 0.5)::Dict
n, p = size(X)
d = cooks(X, y)
cutoff = cookscritical(n, p)
@@ -542,8 +542,8 @@ Calculate Mahalanobis distances.
# Arguments
- `data::DataFrame`: A DataFrame object of the multivariate data.
- `meanvector::Array{Float64, 1}`: Optional mean vector of variables.
- `covmatrix::Array{Float64, 2}`: Optional covariance matrix of data.
- `meanvector::Vector{Float64}`: Optional mean vector of variables.
- `covmatrix::Matrix{Float64}`: Optional covariance matrix of data.
# References
Mahalanobis, Prasanta Chandra. "On the generalized distance in statistics."
Expand All @@ -553,7 +553,7 @@ function mahalanobisSquaredMatrix(
data::DataFrame;
meanvector = nothing,
covmatrix = nothing,
)::Union{Nothing, Array{Float64, 2}}
)::Union{Nothing, Matrix{Float64}}
datamat = Matrix(data)
return mahalanobisSquaredMatrix(datamat, meanvector = meanvector, covmatrix = covmatrix)
end
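Behind both methods is the squared Mahalanobis distance d²(x) = (x − μ)ᵀ Σ⁻¹ (x − μ); the exact shape of the matrix returned by `mahalanobisSquaredMatrix` is not shown in this hunk. A self-contained sketch of the distance and of the determinant check that motivates the `Union{Nothing, Matrix{Float64}}` return type:

```julia
using Statistics, LinearAlgebra

# Made-up data: 6 observations, 2 variables, one obvious outlier
data = [1.0 2.0; 2.0 1.5; 3.0 3.2; 4.0 3.9; 5.0 5.1; 20.0 2.0]

mu = vec(mean(data, dims = 1))   # column means
S  = cov(data)                   # covariance of the columns

if det(S) == 0.0                 # the singularity the Union{Nothing, ...} guards against
    d2 = nothing
else
    Sinv = inv(S)
    d2 = [(row .- mu)' * Sinv * (row .- mu) for row in eachrow(data)]
end
```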
@@ -563,7 +563,7 @@ function mahalanobisSquaredMatrix(
datamat::Matrix;
meanvector = nothing,
covmatrix = nothing,
)::Union{Nothing, Array{Float64, 2}}
)::Union{Nothing, Matrix{Float64}}
if isnothing(meanvector)
meanvector = applyColumns(mean, datamat)
end
@@ -598,7 +598,7 @@ function dfbetas(setting)
return mapreduce(permutedims, vcat, results)
end

function dfbetas(X::Array{Float64, 2}, y::Array{Float64, 1})
function dfbetas(X::Matrix{Float64}, y::Vector{Float64})
results = map(i -> dfbeta(X, y, i), 1:length(y))
return mapreduce(permutedims, vcat, results)
end
@@ -622,16 +622,16 @@ julia> dfbeta(setting, 1)
-0.14686166007904422
```
"""
function dfbeta(setting::RegressionSetting, omittedIndex::Int)::Array{Float64, 1}
function dfbeta(setting::RegressionSetting, omittedIndex::Int)::Vector{Float64}
X, y = @extractRegressionSetting setting
return dfbeta(X, y, omittedIndex)
end

function dfbeta(
X::Array{Float64, 2},
y::Array{Float64, 1},
X::Matrix{Float64},
y::Vector{Float64},
omittedIndex::Int,
)::Array{Float64, 1}
)::Vector{Float64}
n = length(y)
omittedindices = filter(x -> x != omittedIndex, 1:n)
regfull = ols(X, y)
@@ -663,7 +663,7 @@ function covratio(setting::RegressionSetting, omittedIndex::Int)
return covratio(X, y, omittedIndex)
end

function covratio(X::Array{Float64, 2}, y::Array{Float64, 1}, omittedIndex::Int)
function covratio(X::Matrix{Float64}, y::Vector{Float64}, omittedIndex::Int)
n, p = size(X)
reg = ols(X, y)
r = residuals(reg)
@@ -709,7 +709,7 @@ function hadimeasure(setting::RegressionSetting; c::Float64 = 2.0)
hadimeasure(X, y, c = c)
end

function hadimeasure(X::Array{Float64, 2}, y::Array{Float64, 1}; c::Float64 = 2.0)
function hadimeasure(X::Matrix{Float64}, y::Vector{Float64}; c::Float64 = 2.0)
n, p = size(X)
reg = ols(X, y)
res = residuals(reg)
@@ -769,7 +769,7 @@ function diagnose(setting::RegressionSetting; alpha = 0.5)
end


function diagnose(X::Array{Float64, 2}, y::Array{Float64, 1}; alpha = 0.5)
function diagnose(X::Matrix{Float64}, y::Vector{Float64}; alpha = 0.5)
n, p = size(X)
resultdffits = dffits(X, y)
resultdfbetas = dfbetas(X, y)
4 changes: 2 additions & 2 deletions src/hadi1992.jl
@@ -22,7 +22,7 @@ import Distributions: Chisq
Perform the sub-algorithm of handling singularity defined in Hadi (1992).
# Arguments
- `S::Array{Float64, 2}`: A covariance matrix.
- `S::Matrix{Float64}`: A covariance matrix.
# Reference
Hadi, Ali S. "Identifying multiple outliers in multivariate data."
@@ -48,7 +48,7 @@ end
Perform Hadi (1992) algorithm for a given multivariate data.
# Arguments
- `multivariateData::Array{Float64, 2}`: Multivariate data.
- `multivariateData::Matrix{Float64}`: Multivariate data.
# Description
Algorithm starts with an initial subset and enlarges the subset to
2 changes: 1 addition & 1 deletion src/hadi1994.jl
@@ -20,7 +20,7 @@ import Distributions: Chisq
Perform Hadi (1994) algorithm for a given multivariate data.
# Arguments
- `multivariateData::Array{Float64, 2}`: Multivariate data.
- `multivariateData::Matrix{Float64}`: Multivariate data.
# Description
Algorithm starts with an initial subset and enlarges the subset to
4 changes: 2 additions & 2 deletions src/lad.jl
@@ -68,8 +68,8 @@ end
Perform Least Absolute Deviations regression for a given regression setting.
# Arguments
- `X::Array{Float64, 2}`: Design matrix of the linear model.
- `y::Array{Float64, 1}`: Response vector of the linear model.
- `X::Matrix{Float64}`: Design matrix of the linear model.
- `y::Vector{Float64}`: Response vector of the linear model.
- `exact::Bool`: If true, use exact LAD regression. If false, estimate LAD regression parameters using GA. Default is true.
"""
function lad(X::Array{Float64,2}, y::Array{Float64,1}; exact::Bool = true)
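The matrix/vector method shown above suggests a call like the one below. The data are made up, and what `lad` returns (coefficients, a Dict, a result struct) is not visible in this hunk, so this is a sketch only:

```julia
X = [ones(10) collect(1.0:10.0)]  # intercept plus a made-up regressor
y = 1.0 .+ 2.0 .* X[:, 2]
y[10] += 25.0                     # a vertical outlier that LAD should resist

result = lad(X, y)                   # exact fit (exact = true is the default)
# result = lad(X, y, exact = false)  # GA-based estimate, per the docstring
```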
4 changes: 2 additions & 2 deletions src/lta.jl
@@ -66,8 +66,8 @@ Perform the Hawkins & Olive (1999) algorithm (Least Trimmed Absolute Deviations)
for the given regression setting.
# Arguments
- `X::Array{Float64, 2}`: Design matrix of linear regression model.
- `y::Array{Float64, 1}`: Response vector of linear regression model.
- `X::Matrix{Float64}`: Design matrix of linear regression model.
- `y::Vector{Float64}`: Response vector of linear regression model.
- `exact::Bool`: Consider all possible subsets of p or not where p is the number of regression parameters.
- `earlystop::Bool`: Early stop if the best objective does not change in number of remaining iters / 5 iterations.
5 changes: 0 additions & 5 deletions src/lts.jl
@@ -47,7 +47,6 @@ function iterateCSteps(
maxiter::Int = 10000
eps::Float64 = 0.1
while iter < maxiter
#try
olsreg = ols(X[subsetindices, :], y[subsetindices])
betas = coef(olsreg)
res = y - X * betas
@@ -59,10 +58,6 @@ end
end
oldobjective = objective
iter += 1
#catch er
# @warn er
# return (objective, subsetindices)
#end
end
#if iter >= maxiter
# @warn "in c-step stage of LTS, a h-subset is not converged for starting indices " starterset
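For readers unfamiliar with the loop being simplified here: a C-step refits OLS on the current h-subset, recomputes all n residuals, and keeps the h observations with the smallest squared residuals as the next subset, stopping once the objective stops decreasing. A minimal standalone sketch, not the package's `iterateCSteps`:

```julia
# Illustrative C-step loop -- a sketch, not the actual implementation
function csteps(X::Matrix{Float64}, y::Vector{Float64}, subset::Vector{Int}, h::Int;
                maxiter::Int = 10_000)
    objective = Inf
    for _ in 1:maxiter
        betas  = X[subset, :] \ y[subset]    # OLS on the current h-subset
        res2   = (y .- X * betas) .^ 2       # squared residuals for all n observations
        subset = sortperm(res2)[1:h]         # keep the h best-fitting points
        newobjective = sum(res2[subset])
        newobjective >= objective && break   # objective is non-increasing; stop when it stalls
        objective = newobjective
    end
    return (objective, subset)
end
```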