From f2d5bad8879d50f04e72da0ab9347a39067319cc Mon Sep 17 00:00:00 2001 From: Mehmet Hakan Satman Date: Tue, 24 Jan 2023 13:10:05 +0300 Subject: [PATCH] py95() returns betas --- CHANGELOG.md | 2 ++ src/py95.jl | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index af8cdeb..092c719 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ - Replace GLPK with HiGHS. When n > 10000, HiGHS based lad() is now approximately 6x faster. - asm2000(), imon2005(), ks89(), bacon(), and smr98() now return regression coefficients calculated using the clean set of observations. - lts() has a new optional parameter called earlystop and it is true by default. If the objective function does not change in a predefined number of iterations, the search is stopped. +- py95() returns vector of estimated regression coefficients using the clean observations. + # v0.9.2 diff --git a/src/py95.jl b/src/py95.jl index e890720..84cecd6 100644 --- a/src/py95.jl +++ b/src/py95.jl @@ -174,7 +174,7 @@ the eigen structure of the influence matrix is examined for detecting suspected # Output - `["outliers"]`: Array of indices of outliers - `["suspected.sets"]`: Arrays of indices of observations for corresponding eigen value of the influence matrix. - +- `["betas"]`: Vector of estimated regression coefficients using the clean observations. # Examples ```julia-repl @@ -214,9 +214,15 @@ function py95(X::Array{Float64,2}, y::Array{Float64,1}) push!(outlierset, element) end end + + inlierset = setdiff(1:n, outlierset) + cleanols = ols(X[inlierset, :], y[inlierset]) + cleanbetas = coef(cleanols) + result = Dict() result["suspected.sets"] = suspicious_sets result["outliers"] = sort(collect(outlierset)) + result["betas"] = cleanbetas return result end