From f2d5bad8879d50f04e72da0ab9347a39067319cc Mon Sep 17 00:00:00 2001
From: Mehmet Hakan Satman <mhsatman@gmail.com>
Date: Tue, 24 Jan 2023 13:10:05 +0300
Subject: [PATCH] py95() returns betas

---
 CHANGELOG.md | 2 ++
 src/py95.jl  | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index af8cdeb..092c719 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@
 - Replace GLPK with HiGHS. When n > 10000, HiGHS based lad() is now approximately 6x faster.
 - asm2000(), imon2005(), ks89(), bacon(), and smr98() now return regression coefficients calculated using the clean set of observations.
 - lts() has a new optional parameter called earlystop and it is true by default. If the objective function does not change in a predefined number of iterations, the search is stopped.
+- py95() now returns the vector of regression coefficients estimated using the clean set of observations.
+
 
 # v0.9.2
 
diff --git a/src/py95.jl b/src/py95.jl
index e890720..84cecd6 100644
--- a/src/py95.jl
+++ b/src/py95.jl
@@ -174,7 +174,7 @@ the eigen structure of the influence matrix is examined for detecting suspected
 # Output 
 - `["outliers"]`: Array of indices of outliers
 - `["suspected.sets"]`: Arrays of indices of observations for corresponding eigen value of the influence matrix.
-
+- `["betas"]`: Vector of estimated regression coefficients using the clean observations.
 
 # Examples
 ```julia-repl
@@ -214,9 +214,15 @@ function py95(X::Array{Float64,2}, y::Array{Float64,1})
             push!(outlierset, element)
         end
     end
+    
+    inlierset = setdiff(1:n, outlierset)
+    cleanols = ols(X[inlierset, :], y[inlierset])
+    cleanbetas = coef(cleanols)
+    
     result = Dict()
     result["suspected.sets"] = suspicious_sets
     result["outliers"] = sort(collect(outlierset))
+    result["betas"] = cleanbetas
     return result
 end